Index: doc/src/sgml/textsearch.sgml =================================================================== RCS file: /cvsroot/pgsql/doc/src/sgml/textsearch.sgml,v retrieving revision 1.30 diff -c -c -r1.30 textsearch.sgml *** doc/src/sgml/textsearch.sgml 5 Nov 2007 15:55:53 -0000 1.30 --- doc/src/sgml/textsearch.sgml 9 Nov 2007 02:26:17 -0000 *************** *** 2258,2277 **** ! Stop words recognized by the subdictionary are replaced by a stop ! word placeholder to record their position. To illustrate this, ! consider these phrases: ! a one the two : swsw ! the one a two : swsw2 ! Assuming that a and the are stop words according ! to the subdictionary, these two phrases are identical to the thesaurus: ! they both look like stopword one ! stopword two. Input matching this pattern ! will be replaced by swsw2, according to the tie-breaking rule. --- 2258,2274 ---- ! Specific stop words recognized by the subdictionary cannot be ! specified; instead use ? to mark the location where any ! stop word can appear. For example, assuming that a and ! the are stop words according to the subdictionary: ! ? one ? two : swsw ! matches a one the two and the one a two; ! both would be replaced by swsw. Index: src/backend/tsearch/dict_thesaurus.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/tsearch/dict_thesaurus.c,v retrieving revision 1.5 diff -c -c -r1.5 dict_thesaurus.c *** src/backend/tsearch/dict_thesaurus.c 9 Nov 2007 01:32:22 -0000 1.5 --- src/backend/tsearch/dict_thesaurus.c 9 Nov 2007 02:26:17 -0000 *************** *** 412,458 **** { TSLexeme *ptr; ! ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), ! PointerGetDatum(d->subdict->dictData), ! PointerGetDatum(d->wrds[i].lexeme), ! Int32GetDatum(strlen(d->wrds[i].lexeme)), ! PointerGetDatum(NULL))); ! ! if (!ptr) ! elog(ERROR, "thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)", ! d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); ! else if (!(ptr->lexeme)) ! { ! elog(NOTICE, "thesaurus word-sample \"%s\" is recognized as stop-word, assign any stop-word (rule %d)", ! d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); ! newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0); - } else { ! while (ptr->lexeme) { ! TSLexeme *remptr = ptr + 1; ! int tnvar = 1; ! int curvar = ptr->nvariant; ! ! /* compute n words in one variant */ ! while (remptr->lexeme) { ! if (remptr->nvariant != (remptr - 1)->nvariant) ! break; ! tnvar++; ! remptr++; ! } ! ! remptr = ptr; ! while (remptr->lexeme && remptr->nvariant == curvar) ! { ! newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar); ! remptr++; } - - ptr = remptr; } } --- 412,459 ---- { TSLexeme *ptr; ! if (strcmp(d->wrds[i].lexeme, "?") == 0) /* Is stop word marker? */ newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0); else { ! ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), ! PointerGetDatum(d->subdict->dictData), ! PointerGetDatum(d->wrds[i].lexeme), ! Int32GetDatum(strlen(d->wrds[i].lexeme)), ! PointerGetDatum(NULL))); ! ! if (!ptr) ! elog(ERROR, "thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)", ! d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); ! else if (!(ptr->lexeme)) ! elog(ERROR, "thesaurus word-sample \"%s\" is recognized as stop-word, use \"?\" for stop words instead (rule %d)", ! d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); ! else { ! while (ptr->lexeme) { ! TSLexeme *remptr = ptr + 1; ! int tnvar = 1; ! int curvar = ptr->nvariant; ! ! /* compute n words in one variant */ ! while (remptr->lexeme) ! { ! if (remptr->nvariant != (remptr - 1)->nvariant) ! break; ! tnvar++; ! remptr++; ! } ! ! remptr = ptr; ! while (remptr->lexeme && remptr->nvariant == curvar) ! { ! newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar); ! remptr++; ! } ! ! ptr = remptr; } } } Index: src/backend/tsearch/thesaurus_sample.ths =================================================================== RCS file: /cvsroot/pgsql/src/backend/tsearch/thesaurus_sample.ths,v retrieving revision 1.2 diff -c -c -r1.2 thesaurus_sample.ths *** src/backend/tsearch/thesaurus_sample.ths 23 Sep 2007 15:58:58 -0000 1.2 --- src/backend/tsearch/thesaurus_sample.ths 9 Nov 2007 02:26:17 -0000 *************** *** 14,17 **** supernovae stars : *sn supernovae : *sn booking tickets : order invitation cards ! # booking the tickets : order invitation Cards --- 14,18 ---- supernovae stars : *sn supernovae : *sn booking tickets : order invitation cards ! booking ? tickets : order invitation Cards ! Index: src/test/regress/expected/tsdicts.out =================================================================== RCS file: /cvsroot/pgsql/src/test/regress/expected/tsdicts.out,v retrieving revision 1.3 diff -c -c -r1.3 tsdicts.out *** src/test/regress/expected/tsdicts.out 23 Oct 2007 20:46:12 -0000 1.3 --- src/test/regress/expected/tsdicts.out 9 Nov 2007 02:26:20 -0000 *************** *** 311,318 **** (1 row) SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); ! to_tsvector ! --------------------------------------------------------------------- ! 'book':8 'card':3 'like':6 'look':5 'invit':2 'order':1 'ticket':10 (1 row) --- 311,318 ---- (1 row) SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); ! to_tsvector ! ------------------------------------------------------- ! 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8 (1 row)