Index: doc/src/sgml/textsearch.sgml =================================================================== RCS file: /cvsroot/pgsql/doc/src/sgml/textsearch.sgml,v retrieving revision 1.36 diff -c -r1.36 textsearch.sgml *** doc/src/sgml/textsearch.sgml 16 Nov 2007 03:23:07 -0000 1.36 --- doc/src/sgml/textsearch.sgml 19 Nov 2007 13:22:11 -0000 *************** *** 1862,1873 **** tag ! HTML tag <A HREF="dictionaries.html"> entity ! HTML entity &amp; --- 1862,1873 ---- tag ! HTML-type tag <A HREF="dictionaries.html"> entity ! HTML-type entity &amp; Index: src/backend/tsearch/wparser_def.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/tsearch/wparser_def.c,v retrieving revision 1.10 diff -c -r1.10 wparser_def.c *** src/backend/tsearch/wparser_def.c 15 Nov 2007 22:25:16 -0000 1.10 --- src/backend/tsearch/wparser_def.c 19 Nov 2007 13:22:11 -0000 *************** *** 95,101 **** "Hyphenated word part, all letters", "Hyphenated word part, all ASCII", "Space symbols", ! "HTML tag", "Protocol head", "Hyphenated word, letters and digits", "Hyphenated word, all ASCII", --- 95,101 ---- "Hyphenated word part, all letters", "Hyphenated word part, all ASCII", "Space symbols", ! "HTML-type tag", "Protocol head", "Hyphenated word, letters and digits", "Hyphenated word, all ASCII", *************** *** 105,111 **** "Decimal notation", "Signed integer", "Unsigned integer", ! "HTML entity" }; --- 105,111 ---- "Decimal notation", "Signed integer", "Unsigned integer", ! 
"HTML-type entity" }; *************** *** 136,141 **** --- 136,143 ---- TPS_InHTMLEntity, TPS_InHTMLEntityNumFirst, TPS_InHTMLEntityNum, + TPS_InHTMLEntityHexNumFirst, + TPS_InHTMLEntityHexNum, TPS_InHTMLEntityEnd, TPS_InTagFirst, TPS_InXMLBegin, *************** *** 815,836 **** {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL}, {p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; static const TParserStateActionItem actionTPS_InHTMLEntity[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, ! {p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, --- 817,852 ---- {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL}, {p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {p_iseqC, ':', A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InHTMLEntity, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; static const TParserStateActionItem actionTPS_InHTMLEntity[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, ! {p_isalnum, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, ! {p_iseqC, ':', A_NEXT, TPS_InHTMLEntity, 0, NULL}, ! {p_iseqC, '_', A_NEXT, TPS_InHTMLEntity, 0, NULL}, ! {p_iseqC, '.', A_NEXT, TPS_InHTMLEntity, 0, NULL}, ! 
{p_iseqC, '-', A_NEXT, TPS_InHTMLEntity, 0, NULL}, {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, 'x', A_NEXT, TPS_InHTMLEntityHexNumFirst, 0, NULL}, + {p_iseqC, 'X', A_NEXT, TPS_InHTMLEntityHexNumFirst, 0, NULL}, {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; + static const TParserStateActionItem actionTPS_InHTMLEntityHexNumFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isxdigit, 0, A_NEXT, TPS_InHTMLEntityHexNum, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} + }; + static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, *************** *** 838,843 **** --- 854,866 ---- {NULL, 0, A_POP, TPS_Null, 0, NULL} }; + static const TParserStateActionItem actionTPS_InHTMLEntityHexNum[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isxdigit, 0, A_NEXT, TPS_InHTMLEntityHexNum, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} + }; + static const TParserStateActionItem actionTPS_InHTMLEntityEnd[] = { {NULL, 0, A_BINGO | A_CLEAR, TPS_Base, HTMLENTITY, NULL} }; *************** *** 854,861 **** static const TParserStateActionItem actionTPS_InXMLBegin[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, /* qwer jf sdjk ewr1> ewri2 --- 232,238 ---- 20 | float | Decimal notation 21 | int | Signed integer 22 | uint | Unsigned integer ! 23 | entity | HTML-type entity (23 rows) SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2