Index: doc/src/sgml/textsearch.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/textsearch.sgml,v
retrieving revision 1.36
diff -c -r1.36 textsearch.sgml
*** doc/src/sgml/textsearch.sgml 16 Nov 2007 03:23:07 -0000 1.36
--- doc/src/sgml/textsearch.sgml 19 Nov 2007 13:22:11 -0000
***************
*** 1862,1873 ****
tag>
! HTML tag
<A HREF="dictionaries.html">
entity>
! HTML entity
&
--- 1862,1873 ----
tag>
! HTML-type tag
<A HREF="dictionaries.html">
entity>
! HTML-type entity
&
Index: src/backend/tsearch/wparser_def.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/wparser_def.c,v
retrieving revision 1.10
diff -c -r1.10 wparser_def.c
*** src/backend/tsearch/wparser_def.c 15 Nov 2007 22:25:16 -0000 1.10
--- src/backend/tsearch/wparser_def.c 19 Nov 2007 13:22:11 -0000
***************
*** 95,101 ****
"Hyphenated word part, all letters",
"Hyphenated word part, all ASCII",
"Space symbols",
! "HTML tag",
"Protocol head",
"Hyphenated word, letters and digits",
"Hyphenated word, all ASCII",
--- 95,101 ----
"Hyphenated word part, all letters",
"Hyphenated word part, all ASCII",
"Space symbols",
! "HTML-type tag",
"Protocol head",
"Hyphenated word, letters and digits",
"Hyphenated word, all ASCII",
***************
*** 105,111 ****
"Decimal notation",
"Signed integer",
"Unsigned integer",
! "HTML entity"
};
--- 105,111 ----
"Decimal notation",
"Signed integer",
"Unsigned integer",
! "HTML-type entity"
};
***************
*** 136,141 ****
--- 136,143 ----
TPS_InHTMLEntity,
TPS_InHTMLEntityNumFirst,
TPS_InHTMLEntityNum,
+ TPS_InHTMLEntityHexNumFirst,
+ TPS_InHTMLEntityHexNum,
TPS_InHTMLEntityEnd,
TPS_InTagFirst,
TPS_InXMLBegin,
***************
*** 815,836 ****
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
static const TParserStateActionItem actionTPS_InHTMLEntity[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
! {p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
--- 817,852 ----
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL},
{p_isasclet, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ {p_iseqC, ':', A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ {p_iseqC, '_', A_NEXT, TPS_InHTMLEntity, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
static const TParserStateActionItem actionTPS_InHTMLEntity[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
! {p_isalnum, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
! {p_iseqC, ':', A_NEXT, TPS_InHTMLEntity, 0, NULL},	/* ':' and '_' are also accepted as the first character */
! {p_iseqC, '_', A_NEXT, TPS_InHTMLEntity, 0, NULL},
! {p_iseqC, '.', A_NEXT, TPS_InHTMLEntity, 0, NULL},	/* '.' and '-' are accepted only after the first character */
! {p_iseqC, '-', A_NEXT, TPS_InHTMLEntity, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, 'x', A_NEXT, TPS_InHTMLEntityHexNumFirst, 0, NULL},	/* "&#x" introduces a hexadecimal reference */
+ {p_iseqC, 'X', A_NEXT, TPS_InHTMLEntityHexNumFirst, 0, NULL},	/* uppercase marker as well, e.g. "&#X41;" */
{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
+ static const TParserStateActionItem actionTPS_InHTMLEntityHexNumFirst[] = {	/* entered from actionTPS_InHTMLEntityNumFirst when a hex marker follows '#' */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isxdigit, 0, A_NEXT, TPS_InHTMLEntityHexNum, 0, NULL},	/* no ';' row here: a p_isxdigit match must come first */
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+ };
+
static const TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
***************
*** 838,843 ****
--- 854,866 ----
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
+ static const TParserStateActionItem actionTPS_InHTMLEntityHexNum[] = {	/* entered from actionTPS_InHTMLEntityHexNumFirst after its p_isxdigit match */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isxdigit, 0, A_NEXT, TPS_InHTMLEntityHexNum, 0, NULL},	/* loops in this state on each further p_isxdigit match */
+ {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},	/* ';' moves to TPS_InHTMLEntityEnd, as in actionTPS_InHTMLEntity */
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+ };
+
static const TParserStateActionItem actionTPS_InHTMLEntityEnd[] = {
{NULL, 0, A_BINGO | A_CLEAR, TPS_Base, HTMLENTITY, NULL}
};
***************
*** 854,861 ****
static const TParserStateActionItem actionTPS_InXMLBegin[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
/* qwer jf sdjk ewr1> ewri2
--- 232,238 ----
20 | float | Decimal notation
21 | int | Signed integer
22 | uint | Unsigned integer
! 23 | entity | HTML-type entity
(23 rows)
SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2