From c508b4bae6b0727193de8f8dd196664511dbe317 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Sat, 7 Mar 2020 03:36:16 +0300 Subject: [PATCH 2/2] Ltree syntax improvements --- contrib/ltree/expected/ltree.out | 1620 ++++++++++++++++++++++++++++++++++++++ contrib/ltree/ltree.h | 23 +- contrib/ltree/ltree_io.c | 687 +++++++++++----- contrib/ltree/ltxtquery_io.c | 158 ++-- contrib/ltree/sql/ltree.sql | 386 +++++++++ doc/src/sgml/ltree.sgml | 38 +- 6 files changed, 2653 insertions(+), 259 deletions(-) diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out index 8226930..6215f0e 100644 --- a/contrib/ltree/expected/ltree.out +++ b/contrib/ltree/expected/ltree.out @@ -1,4 +1,5 @@ CREATE EXTENSION ltree; +SET standard_conforming_strings=on; -- Check whether any of our opclasses fail amvalidate SELECT amname, opcname FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod @@ -313,6 +314,11 @@ SELECT lca('1.2.2.3','1.2.3.4.5.6','1'); (1 row) +SELECT ''::lquery; +ERROR: syntax error +LINE 1: SELECT ''::lquery; + ^ +DETAIL: Unexpected end of line. SELECT '1'::lquery; lquery -------- @@ -445,6 +451,16 @@ SELECT '1.*.4|3|2.*{1}'::lquery; 1.*.4|3|2.*{1} (1 row) +SELECT '*'::lquery; + lquery +-------- + * +(1 row) + +SELECT '*{1}|2'::lquery; +ERROR: syntax error at position 4 +LINE 1: SELECT '*{1}|2'::lquery; + ^ SELECT 'qwerty%@*.tu'::lquery; lquery -------------- @@ -7679,3 +7695,1607 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ; 15 (1 row) +-- Extended syntax, escaping, quoting etc +-- success +SELECT E'\\.'::ltree; + ltree +------- + "." 
+(1 row) + +SELECT E'\\ '::ltree; + ltree +------- + " " +(1 row) + +SELECT E'\\\\'::ltree; + ltree +------- + "\\" +(1 row) + +SELECT E'\\a'::ltree; + ltree +------- + a +(1 row) + +SELECT E'\\n'::ltree; + ltree +------- + n +(1 row) + +SELECT E'x\\\\'::ltree; + ltree +------- + "x\\" +(1 row) + +SELECT E'x\\ '::ltree; + ltree +------- + "x " +(1 row) + +SELECT E'x\\.'::ltree; + ltree +------- + "x." +(1 row) + +SELECT E'x\\a'::ltree; + ltree +------- + xa +(1 row) + +SELECT E'x\\n'::ltree; + ltree +------- + xn +(1 row) + +SELECT 'a b.с d'::ltree; +ERROR: syntax error at position 2 +LINE 1: SELECT 'a b.с d'::ltree; + ^ +SELECT '"a b"."с d"'::ltree; + ltree +------------- + "a b"."с d" +(1 row) + +SELECT ' e . f '::ltree; + ltree +------- + e.f +(1 row) + +SELECT ' '::ltree; + ltree +------- + +(1 row) + +SELECT E'\\ g . h\\ '::ltree; + ltree +----------- + " g"."h " +(1 row) + +SELECT E'\\ g'::ltree; + ltree +------- + " g" +(1 row) + +SELECT E' h\\ '::ltree; + ltree +------- + "h " +(1 row) + +SELECT '"g" '::ltree; + ltree +------- + g +(1 row) + +SELECT '"g" . h'::ltree; + ltree +------- + g.h +(1 row) + +SELECT '" g "." h "'::ltree; + ltree +-------------- + " g "." h " +(1 row) + +SELECT '" g " '::ltree; + ltree +-------- + " g " +(1 row) + +SELECT '" g " ." h " '::ltree; + ltree +-------------- + " g "." h " +(1 row) + +SELECT nlevel(E'Bottom\\.Test'::ltree); + nlevel +-------- + 1 +(1 row) + +SELECT subpath(E'Bottom\\.'::ltree, 0, 1); + subpath +----------- + "Bottom." 
+(1 row) + +SELECT subpath(E'a\\.b', 0, 1); + subpath +--------- + "a.b" +(1 row) + +SELECT subpath(E'a\\..b', 1, 1); + subpath +--------- + b +(1 row) + +SELECT subpath(E'a\\..\\b', 1, 1); + subpath +--------- + b +(1 row) + +SELECT subpath(E'"a b"."с d"'::ltree, 1, 1); + subpath +--------- + "с d" +(1 row) + +SELECT( +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z')::ltree; + ltree +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789zzzzz +(1 row) + +SELECT(' ' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\a\b\c\d\e ')::ltree; + ltree +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcde +(1 row) + +SELECT 'abc\|d'::lquery; + lquery +--------- + "abc|d" +(1 row) + +SELECT 'abc\|d'::ltree ~ 'abc\|d'::lquery; + ?column? +---------- + t +(1 row) + +SELECT 'abc\|d'::ltree ~ 'abc*'::lquery; --true + ?column? +---------- + t +(1 row) + +SELECT 'abc\|d'::ltree ~ 'abc\*'::lquery; --false + ?column? +---------- + f +(1 row) + +SELECT E'abc\\|\\.'::ltree ~ 'abc\|*'::lquery; --true + ?column? +---------- + t +(1 row) + +SELECT E'"\\""'::ltree; + ltree +------- + "\"" +(1 row) + +SELECT '\"'::ltree; + ltree +------- + "\"" +(1 row) + +SELECT E'\\"'::ltree; + ltree +------- + "\"" +(1 row) + +SELECT 'a\"b'::ltree; + ltree +-------- + "a\"b" +(1 row) + +SELECT '"ab"'::ltree; + ltree +------- + ab +(1 row) + +SELECT '"."'::ltree; + ltree +------- + "." 
+(1 row) + +SELECT E'".\\""'::ltree; + ltree +------- + ".\"" +(1 row) + +SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z"')::ltree; + ltree +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789zzzzz +(1 row) + +SELECT E'"\\""'::lquery; + lquery +-------- + "\"" +(1 row) + +SELECT '\"'::lquery; + lquery +-------- + "\"" +(1 row) + +SELECT E'\\"'::lquery; + lquery +-------- + "\"" +(1 row) + +SELECT 'a\"b'::lquery; + lquery +-------- + "a\"b" +(1 row) + +SELECT '"ab"'::lquery; + lquery +-------- + ab +(1 row) + +SELECT '"."'::lquery; + lquery +-------- + "." 
+(1 row) + +SELECT E'".\\""'::lquery; + lquery +-------- + ".\"" +(1 row) + +SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z"')::lquery; + lquery +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789zzzzz +(1 row) + +SELECT ' e . f '::lquery; + lquery +-------- + e.f +(1 row) + +SELECT ' e | f '::lquery; + lquery +-------- + e|f +(1 row) + +SELECT E'\\ g . h\\ '::lquery; + lquery +----------- + " g"."h " +(1 row) + +SELECT E'\\ g'::lquery; + lquery +-------- + " g" +(1 row) + +SELECT E' h\\ '::lquery; + lquery +-------- + "h " +(1 row) + +SELECT E'"\\ g"'::lquery; + lquery +-------- + " g" +(1 row) + +SELECT E' "h\\ "'::lquery; + lquery +-------- + "h " +(1 row) + +SELECT '" g "." h "'::lquery; + lquery +-------------- + " g "." h " +(1 row) + +SELECT E'\\ g | h\\ '::lquery; + lquery +----------- + " g"|"h " +(1 row) + +SELECT '" g "|" h "'::lquery; + lquery +-------------- + " g "|" h " +(1 row) + +SELECT '"g" '::lquery; + lquery +-------- + g +(1 row) + +SELECT '"g" . h'::lquery; + lquery +-------- + g.h +(1 row) + +SELECT '" g " '::lquery; + lquery +-------- + " g " +(1 row) + +SELECT '" g " ." h " '::lquery; + lquery +-------------- + " g "." 
h " +(1 row) + +SELECT '" g " | " h " '::lquery; + lquery +-------------- + " g "|" h " +(1 row) + +SELECT(' ' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\a\b\c\d\e ')::lquery; + lquery +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcde +(1 row) + +SELECT E'"a\\"b"'::lquery; + lquery +-------- + "a\"b" +(1 row) + +SELECT '"a!b"'::lquery; + lquery +-------- + "a!b" +(1 row) + +SELECT '"a%b"'::lquery; + lquery +-------- + "a%b" +(1 row) + +SELECT '"a*b"'::lquery; + lquery +-------- + "a*b" +(1 row) + +SELECT '"a@b"'::lquery; + lquery +-------- + "a@b" +(1 row) + +SELECT '"a{b"'::lquery; + lquery +-------- + "a{b" +(1 row) + +SELECT '"a}b"'::lquery; + lquery +-------- + "a}b" +(1 row) + +SELECT '"a|b"'::lquery; + lquery +-------- + "a|b" +(1 row) + +SELECT E'a\\"b'::lquery; + lquery +-------- + "a\"b" +(1 row) + +SELECT E'a\\!b'::lquery; + lquery +-------- + "a!b" +(1 row) + +SELECT E'a\\%b'::lquery; + lquery +-------- + "a%b" +(1 row) + +SELECT E'a\\*b'::lquery; + lquery +-------- + "a*b" +(1 row) + +SELECT E'a\\@b'::lquery; + lquery +-------- + "a@b" +(1 row) + +SELECT E'a\\{b'::lquery; + lquery +-------- + "a{b" +(1 row) + +SELECT E'a\\}b'::lquery; + lquery +-------- + "a}b" +(1 row) + +SELECT E'a\\|b'::lquery; + lquery +-------- + "a|b" +(1 row) + +SELECT 
'!"!b"'::lquery; + lquery +-------- + !"!b" +(1 row) + +SELECT '!"%b"'::lquery; + lquery +-------- + !"%b" +(1 row) + +SELECT '!"*b"'::lquery; + lquery +-------- + !"*b" +(1 row) + +SELECT '!"@b"'::lquery; + lquery +-------- + !"@b" +(1 row) + +SELECT '!"{b"'::lquery; + lquery +-------- + !"{b" +(1 row) + +SELECT '!"}b"'::lquery; + lquery +-------- + !"}b" +(1 row) + +SELECT E'!\\!b'::lquery; + lquery +-------- + !"!b" +(1 row) + +SELECT E'!\\%b'::lquery; + lquery +-------- + !"%b" +(1 row) + +SELECT E'!\\*b'::lquery; + lquery +-------- + !"*b" +(1 row) + +SELECT E'!\\@b'::lquery; + lquery +-------- + !"@b" +(1 row) + +SELECT E'!\\{b'::lquery; + lquery +-------- + !"{b" +(1 row) + +SELECT E'!\\}b'::lquery; + lquery +-------- + !"}b" +(1 row) + +SELECT '"1"'::lquery; + lquery +-------- + 1 +(1 row) + +SELECT '"2.*"'::lquery; + lquery +-------- + "2.*" +(1 row) + +SELECT '!"1"'::lquery; + lquery +-------- + !1 +(1 row) + +SELECT '!"1|"'::lquery; + lquery +-------- + !"1|" +(1 row) + +SELECT '4|3|"2"'::lquery; + lquery +-------- + 4|3|2 +(1 row) + +SELECT '"1".2'::lquery; + lquery +-------- + 1.2 +(1 row) + +SELECT '"1.4"|"3"|2'::lquery; + lquery +----------- + "1.4"|3|2 +(1 row) + +SELECT '"1"."4"|"3"|"2"'::lquery; + lquery +--------- + 1.4|3|2 +(1 row) + +SELECT '"1"."0"'::lquery; + lquery +-------- + 1.0 +(1 row) + +SELECT '"1".0'::lquery; + lquery +-------- + 1.0 +(1 row) + +SELECT '"1".*'::lquery; + lquery +-------- + 1.* +(1 row) + +SELECT '4|"3"|2.*'::lquery; + lquery +--------- + 4|3|2.* +(1 row) + +SELECT '4|"3"|"2.*"'::lquery; + lquery +----------- + 4|3|"2.*" +(1 row) + +SELECT '2."*"'::lquery; + lquery +-------- + 2."*" +(1 row) + +SELECT '"*".1."*"'::lquery; + lquery +----------- + "*".1."*" +(1 row) + +SELECT '"*.4"|3|2.*'::lquery; + lquery +------------- + "*.4"|3|2.* +(1 row) + +SELECT '"*.4"|3|"2.*"'::lquery; + lquery +--------------- + "*.4"|3|"2.*" +(1 row) + +SELECT '1.*.4|3|2.*{,4}'::lquery; + lquery +----------------- + 1.*.4|3|2.*{,4} +(1 row) + 
+SELECT '1.*.4|3|2.*{1,}'::lquery; + lquery +----------------- + 1.*.4|3|2.*{1,} +(1 row) + +SELECT '1.*.4|3|2.*{1}'::lquery; + lquery +---------------- + 1.*.4|3|2.*{1} +(1 row) + +SELECT '"qwerty"%@*.tu'::lquery; + lquery +-------------- + qwerty%@*.tu +(1 row) + +SELECT '1.*.4|3|"2".*{1,4}'::lquery; + lquery +------------------ + 1.*.4|3|2.*{1,4} +(1 row) + +SELECT '1."*".4|3|"2".*{1,4}'::lquery; + lquery +-------------------- + 1."*".4|3|2.*{1,4} +(1 row) + +SELECT '\%\ \@'::lquery; + lquery +-------- + "% @" +(1 row) + +SELECT '"\% \@"'::lquery; + lquery +-------- + "% @" +(1 row) + +SELECT E'\\aa.b.c.d.e'::ltree ~ 'A@.b.c.d.e'; + ?column? +---------- + f +(1 row) + +SELECT E'a\\a.b.c.\\d.e'::ltree ~ 'A*.b.c.d.e'; + ?column? +---------- + f +(1 row) + +SELECT E'a\\a.b.c.\\d.e'::ltree ~ E'A*@.b.c.d.\\e'; + ?column? +---------- + t +(1 row) + +SELECT E'a\\a.b.c.\\d.e'::ltree ~ E'A*@|\\g.b.c.d.e'; + ?column? +---------- + t +(1 row) + +--ltxtquery +SELECT '!"tree" & aWdf@*'::ltxtquery; + ltxtquery +---------------- + !tree & aWdf@* +(1 row) + +SELECT '"!tree" & aWdf@*'::ltxtquery; + ltxtquery +------------------ + "!tree" & aWdf@* +(1 row) + +SELECT E'tr\\ee'::ltree @ E'\\t\\r\\e\\e'::ltxtquery; + ?column? +---------- + t +(1 row) + +SELECT E'tr\\ee.awd\\fg'::ltree @ E'tre\\e & a\\Wdf@*'::ltxtquery; + ?column? +---------- + t +(1 row) + +SELECT 'tree & aw_qw%*'::ltxtquery; + ltxtquery +---------------- + tree & aw_qw%* +(1 row) + +SELECT 'tree."awdfg"'::ltree @ E'tree & a\\Wdf@*'::ltxtquery; + ?column? +---------- + t +(1 row) + +SELECT 'tree."awdfg"'::ltree @ E'tree & "a\\Wdf"@*'::ltxtquery; + ?column? +---------- + t +(1 row) + +SELECT 'tree.awdfg_qwerty'::ltree @ 'tree & aw_qw%*'::ltxtquery; + ?column? +---------- + t +(1 row) + +SELECT 'tree.awdfg_qwerty'::ltree @ 'tree & "aw_rw"%*'::ltxtquery; + ?column? +---------- + f +(1 row) + +SELECT 'tree.awdfg_qwerty'::ltree @ E'tree & "aw\\_qw"%*'::ltxtquery; + ?column? 
+---------- + t +(1 row) + +SELECT 'tree.awdfg_qwerty'::ltree @ E'tree & aw\\_qw%*'::ltxtquery; + ?column? +---------- + t +(1 row) + +SELECT E'"a\\"b"'::ltxtquery; + ltxtquery +----------- + "a\"b" +(1 row) + +SELECT '"a!b"'::ltxtquery; + ltxtquery +----------- + "a!b" +(1 row) + +SELECT '"a%b"'::ltxtquery; + ltxtquery +----------- + "a%b" +(1 row) + +SELECT '"a*b"'::ltxtquery; + ltxtquery +----------- + "a*b" +(1 row) + +SELECT '"a@b"'::ltxtquery; + ltxtquery +----------- + "a@b" +(1 row) + +SELECT '"a{b"'::ltxtquery; + ltxtquery +----------- + "a{b" +(1 row) + +SELECT '"a}b"'::ltxtquery; + ltxtquery +----------- + "a}b" +(1 row) + +SELECT '"a|b"'::ltxtquery; + ltxtquery +----------- + "a|b" +(1 row) + +SELECT '"a&b"'::ltxtquery; + ltxtquery +----------- + "a&b" +(1 row) + +SELECT '"a(b"'::ltxtquery; + ltxtquery +----------- + "a(b" +(1 row) + +SELECT '"a)b"'::ltxtquery; + ltxtquery +----------- + "a)b" +(1 row) + +SELECT E'a\\"b'::ltxtquery; + ltxtquery +----------- + "a\"b" +(1 row) + +SELECT E'a\\!b'::ltxtquery; + ltxtquery +----------- + "a!b" +(1 row) + +SELECT E'a\\%b'::ltxtquery; + ltxtquery +----------- + "a%b" +(1 row) + +SELECT E'a\\*b'::ltxtquery; + ltxtquery +----------- + "a*b" +(1 row) + +SELECT E'a\\@b'::ltxtquery; + ltxtquery +----------- + "a@b" +(1 row) + +SELECT E'a\\{b'::ltxtquery; + ltxtquery +----------- + "a{b" +(1 row) + +SELECT E'a\\}b'::ltxtquery; + ltxtquery +----------- + "a}b" +(1 row) + +SELECT E'a\\|b'::ltxtquery; + ltxtquery +----------- + "a|b" +(1 row) + +SELECT E'a\\&b'::ltxtquery; + ltxtquery +----------- + "a&b" +(1 row) + +SELECT E'a\\(b'::ltxtquery; + ltxtquery +----------- + "a(b" +(1 row) + +SELECT E'a\\)b'::ltxtquery; + ltxtquery +----------- + "a)b" +(1 row) + +SELECT E'"\\"b"'::ltxtquery; + ltxtquery +----------- + "\"b" +(1 row) + +SELECT '"!b"'::ltxtquery; + ltxtquery +----------- + "!b" +(1 row) + +SELECT '"%b"'::ltxtquery; + ltxtquery +----------- + "%b" +(1 row) + +SELECT '"*b"'::ltxtquery; + ltxtquery +----------- 
+ "*b" +(1 row) + +SELECT '"@b"'::ltxtquery; + ltxtquery +----------- + "@b" +(1 row) + +SELECT '"{b"'::ltxtquery; + ltxtquery +----------- + "{b" +(1 row) + +SELECT '"}b"'::ltxtquery; + ltxtquery +----------- + "}b" +(1 row) + +SELECT '"|b"'::ltxtquery; + ltxtquery +----------- + "|b" +(1 row) + +SELECT '"&b"'::ltxtquery; + ltxtquery +----------- + "&b" +(1 row) + +SELECT '"(b"'::ltxtquery; + ltxtquery +----------- + "(b" +(1 row) + +SELECT '")b"'::ltxtquery; + ltxtquery +----------- + ")b" +(1 row) + +SELECT E'\\"b'::ltxtquery; + ltxtquery +----------- + "\"b" +(1 row) + +SELECT E'\\!b'::ltxtquery; + ltxtquery +----------- + "!b" +(1 row) + +SELECT E'\\%b'::ltxtquery; + ltxtquery +----------- + "%b" +(1 row) + +SELECT E'\\*b'::ltxtquery; + ltxtquery +----------- + "*b" +(1 row) + +SELECT E'\\@b'::ltxtquery; + ltxtquery +----------- + "@b" +(1 row) + +SELECT E'\\{b'::ltxtquery; + ltxtquery +----------- + "{b" +(1 row) + +SELECT E'\\}b'::ltxtquery; + ltxtquery +----------- + "}b" +(1 row) + +SELECT E'\\|b'::ltxtquery; + ltxtquery +----------- + "|b" +(1 row) + +SELECT E'\\&b'::ltxtquery; + ltxtquery +----------- + "&b" +(1 row) + +SELECT E'\\(b'::ltxtquery; + ltxtquery +----------- + "(b" +(1 row) + +SELECT E'\\)b'::ltxtquery; + ltxtquery +----------- + ")b" +(1 row) + +SELECT E'"a\\""'::ltxtquery; + ltxtquery +----------- + "a\"" +(1 row) + +SELECT '"a!"'::ltxtquery; + ltxtquery +----------- + "a!" 
+(1 row) + +SELECT '"a%"'::ltxtquery; + ltxtquery +----------- + "a%" +(1 row) + +SELECT '"a*"'::ltxtquery; + ltxtquery +----------- + "a*" +(1 row) + +SELECT '"a@"'::ltxtquery; + ltxtquery +----------- + "a@" +(1 row) + +SELECT '"a{"'::ltxtquery; + ltxtquery +----------- + "a{" +(1 row) + +SELECT '"a}"'::ltxtquery; + ltxtquery +----------- + "a}" +(1 row) + +SELECT '"a|"'::ltxtquery; + ltxtquery +----------- + "a|" +(1 row) + +SELECT '"a&"'::ltxtquery; + ltxtquery +----------- + "a&" +(1 row) + +SELECT '"a("'::ltxtquery; + ltxtquery +----------- + "a(" +(1 row) + +SELECT '"a)"'::ltxtquery; + ltxtquery +----------- + "a)" +(1 row) + +SELECT E'a\\"'::ltxtquery; + ltxtquery +----------- + "a\"" +(1 row) + +SELECT E'a\\!'::ltxtquery; + ltxtquery +----------- + "a!" +(1 row) + +SELECT E'a\\%'::ltxtquery; + ltxtquery +----------- + "a%" +(1 row) + +SELECT E'a\\*'::ltxtquery; + ltxtquery +----------- + "a*" +(1 row) + +SELECT E'a\\@'::ltxtquery; + ltxtquery +----------- + "a@" +(1 row) + +SELECT E'a\\{'::ltxtquery; + ltxtquery +----------- + "a{" +(1 row) + +SELECT E'a\\}'::ltxtquery; + ltxtquery +----------- + "a}" +(1 row) + +SELECT E'a\\|'::ltxtquery; + ltxtquery +----------- + "a|" +(1 row) + +SELECT E'a\\&'::ltxtquery; + ltxtquery +----------- + "a&" +(1 row) + +SELECT E'a\\('::ltxtquery; + ltxtquery +----------- + "a(" +(1 row) + +SELECT E'a\\)'::ltxtquery; + ltxtquery +----------- + "a)" +(1 row) + +--failures +SELECT E'\\'::ltree; +ERROR: unclosed escape sequence at position 0 +LINE 1: SELECT E'\\'::ltree; + ^ +SELECT E'n\\'::ltree; +ERROR: unclosed escape sequence at position 1 +LINE 1: SELECT E'n\\'::ltree; + ^ +SELECT '"'::ltree; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"'::ltree; + ^ +SELECT '"a'::ltree; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"a'::ltree; + ^ +SELECT '""'::ltree; +ERROR: name of level is empty +LINE 1: SELECT '""'::ltree; + ^ +DETAIL: Name length is 0 in position 2. 
+SELECT 'a"b'::ltree; +ERROR: unclosed quote at position 1 +LINE 1: SELECT 'a"b'::ltree; + ^ +SELECT E'\\"ab"'::ltree; +ERROR: unclosed quote at position 4 +LINE 1: SELECT E'\\"ab"'::ltree; + ^ +SELECT '"a"."a'::ltree; +ERROR: unclosed quote at position 4 +LINE 1: SELECT '"a"."a'::ltree; + ^ +SELECT '"a."a"'::ltree; +ERROR: syntax error at position 4 +LINE 1: SELECT '"a."a"'::ltree; + ^ +SELECT '"".a'::ltree; +ERROR: name of level is empty +LINE 1: SELECT '"".a'::ltree; + ^ +DETAIL: Name length is 0 in position 2. +SELECT 'a.""'::ltree; +ERROR: name of level is empty +LINE 1: SELECT 'a.""'::ltree; + ^ +DETAIL: Name length is 0 in position 4. +SELECT '"".""'::ltree; +ERROR: name of level is empty +LINE 1: SELECT '"".""'::ltree; + ^ +DETAIL: Name length is 0 in position 2. +SELECT '""'::lquery; +ERROR: name of level is empty +LINE 1: SELECT '""'::lquery; + ^ +DETAIL: Name length is 0 in position 2. +SELECT '"".""'::lquery; +ERROR: name of level is empty +LINE 1: SELECT '"".""'::lquery; + ^ +DETAIL: Name length is 0 in position 2. +SELECT 'a.""'::lquery; +ERROR: name of level is empty +LINE 1: SELECT 'a.""'::lquery; + ^ +DETAIL: Name length is 0 in position 4. +SELECT ' . '::ltree; +ERROR: syntax error at position 1 +LINE 1: SELECT ' . '::ltree; + ^ +SELECT ' . '::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT ' . '::lquery; + ^ +SELECT ' | '::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT ' | '::lquery; + ^ +SELECT( +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'z\z\z\z\z\z')::ltree; +ERROR: name of level is too long +DETAIL: Name length is 256, must be < 256, in position 261. 
+SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z\z"')::ltree; +ERROR: name of level is too long +DETAIL: Name length is 256, must be < 256, in position 264. +SELECT '"'::lquery; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"'::lquery; + ^ +SELECT '"a'::lquery; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"a'::lquery; + ^ +SELECT '"a"."a'::lquery; +ERROR: unclosed quote at position 4 +LINE 1: SELECT '"a"."a'::lquery; + ^ +SELECT '"a."a"'::lquery; +ERROR: syntax error at position 4 +LINE 1: SELECT '"a."a"'::lquery; + ^ +SELECT E'\\"ab"'::lquery; +ERROR: unclosed quote at position 4 +LINE 1: SELECT E'\\"ab"'::lquery; + ^ +SELECT 'a"b'::lquery; +ERROR: unclosed quote at position 1 +LINE 1: SELECT 'a"b'::lquery; + ^ +SELECT 'a!b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'a!b'::lquery; + ^ +SELECT 'a%b'::lquery; +ERROR: syntax error at position 2 +LINE 1: SELECT 'a%b'::lquery; + ^ +SELECT 'a*b'::lquery; +ERROR: syntax error at position 2 +LINE 1: SELECT 'a*b'::lquery; + ^ +SELECT 'a@b'::lquery; +ERROR: syntax error at position 2 +LINE 1: SELECT 'a@b'::lquery; + ^ +SELECT 'a{b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'a{b'::lquery; + ^ +SELECT 'a}b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'a}b'::lquery; + ^ +SELECT 'a!'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'a!'::lquery; + ^ +SELECT 'a{'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'a{'::lquery; + ^ +SELECT 'a}'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'a}'::lquery; + ^ +SELECT '%b'::lquery; +ERROR: syntax error at position 0 +LINE 1: SELECT '%b'::lquery; + ^ +SELECT '*b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 
'*b'::lquery; + ^ +SELECT '@b'::lquery; +ERROR: syntax error at position 0 +LINE 1: SELECT '@b'::lquery; + ^ +SELECT '{b'::lquery; +ERROR: syntax error at position 0 +LINE 1: SELECT '{b'::lquery; + ^ +SELECT '}b'::lquery; +ERROR: syntax error at position 0 +LINE 1: SELECT '}b'::lquery; + ^ +SELECT '!%b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT '!%b'::lquery; + ^ +SELECT '!*b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT '!*b'::lquery; + ^ +SELECT '!@b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT '!@b'::lquery; + ^ +SELECT '!{b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT '!{b'::lquery; + ^ +SELECT '!}b'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT '!}b'::lquery; + ^ +SELECT '"qwert"y.tu'::lquery; +ERROR: syntax error at position 7 +LINE 1: SELECT '"qwert"y.tu'::lquery; + ^ +SELECT 'q"wert"y"%@*.tu'::lquery; +ERROR: syntax error at position 1 +LINE 1: SELECT 'q"wert"y"%@*.tu'::lquery; + ^ +SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z\z"')::lquery; +ERROR: name of level is too long +DETAIL: Name length is 256, must be < 256, in position 264. 
+SELECT 'a | ""'::ltxtquery; +ERROR: empty labels are forbidden +LINE 1: SELECT 'a | ""'::ltxtquery; + ^ +SELECT '"" & ""'::ltxtquery; +ERROR: empty labels are forbidden +LINE 1: SELECT '"" & ""'::ltxtquery; + ^ +SELECT 'a.""'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a.""'::ltxtquery; + ^ +SELECT '"'::ltxtquery; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"'::ltxtquery; + ^ +SELECT '"""'::ltxtquery; +ERROR: unclosed quote at position 2 +LINE 1: SELECT '"""'::ltxtquery; + ^ +SELECT '"a'::ltxtquery; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"a'::ltxtquery; + ^ +SELECT '"a" & "a'::ltxtquery; +ERROR: unclosed quote at position 6 +LINE 1: SELECT '"a" & "a'::ltxtquery; + ^ +SELECT '"a | "a"'::ltxtquery; +ERROR: unquoted special symbol at position 6 +LINE 1: SELECT '"a | "a"'::ltxtquery; + ^ +SELECT '"!tree" & aWdf@*"'::ltxtquery; +ERROR: unclosed quote at position 16 +LINE 1: SELECT '"!tree" & aWdf@*"'::ltxtquery; + ^ +SELECT 'a"b'::ltxtquery; +ERROR: unclosed quote at position 1 +LINE 1: SELECT 'a"b'::ltxtquery; + ^ +SELECT 'a!b'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a!b'::ltxtquery; + ^ +SELECT 'a%b'::ltxtquery; +ERROR: unquoted special symbol at position 2 +LINE 1: SELECT 'a%b'::ltxtquery; + ^ +SELECT 'a*b'::ltxtquery; +ERROR: unquoted special symbol at position 2 +LINE 1: SELECT 'a*b'::ltxtquery; + ^ +SELECT 'a@b'::ltxtquery; +ERROR: unquoted special symbol at position 2 +LINE 1: SELECT 'a@b'::ltxtquery; + ^ +SELECT 'a{b'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a{b'::ltxtquery; + ^ +SELECT 'a}b'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a}b'::ltxtquery; + ^ +SELECT 'a|b'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a|b'::ltxtquery; + ^ +SELECT 'a&b'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a&b'::ltxtquery; + ^ +SELECT 'a(b'::ltxtquery; +ERROR: 
unquoted special symbol at position 1 +LINE 1: SELECT 'a(b'::ltxtquery; + ^ +SELECT 'a)b'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a)b'::ltxtquery; + ^ +SELECT '"b'::ltxtquery; +ERROR: unclosed quote at position 0 +LINE 1: SELECT '"b'::ltxtquery; + ^ +SELECT '%b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '%b'::ltxtquery; + ^ +SELECT '*b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '*b'::ltxtquery; + ^ +SELECT '@b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '@b'::ltxtquery; + ^ +SELECT '{b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '{b'::ltxtquery; + ^ +SELECT '}b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '}b'::ltxtquery; + ^ +SELECT '|b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '|b'::ltxtquery; + ^ +SELECT '&b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT '&b'::ltxtquery; + ^ +SELECT '(b'::ltxtquery; +ERROR: syntax error +LINE 1: SELECT '(b'::ltxtquery; + ^ +SELECT ')b'::ltxtquery; +ERROR: unquoted special symbol at position 0 +LINE 1: SELECT ')b'::ltxtquery; + ^ +SELECT 'a"'::ltxtquery; +ERROR: unclosed quote at position 1 +LINE 1: SELECT 'a"'::ltxtquery; + ^ +SELECT 'a!'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a!'::ltxtquery; + ^ +SELECT 'a{'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a{'::ltxtquery; + ^ +SELECT 'a}'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a}'::ltxtquery; + ^ +SELECT 'a|'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a|'::ltxtquery; + ^ +SELECT 'a&'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a&'::ltxtquery; + ^ +SELECT 'a('::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a('::ltxtquery; + ^ +SELECT 
'a)'::ltxtquery; +ERROR: unquoted special symbol at position 1 +LINE 1: SELECT 'a)'::ltxtquery; + ^ diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h index 366e580..9774175 100644 --- a/contrib/ltree/ltree.h +++ b/contrib/ltree/ltree.h @@ -80,8 +80,6 @@ typedef struct #define LQUERY_HASNOT 0x01 -#define ISALNUM(x) ( t_isalpha(x) || t_isdigit(x) || ( pg_mblen(x) == 1 && t_iseq((x), '_') ) ) - /* full text query */ /* @@ -128,6 +126,24 @@ typedef struct #define VALTRUE 6 /* for stop words */ #define VALFALSE 7 +typedef enum ltree_token +{ + LTREE_TOK_END, + LTREE_TOK_SPACE, + LTREE_TOK_LABEL, + LTREE_TOK_DOT, + LTREE_TOK_ASTERISK, + LTREE_TOK_NOT, + LTREE_TOK_OR, + LTREE_TOK_AND, + LTREE_TOK_AT, + LTREE_TOK_PERCENT, + LTREE_TOK_LBRACE, + LTREE_TOK_RBRACE, + LTREE_TOK_LPAREN, + LTREE_TOK_RPAREN, + LTREE_TOK_COMMA +} ltree_token; /* use in array iterator */ Datum ltree_isparent(PG_FUNCTION_ARGS); @@ -164,6 +180,9 @@ bool compare_subnode(ltree_level *t, char *q, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend); ltree *lca_inner(ltree **a, int len); int ltree_strncasecmp(const char *a, const char *b, size_t s); +int bytes_to_escape(const char *start, const int len); +void copy_level(char *dst, const char *src, int len, int extra_bytes); +ltree_token ltree_get_token(char *ptr, int pos, int *len, int *wlen, int *escaped_count); /* fmgr macros for ltree objects */ #define DatumGetLtreeP(X) ((ltree *) PG_DETOAST_DATUM(X)) diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c index e97f035..479cb90 100644 --- a/contrib/ltree/ltree_io.c +++ b/contrib/ltree/ltree_io.c @@ -34,6 +34,335 @@ typedef struct #define LTPRS_WAITNAME 0 #define LTPRS_WAITDELIM 1 +/* + * Calculating the number of literals in the string to be parsed. + * For ltree, returns a number of not escaped delimiters (dots). + * If pORs is not NULL, calculates the number of alternate templates (used in lquery parsing). 
+ * The function can return more levels than is really necessary,
+ * it will be corrected during the real parsing process.
+ */
+static void
+count_parts_ors(const char *ptr, int *plevels, int *pORs)
+{
+	int escape_mode = 0;
+	int charlen;
+
+	while (*ptr)
+	{
+		charlen = pg_mblen(ptr);
+
+		if (escape_mode == 1)
+			escape_mode = 0;
+		else if (charlen == 1)
+		{
+			if (t_iseq(ptr, '\\'))
+				escape_mode = 1;
+			else if (t_iseq(ptr, '.'))
+				(*plevels)++;
+			else if (t_iseq(ptr, '|') && pORs != NULL)
+				(*pORs)++;
+		}
+
+		ptr += charlen;
+	}
+
+	(*plevels)++;
+	if (pORs != NULL)
+		(*pORs)++;
+}
+
+/*
+ * Copy src to dst character by character, dropping the escaping backslashes.
+ * Exactly len bytes must end up in dst, otherwise an internal error is raised.
+ */
+static void
+copy_unescaped(char *dst, const char *src, int len)
+{
+	uint16 copied = 0;
+	int charlen;
+	bool escaping = false;
+
+	while (*src && copied < len)
+	{
+		charlen = pg_mblen(src);
+		if ((charlen == 1) && t_iseq(src, '\\') && !escaping)
+		{
+			escaping = true;	/* the next character is taken literally */
+			src++;
+			continue;
+		}
+
+		if (copied + charlen > len)
+			elog(ERROR, "internal error during splitting levels");
+
+		memcpy(dst, src, charlen);
+		src += charlen;
+		dst += charlen;
+		copied += charlen;
+		escaping = false;
+	}
+
+	if (copied != len)
+		elog(ERROR, "internal error during splitting levels");
+}
+
+static bool
+is_unquoted_char(const char *ptr, int charlen)
+{
+	return t_isalpha(ptr) || t_isdigit(ptr) || (charlen == 1 && t_iseq(ptr, '_'));
+}
+
+/*
+ * Compute how many extra bytes are needed to print a label of len bytes.
+ * Behaviour:
+ * If the label is empty or contains any "special" symbol, it only needs the
+ * opening and closing quotes, so return 2.  If nothing is special, return 0.
+ * If there are any quotes or backslashes, we need to escape all of them and also
+ * add the opening and closing quotes, so return 2 + number of quotes/backslashes.
+ */
+int
+bytes_to_escape(const char *start, const int len)
+{
+	uint16 copied = 0;
+	int charlen;
+	int escapes = 0;
+	bool quotes = false;
+	const char *buf = start;
+
+	if (len == 0)
+		return 2;
+
+	while (copied < len && *buf)	/* bound first: never look at the byte past len */
+	{
+		charlen = pg_mblen(buf);
+
+		if (charlen == 1 && (t_iseq(buf, '"') || t_iseq(buf, '\\')))
+			escapes++;
+		else if (!is_unquoted_char(buf, charlen))
+			quotes = true;
+
+		if (copied + charlen > len)
+			elog(ERROR, "internal error during merging levels");
+
+		buf += charlen;
+		copied += charlen;
+	}
+
+	return (escapes > 0) ? escapes + 2 : quotes ? 2 : 0;
+}
+
+static int
+copy_escaped(char *dst, const char *src, int len)
+{
+	uint16 copied = 0;
+	int charlen;
+	int escapes = 0;
+	char *buf = dst;
+
+	while (copied < len && *src)	/* bound first: never look at the byte past len */
+	{
+		charlen = pg_mblen(src);
+		if ((charlen == 1) && (t_iseq(src, '"') || t_iseq(src, '\\')))
+		{
+			*buf = '\\';
+			buf++;
+			escapes++;
+		}
+
+		if (copied + charlen > len)
+			elog(ERROR, "internal error during merging levels");
+
+		memcpy(buf, src, charlen);
+		src += charlen;
+		buf += charlen;
+		copied += charlen;
+	}
+	return escapes;
+}
+
+void
+copy_level(char *dst, const char *src, int len, int extra_bytes)
+{
+	if (extra_bytes == 0)
+		memcpy(dst, src, len);
+	else if (extra_bytes == 2)
+	{
+		*dst = '"';
+		memcpy(dst + 1, src, len);
+		dst[len + 1] = '"';
+	}
+	else
+	{
+		*dst = '"';
+		copy_escaped(dst + 1, src, len);
+		dst[len + extra_bytes - 1] = '"';
+	}
+}
+
+/*
+ * If we have a part beginning with a quote,
+ * we must be sure it also ends with a quote.
+ * After that we move the start of the part one byte ahead
+ * and exclude the opening and closing quotes from the part itself.
+ * */ +static void +adjust_quoted_nodeitem(nodeitem *lptr) +{ + if (*lptr->start == '"') + { + lptr->start++; + lptr->len -= 2; + lptr->wlen -= 2; + } +} + +static void +check_level_length(const nodeitem *lptr, int pos) +{ + if (lptr->len < 0) + elog(ERROR, "internal error: invalid level length"); + + if (lptr->wlen <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("name of level is empty"), + errdetail("Name length is 0 in position %d.", + pos))); + + if (lptr->wlen > 255) + ereport(ERROR, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("name of level is too long"), + errdetail("Name length is %d, must " + "be < 256, in position %d.", + lptr->wlen, pos))); +} + +static void +init_nodeitem(nodeitem *lptr, char *start, int len, int wlen, int escapes, int pos) +{ + lptr->start = start; + lptr->len = len - escapes; + lptr->wlen = wlen - escapes; + + adjust_quoted_nodeitem(lptr); + check_level_length(lptr, pos + len); +} + +ltree_token +ltree_get_token(char *ptr, int pos, int *len, int *wlen, int *escaped_count) +{ + char *ptr_start = ptr; + int charlen; + bool quoted = false; + bool escaped = false; + + *escaped_count = 0; + *len = 0; + *wlen = 0; + + if (!*ptr) + return LTREE_TOK_END; + + charlen = pg_mblen(ptr); + + if (t_isspace(ptr)) + { + ++*wlen; + ptr += charlen; + + while (*ptr && t_isspace(ptr)) + { + charlen = pg_mblen(ptr); + ptr += charlen; + ++*wlen; + } + + *len = ptr - ptr_start; + return LTREE_TOK_SPACE; + } + + if (charlen == 1 && strchr(".*!|&@%{}(),", *ptr)) + { + *wlen = *len = 1; + + if (t_iseq(ptr, '.')) + return LTREE_TOK_DOT; + else if (t_iseq(ptr, '*')) + return LTREE_TOK_ASTERISK; + else if (t_iseq(ptr, '!')) + return LTREE_TOK_NOT; + else if (t_iseq(ptr, '|')) + return LTREE_TOK_OR; + else if (t_iseq(ptr, '&')) + return LTREE_TOK_AND; + else if (t_iseq(ptr, '@')) + return LTREE_TOK_AT; + else if (t_iseq(ptr, '%')) + return LTREE_TOK_PERCENT; + else if (t_iseq(ptr, ',')) + return LTREE_TOK_COMMA; + else if (t_iseq(ptr, 
'{')) + return LTREE_TOK_LBRACE; + else if (t_iseq(ptr, '}')) + return LTREE_TOK_RBRACE; + else if (t_iseq(ptr, '(')) + return LTREE_TOK_LPAREN; + else if (t_iseq(ptr, ')')) + return LTREE_TOK_RPAREN; + else + elog(ERROR, "invalid special character"); + } + else if (charlen == 1 && t_iseq(ptr, '\\')) + escaped = true; + else if (charlen == 1 && t_iseq(ptr, '"')) + quoted = true; + else if (!is_unquoted_char(ptr, charlen)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error at position %d", pos) + /*errdetail("Unexpected character")*/)); + + for (ptr += charlen, ++*wlen; *ptr; ptr += charlen, ++*wlen) + { + charlen = pg_mblen(ptr); + + if (escaped) + { + ++*escaped_count; + escaped = false; + } + else if (charlen == 1 && t_iseq(ptr, '\\')) + escaped = true; + else if (quoted) + { + if (charlen == 1 && t_iseq(ptr, '"')) + { + quoted = false; + ptr += charlen; + ++*wlen; + break; + } + } + else if (!is_unquoted_char(ptr, charlen)) + break; + } + + if (quoted) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unclosed quote at position %d", pos))); + + if (escaped) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unclosed escape sequence at position %d", pos + *wlen - 1))); + + *len = ptr - ptr_start; + + return LTREE_TOK_LABEL; +} + Datum ltree_in(PG_FUNCTION_ARGS) { @@ -41,92 +370,65 @@ ltree_in(PG_FUNCTION_ARGS) char *ptr; nodeitem *list, *lptr; - int num = 0, + int levels = 0, totallen = 0; int state = LTPRS_WAITNAME; ltree *result; ltree_level *curlevel; - int charlen; + + /* Position in strings, in symbols. 
*/ int pos = 0; + int wlen; + int len; ptr = buf; - while (*ptr) - { - charlen = pg_mblen(ptr); - if (charlen == 1 && t_iseq(ptr, '.')) - num++; - ptr += charlen; - } + count_parts_ors(ptr, &levels, NULL); - if (num + 1 > MaxAllocSize / sizeof(nodeitem)) + if (levels > MaxAllocSize / sizeof(nodeitem)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of levels (%d) exceeds the maximum allowed (%d)", - num + 1, (int) (MaxAllocSize / sizeof(nodeitem))))); - list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1)); - ptr = buf; - while (*ptr) + levels, (int) (MaxAllocSize / sizeof(nodeitem))))); + list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (levels)); + + /* + * This block calculates single nodes' settings + */ + for (ptr = buf; *ptr; ptr += len, pos += wlen) { - charlen = pg_mblen(ptr); + int escaped_count; + ltree_token tok = ltree_get_token(ptr, pos, &len, &wlen, &escaped_count); + + if (tok == LTREE_TOK_SPACE) + continue; switch (state) { case LTPRS_WAITNAME: - if (ISALNUM(ptr)) - { - lptr->start = ptr; - lptr->wlen = 0; - state = LTPRS_WAITDELIM; - } - else + if (tok != LTREE_TOK_LABEL) UNCHAR; + + init_nodeitem(lptr, ptr, len, wlen, escaped_count, pos); + totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); + lptr++; + + state = LTPRS_WAITDELIM; break; case LTPRS_WAITDELIM: - if (charlen == 1 && t_iseq(ptr, '.')) - { - lptr->len = ptr - lptr->start; - if (lptr->wlen > 255) - ereport(ERROR, - (errcode(ERRCODE_NAME_TOO_LONG), - errmsg("name of level is too long"), - errdetail("Name length is %d, must " - "be < 256, in position %d.", - lptr->wlen, pos))); - - totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); - lptr++; - state = LTPRS_WAITNAME; - } - else if (!ISALNUM(ptr)) + if (tok != LTREE_TOK_DOT) UNCHAR; + + state = LTPRS_WAITNAME; break; default: /* internal error */ elog(ERROR, "internal error in parser"); } - - ptr += charlen; - lptr->wlen++; - pos++; } - if (state == LTPRS_WAITDELIM) - { - lptr->len = ptr - lptr->start; - if 
(lptr->wlen > 255) - ereport(ERROR, - (errcode(ERRCODE_NAME_TOO_LONG), - errmsg("name of level is too long"), - errdetail("Name length is %d, must " - "be < 256, in position %d.", - lptr->wlen, pos))); - - totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); - lptr++; - } - else if (!(state == LTPRS_WAITNAME && lptr == list)) + if (state == LTPRS_WAITNAME && lptr != list) /* Empty string */ ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), @@ -140,7 +442,10 @@ ltree_in(PG_FUNCTION_ARGS) while (lptr - list < result->numlevel) { curlevel->len = (uint16) lptr->len; - memcpy(curlevel->name, lptr->start, lptr->len); + + if (lptr->len > 0) + copy_unescaped(curlevel->name, lptr->start, lptr->len); + curlevel = LEVEL_NEXT(curlevel); lptr++; } @@ -157,8 +462,10 @@ ltree_out(PG_FUNCTION_ARGS) *ptr; int i; ltree_level *curlevel; + Size allocated = VARSIZE(in); + Size filled = 0; - ptr = buf = (char *) palloc(VARSIZE(in)); + ptr = buf = (char *) palloc(allocated); curlevel = LTREE_FIRST(in); for (i = 0; i < in->numlevel; i++) { @@ -166,9 +473,22 @@ ltree_out(PG_FUNCTION_ARGS) { *ptr = '.'; ptr++; + filled++; + } + if (curlevel->len >= 0) + { + int extra_bytes = bytes_to_escape(curlevel->name, curlevel->len); + + if (filled + extra_bytes + curlevel->len >= allocated) + { + buf = repalloc(buf, allocated + (extra_bytes + curlevel->len) * 2); + allocated += (extra_bytes + curlevel->len) * 2; + ptr = buf + filled; + } + + copy_level(ptr, curlevel->name, curlevel->len, extra_bytes); + ptr += curlevel->len + extra_bytes; } - memcpy(ptr, curlevel->name, curlevel->len); - ptr += curlevel->len; curlevel = LEVEL_NEXT(curlevel); } @@ -188,7 +508,6 @@ ltree_out(PG_FUNCTION_ARGS) #define LQPRS_WAITEND 7 #define LQPRS_WAITVAR 8 - #define GETVAR(x) ( *((nodeitem**)LQL_FIRST(x)) ) #define ITEMSIZE MAXALIGN(LQL_HDRSIZE+sizeof(nodeitem*)) #define NEXTLEV(x) ( (lquery_level*)( ((char*)(x)) + ITEMSIZE) ) @@ -198,7 +517,7 @@ lquery_in(PG_FUNCTION_ARGS) { char *buf = (char *) 
PG_GETARG_POINTER(0); char *ptr; - int num = 0, + int levels = 0, totallen = 0, numOR = 0; int state = LQPRS_WAITLEVEL; @@ -210,145 +529,121 @@ lquery_in(PG_FUNCTION_ARGS) lquery_variant *lrptr = NULL; bool hasnot = false; bool wasbad = false; - int charlen; + int real_levels = 0; int pos = 0; + int wlen; + int len; ptr = buf; - while (*ptr) - { - charlen = pg_mblen(ptr); - - if (charlen == 1) - { - if (t_iseq(ptr, '.')) - num++; - else if (t_iseq(ptr, '|')) - numOR++; - } - - ptr += charlen; - } + count_parts_ors(ptr, &levels, &numOR); - num++; - if (num > MaxAllocSize / ITEMSIZE) + if (levels > MaxAllocSize / ITEMSIZE) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of levels (%d) exceeds the maximum allowed (%d)", - num, (int) (MaxAllocSize / ITEMSIZE)))); - curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num); - ptr = buf; - while (*ptr) + levels, (int) (MaxAllocSize / ITEMSIZE)))); + curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * levels); + + for (ptr = buf; *ptr; ptr += len, pos += wlen) { - charlen = pg_mblen(ptr); + int escaped_count; + ltree_token tok = ltree_get_token(ptr, pos, &len, &wlen, &escaped_count); switch (state) { case LQPRS_WAITLEVEL: - if (ISALNUM(ptr)) + if (tok == LTREE_TOK_SPACE) + break; + + if (tok == LTREE_TOK_NOT) { - GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); - lptr->start = ptr; - state = LQPRS_WAITDELIM; - curqlevel->numvar = 1; + if (curqlevel->flag & LQL_NOT) /* '!!' 
is disallowed */ + UNCHAR; + + curqlevel->flag |= LQL_NOT; + hasnot = true; + break; } - else if (charlen == 1 && t_iseq(ptr, '!')) + + real_levels++; + + GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * numOR); + + if (tok == LTREE_TOK_LABEL) { - GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); - lptr->start = ptr + 1; - state = LQPRS_WAITDELIM; curqlevel->numvar = 1; - curqlevel->flag |= LQL_NOT; - hasnot = true; + init_nodeitem(lptr, ptr, len, wlen, escaped_count, pos); + + state = LQPRS_WAITDELIM; } - else if (charlen == 1 && t_iseq(ptr, '*')) + else if (tok == LTREE_TOK_ASTERISK) + { + if (curqlevel->flag & LQL_NOT) /* '!*' is meaningless */ + UNCHAR; + + lptr->start = ptr; + + curqlevel->low = 0; + curqlevel->high = 0xffff; + state = LQPRS_WAITOPEN; + } else UNCHAR; + break; case LQPRS_WAITVAR: - if (ISALNUM(ptr)) - { - lptr++; - lptr->start = ptr; - state = LQPRS_WAITDELIM; - curqlevel->numvar++; - } - else + if (tok == LTREE_TOK_SPACE) + break; + + if (tok != LTREE_TOK_LABEL) UNCHAR; + + curqlevel->numvar++; + + lptr++; + init_nodeitem(lptr, ptr, len, wlen, escaped_count, pos); + + state = LQPRS_WAITDELIM; break; case LQPRS_WAITDELIM: - if (charlen == 1 && t_iseq(ptr, '@')) + if (tok == LTREE_TOK_SPACE) + break; + else if (tok == LTREE_TOK_AT) { - if (lptr->start == ptr) - UNCHAR; lptr->flag |= LVAR_INCASE; curqlevel->flag |= LVAR_INCASE; } - else if (charlen == 1 && t_iseq(ptr, '*')) + else if (tok == LTREE_TOK_ASTERISK) { - if (lptr->start == ptr) - UNCHAR; lptr->flag |= LVAR_ANYEND; curqlevel->flag |= LVAR_ANYEND; } - else if (charlen == 1 && t_iseq(ptr, '%')) + else if (tok == LTREE_TOK_PERCENT) { - if (lptr->start == ptr) - UNCHAR; lptr->flag |= LVAR_SUBLEXEME; curqlevel->flag |= LVAR_SUBLEXEME; } - else if (charlen == 1 && t_iseq(ptr, '|')) - { - lptr->len = ptr - lptr->start - - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - - ((lptr->flag & LVAR_ANYEND) ? 
1 : 0); - if (lptr->wlen > 255) - ereport(ERROR, - (errcode(ERRCODE_NAME_TOO_LONG), - errmsg("name of level is too long"), - errdetail("Name length is %d, must " - "be < 256, in position %d.", - lptr->wlen, pos))); - + else if (tok == LTREE_TOK_OR) state = LQPRS_WAITVAR; - } - else if (charlen == 1 && t_iseq(ptr, '.')) + else if (tok == LTREE_TOK_DOT) { - lptr->len = ptr - lptr->start - - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); - if (lptr->wlen > 255) - ereport(ERROR, - (errcode(ERRCODE_NAME_TOO_LONG), - errmsg("name of level is too long"), - errdetail("Name length is %d, must " - "be < 256, in position %d.", - lptr->wlen, pos))); - - state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); - } - else if (ISALNUM(ptr)) - { - if (lptr->flag) - UNCHAR; + state = LQPRS_WAITLEVEL; } else UNCHAR; break; case LQPRS_WAITOPEN: - if (charlen == 1 && t_iseq(ptr, '{')) + if (tok == LTREE_TOK_SPACE) + break; + else if (tok == LTREE_TOK_LBRACE) state = LQPRS_WAITFNUM; - else if (charlen == 1 && t_iseq(ptr, '.')) + else if (tok == LTREE_TOK_DOT) { - curqlevel->low = 0; - curqlevel->high = 0xffff; curqlevel = NEXTLEV(curqlevel); state = LQPRS_WAITLEVEL; } @@ -357,11 +652,12 @@ lquery_in(PG_FUNCTION_ARGS) break; case LQPRS_WAITFNUM: - if (charlen == 1 && t_iseq(ptr, ',')) + if (tok == LTREE_TOK_COMMA) state = LQPRS_WAITSNUM; else if (t_isdigit(ptr)) { curqlevel->low = atoi(ptr); + len = wlen = 1; state = LQPRS_WAITND; } else @@ -372,38 +668,36 @@ lquery_in(PG_FUNCTION_ARGS) if (t_isdigit(ptr)) { curqlevel->high = atoi(ptr); + len = wlen = 1; state = LQPRS_WAITCLOSE; } - else if (charlen == 1 && t_iseq(ptr, '}')) - { - curqlevel->high = 0xffff; + else if (tok == LTREE_TOK_RBRACE) state = LQPRS_WAITEND; - } else UNCHAR; break; case LQPRS_WAITCLOSE: - if (charlen == 1 && t_iseq(ptr, '}')) + if (tok == LTREE_TOK_RBRACE) state = LQPRS_WAITEND; else if (!t_isdigit(ptr)) UNCHAR; break; case LQPRS_WAITND: - 
if (charlen == 1 && t_iseq(ptr, '}')) + if (tok == LTREE_TOK_RBRACE) { curqlevel->high = curqlevel->low; state = LQPRS_WAITEND; } - else if (charlen == 1 && t_iseq(ptr, ',')) + else if (tok == LTREE_TOK_COMMA) state = LQPRS_WAITSNUM; else if (!t_isdigit(ptr)) UNCHAR; break; case LQPRS_WAITEND: - if (charlen == 1 && t_iseq(ptr, '.')) + if (tok == LTREE_TOK_DOT) { state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); @@ -416,42 +710,11 @@ lquery_in(PG_FUNCTION_ARGS) /* internal error */ elog(ERROR, "internal error in parser"); } - - ptr += charlen; - if (state == LQPRS_WAITDELIM) - lptr->wlen++; - pos++; } - if (state == LQPRS_WAITDELIM) - { - if (lptr->start == ptr) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"), - errdetail("Unexpected end of line."))); - - lptr->len = ptr - lptr->start - - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); - if (lptr->len == 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"), - errdetail("Unexpected end of line."))); - - if (lptr->wlen > 255) - ereport(ERROR, - (errcode(ERRCODE_NAME_TOO_LONG), - errmsg("name of level is too long"), - errdetail("Name length is %d, must " - "be < 256, in position %d.", - lptr->wlen, pos))); - } - else if (state == LQPRS_WAITOPEN) - curqlevel->high = 0xffff; - else if (state != LQPRS_WAITEND) + if (state != LQPRS_WAITDELIM && + state != LQPRS_WAITOPEN && + state != LQPRS_WAITEND) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), @@ -459,7 +722,7 @@ lquery_in(PG_FUNCTION_ARGS) curqlevel = tmpql; totallen = LQUERY_HDRSIZE; - while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE) + while ((char *) curqlevel - (char *) tmpql < levels * ITEMSIZE) { totallen += LQL_HDRSIZE; if (curqlevel->numvar) @@ -483,14 +746,14 @@ lquery_in(PG_FUNCTION_ARGS) result = (lquery *) palloc0(totallen); SET_VARSIZE(result, totallen); - result->numlevel = num; + 
result->numlevel = real_levels; result->firstgood = 0; result->flag = 0; if (hasnot) result->flag |= LQUERY_HASNOT; cur = LQUERY_FIRST(result); curqlevel = tmpql; - while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE) + while ((char *) curqlevel - (char *) tmpql < levels * ITEMSIZE) { memcpy(cur, curqlevel, LQL_HDRSIZE); cur->totallen = LQL_HDRSIZE; @@ -503,8 +766,8 @@ lquery_in(PG_FUNCTION_ARGS) cur->totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len); lrptr->len = lptr->len; lrptr->flag = lptr->flag; - lrptr->val = ltree_crc32_sz(lptr->start, lptr->len); - memcpy(lrptr->name, lptr->start, lptr->len); + copy_unescaped(lrptr->name, lptr->start, lptr->len); + lrptr->val = ltree_crc32_sz(lrptr->name, lptr->len); lptr++; lrptr = LVAR_NEXT(lrptr); } @@ -532,7 +795,8 @@ lquery_out(PG_FUNCTION_ARGS) *ptr; int i, j, - totallen = 1; + totallen = 1, + filled = 0; lquery_level *curqlevel; lquery_variant *curtlevel; @@ -555,6 +819,7 @@ lquery_out(PG_FUNCTION_ARGS) { *ptr = '.'; ptr++; + filled++; } if (curqlevel->numvar) { @@ -562,31 +827,46 @@ lquery_out(PG_FUNCTION_ARGS) { *ptr = '!'; ptr++; + filled++; } curtlevel = LQL_FIRST(curqlevel); for (j = 0; j < curqlevel->numvar; j++) { + int extra_bytes = bytes_to_escape(curtlevel->name, curtlevel->len); + if (j != 0) { *ptr = '|'; ptr++; + filled++; } - memcpy(ptr, curtlevel->name, curtlevel->len); - ptr += curtlevel->len; + if (filled + extra_bytes + curtlevel->len >= totallen) + { + buf = repalloc(buf, totallen + (extra_bytes + curtlevel->len) * 2); + totallen += (extra_bytes + curtlevel->len) * 2; + ptr = buf + filled; + } + + copy_level(ptr, curtlevel->name, curtlevel->len, extra_bytes); + ptr += curtlevel->len + extra_bytes; + if ((curtlevel->flag & LVAR_SUBLEXEME)) { *ptr = '%'; ptr++; + filled++; } if ((curtlevel->flag & LVAR_INCASE)) { *ptr = '@'; ptr++; + filled++; } if ((curtlevel->flag & LVAR_ANYEND)) { *ptr = '*'; ptr++; + filled++; } curtlevel = LVAR_NEXT(curtlevel); } @@ -614,6 +894,7 @@ 
lquery_out(PG_FUNCTION_ARGS) else sprintf(ptr, "*{%d,%d}", curqlevel->low, curqlevel->high); ptr = strchr(ptr, '\0'); + filled = ptr - buf; } curqlevel = LQL_NEXT(curqlevel); diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c index db347f7..4c237b1 100644 --- a/contrib/ltree/ltxtquery_io.c +++ b/contrib/ltree/ltxtquery_io.c @@ -37,6 +37,7 @@ typedef struct NODE typedef struct { char *buf; + int pos; int32 state; int32 count; /* reverse polish notation in list (for temporary usage) */ @@ -57,86 +58,136 @@ typedef struct static int32 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag) { - int charlen; - for (;;) { - charlen = pg_mblen(state->buf); + int len; + int wlen; + int escaped_count; + int pos = state->pos; + char *buf = state->buf; + ltree_token tok = ltree_get_token(buf, pos, &len, &wlen, &escaped_count); + + state->buf += len; + state->pos += wlen; switch (state->state) { case WAITOPERAND: - if (charlen == 1 && t_iseq(state->buf, '!')) + if (tok == LTREE_TOK_NOT) { - (state->buf)++; *val = (int32) '!'; return OPR; } - else if (charlen == 1 && t_iseq(state->buf, '(')) + else if (tok == LTREE_TOK_LPAREN) { state->count++; - (state->buf)++; return OPEN; } - else if (ISALNUM(state->buf)) + else if (tok == LTREE_TOK_LABEL) { - state->state = INOPERAND; - *strval = state->buf; - *lenval = charlen; + *strval = buf; + *lenval = state->buf - buf; *flag = 0; + + if (t_iseq(buf, '"')) /* strip quotes */ + { + *lenval -= 2; + *strval += 1; + } + + state->state = INOPERAND; + } + else if (tok == LTREE_TOK_SPACE) + { + /* do nothing */ } - else if (!t_isspace(state->buf)) + else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("operand syntax error"))); + errmsg("unquoted special symbol at position %d", pos))); break; + case INOPERAND: - if (ISALNUM(state->buf)) + if (tok == LTREE_TOK_END || tok == LTREE_TOK_SPACE) { - if (*flag) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("modifiers 
syntax error"))); - *lenval += charlen; + state->state = WAITOPERATOR; + return VAL; } - else if (charlen == 1 && t_iseq(state->buf, '%')) + else if (tok == LTREE_TOK_PERCENT) *flag |= LVAR_SUBLEXEME; - else if (charlen == 1 && t_iseq(state->buf, '@')) + else if (tok == LTREE_TOK_AT) *flag |= LVAR_INCASE; - else if (charlen == 1 && t_iseq(state->buf, '*')) + else if (tok == LTREE_TOK_ASTERISK) *flag |= LVAR_ANYEND; else - { - state->state = WAITOPERATOR; - return VAL; - } + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unquoted special symbol at position %d", pos))); break; + case WAITOPERATOR: - if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))) + if (tok == LTREE_TOK_OR || tok == LTREE_TOK_AND) { state->state = WAITOPERAND; - *val = (int32) *(state->buf); - (state->buf)++; + *val = (int32) *buf; return OPR; } - else if (charlen == 1 && t_iseq(state->buf, ')')) + else if (tok == LTREE_TOK_RPAREN) { - (state->buf)++; state->count--; return (state->count < 0) ? ERR : CLOSE; } - else if (*(state->buf) == '\0') + else if (tok == LTREE_TOK_END) return (state->count) ? ERR : END; - else if (charlen == 1 && !t_iseq(state->buf, ' ')) + else if (tok != LTREE_TOK_SPACE) return ERR; break; + default: return ERR; - break; } + } +} + +/* + * This function is similar to copy_unescaped. 
+ * It proceeds total_len bytes from src + * Copying all to dst skipping escapes + * Returns amount of skipped symbols + * */ +static int +copy_skip_escapes(char *dst, const char *src, int total_len) +{ + uint16 copied = 0; + int charlen; + bool escaping = false; + int skipped = 0; - state->buf += charlen; + while (*src && (copied + skipped < total_len)) + { + charlen = pg_mblen(src); + if ((charlen == 1) && t_iseq(src, '\\') && escaping == 0) + { + escaping = 1; + src++; + skipped++; + continue; + }; + + if (copied + skipped + charlen > total_len) + elog(ERROR, "internal error during copying"); + + memcpy(dst, src, charlen); + src += charlen; + dst += charlen; + copied += charlen; + escaping = 0; } + + if (copied + skipped != total_len) + elog(ERROR, "internal error during copying"); + + return skipped; } /* @@ -171,14 +222,18 @@ pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval static void pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag) { + int skipped = 0; + + if (lenval == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("empty labels are forbidden"))); + if (lenval > 0xffff) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("word is too long"))); - pushquery(state, type, ltree_crc32_sz(strval, lenval), - state->curop - state->op, lenval, flag); - while (state->curop - state->op + lenval + 1 >= state->lenop) { int32 tmp = state->curop - state->op; @@ -187,11 +242,19 @@ pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag) state->op = (char *) repalloc((void *) state->op, state->lenop); state->curop = state->op + tmp; } - memcpy((void *) state->curop, (void *) strval, lenval); - state->curop += lenval; + skipped = copy_skip_escapes((void *) state->curop, (void *) strval, lenval); + if (lenval == skipped) /* Empty quoted literal */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("empty labels are forbidden"))); 
+ + pushquery(state, type, ltree_crc32_sz(state->curop, lenval - skipped), + state->curop - state->op, lenval - skipped, flag); + + state->curop += lenval - skipped; *(state->curop) = '\0'; state->curop++; - state->sumlen += lenval + 1; + state->sumlen += lenval - skipped + 1; } #define STACKDEPTH 32 @@ -324,6 +387,7 @@ queryin(char *buf) /* init state */ state.buf = buf; + state.pos = 0; state.state = WAITOPERAND; state.count = 0; state.num = 0; @@ -421,14 +485,14 @@ infix(INFIX *in, bool first) if (in->curpol->type == VAL) { char *op = in->op + in->curpol->distance; + char *opend = strchr(op, '\0'); + int delta = opend - op; + int extra_bytes = bytes_to_escape(op, delta); RESIZEBUF(in, in->curpol->length * 2 + 5); - while (*op) - { - *(in->cur) = *op; - op++; - in->cur++; - } + copy_level(in->cur, op, delta, extra_bytes); + in->cur += delta + extra_bytes; + if (in->curpol->flag & LVAR_SUBLEXEME) { *(in->cur) = '%'; diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql index 846b04e..b1f14f5 100644 --- a/contrib/ltree/sql/ltree.sql +++ b/contrib/ltree/sql/ltree.sql @@ -1,5 +1,7 @@ CREATE EXTENSION ltree; +SET standard_conforming_strings=on; + -- Check whether any of our opclasses fail amvalidate SELECT amname, opcname FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod @@ -65,6 +67,7 @@ SELECT lca('1.2.2.3','1.2.3.4.5.6','2'); SELECT lca('1.2.2.3','1.2.3.4.5.6','1'); +SELECT ''::lquery; SELECT '1'::lquery; SELECT '4|3|2'::lquery; SELECT '1.2'::lquery; @@ -87,6 +90,8 @@ SELECT '1.*.4|3|2.*{1,4}'::lquery; SELECT '1.*.4|3|2.*{,4}'::lquery; SELECT '1.*.4|3|2.*{1,}'::lquery; SELECT '1.*.4|3|2.*{1}'::lquery; +SELECT '*'::lquery; +SELECT '*{1}|2'::lquery; SELECT 'qwerty%@*.tu'::lquery; SELECT nlevel('1.2.3.4'); @@ -291,3 +296,384 @@ SELECT count(*) FROM _ltreetest WHERE t ~ '23.*{1}.1' ; SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.1' ; SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.2' ; SELECT count(*) FROM _ltreetest WHERE t ? 
'{23.*.1,23.*.2}' ; + +-- Extended syntax, escaping, quoting etc +-- success +SELECT E'\\.'::ltree; +SELECT E'\\ '::ltree; +SELECT E'\\\\'::ltree; +SELECT E'\\a'::ltree; +SELECT E'\\n'::ltree; +SELECT E'x\\\\'::ltree; +SELECT E'x\\ '::ltree; +SELECT E'x\\.'::ltree; +SELECT E'x\\a'::ltree; +SELECT E'x\\n'::ltree; +SELECT 'a b.с d'::ltree; +SELECT '"a b"."с d"'::ltree; +SELECT ' e . f '::ltree; +SELECT ' '::ltree; + +SELECT E'\\ g . h\\ '::ltree; +SELECT E'\\ g'::ltree; +SELECT E' h\\ '::ltree; +SELECT '"g" '::ltree; +SELECT '"g" . h'::ltree; +SELECT '" g "." h "'::ltree; +SELECT '" g " '::ltree; +SELECT '" g " ." h " '::ltree; + +SELECT nlevel(E'Bottom\\.Test'::ltree); +SELECT subpath(E'Bottom\\.'::ltree, 0, 1); + +SELECT subpath(E'a\\.b', 0, 1); +SELECT subpath(E'a\\..b', 1, 1); +SELECT subpath(E'a\\..\\b', 1, 1); +SELECT subpath(E'"a b"."с d"'::ltree, 1, 1); + +SELECT( +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z')::ltree; + +SELECT(' ' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\a\b\c\d\e ')::ltree; + +SELECT 'abc\|d'::lquery; +SELECT 'abc\|d'::ltree ~ 'abc\|d'::lquery; +SELECT 'abc\|d'::ltree ~ 'abc*'::lquery; --true +SELECT 'abc\|d'::ltree ~ 'abc\*'::lquery; --false +SELECT E'abc\\|\\.'::ltree ~ 'abc\|*'::lquery; --true + +SELECT E'"\\""'::ltree; +SELECT '\"'::ltree; +SELECT E'\\"'::ltree; +SELECT 'a\"b'::ltree; +SELECT '"ab"'::ltree; +SELECT '"."'::ltree; +SELECT E'".\\""'::ltree; +SELECT( +'"01234567890123456789012345678901234567890123456789' || 
+'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z"')::ltree; + +SELECT E'"\\""'::lquery; +SELECT '\"'::lquery; +SELECT E'\\"'::lquery; +SELECT 'a\"b'::lquery; +SELECT '"ab"'::lquery; +SELECT '"."'::lquery; +SELECT E'".\\""'::lquery; +SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z"')::lquery; + +SELECT ' e . f '::lquery; +SELECT ' e | f '::lquery; + +SELECT E'\\ g . h\\ '::lquery; +SELECT E'\\ g'::lquery; +SELECT E' h\\ '::lquery; +SELECT E'"\\ g"'::lquery; +SELECT E' "h\\ "'::lquery; +SELECT '" g "." h "'::lquery; + +SELECT E'\\ g | h\\ '::lquery; +SELECT '" g "|" h "'::lquery; + +SELECT '"g" '::lquery; +SELECT '"g" . h'::lquery; +SELECT '" g " '::lquery; +SELECT '" g " ." 
h " '::lquery; +SELECT '" g " | " h " '::lquery; + +SELECT(' ' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\a\b\c\d\e ')::lquery; + +SELECT E'"a\\"b"'::lquery; +SELECT '"a!b"'::lquery; +SELECT '"a%b"'::lquery; +SELECT '"a*b"'::lquery; +SELECT '"a@b"'::lquery; +SELECT '"a{b"'::lquery; +SELECT '"a}b"'::lquery; +SELECT '"a|b"'::lquery; + +SELECT E'a\\"b'::lquery; +SELECT E'a\\!b'::lquery; +SELECT E'a\\%b'::lquery; +SELECT E'a\\*b'::lquery; +SELECT E'a\\@b'::lquery; +SELECT E'a\\{b'::lquery; +SELECT E'a\\}b'::lquery; +SELECT E'a\\|b'::lquery; + +SELECT '!"!b"'::lquery; +SELECT '!"%b"'::lquery; +SELECT '!"*b"'::lquery; +SELECT '!"@b"'::lquery; +SELECT '!"{b"'::lquery; +SELECT '!"}b"'::lquery; + +SELECT E'!\\!b'::lquery; +SELECT E'!\\%b'::lquery; +SELECT E'!\\*b'::lquery; +SELECT E'!\\@b'::lquery; +SELECT E'!\\{b'::lquery; +SELECT E'!\\}b'::lquery; + +SELECT '"1"'::lquery; +SELECT '"2.*"'::lquery; +SELECT '!"1"'::lquery; +SELECT '!"1|"'::lquery; +SELECT '4|3|"2"'::lquery; +SELECT '"1".2'::lquery; +SELECT '"1.4"|"3"|2'::lquery; +SELECT '"1"."4"|"3"|"2"'::lquery; +SELECT '"1"."0"'::lquery; +SELECT '"1".0'::lquery; +SELECT '"1".*'::lquery; +SELECT '4|"3"|2.*'::lquery; +SELECT '4|"3"|"2.*"'::lquery; +SELECT '2."*"'::lquery; +SELECT '"*".1."*"'::lquery; +SELECT '"*.4"|3|2.*'::lquery; +SELECT '"*.4"|3|"2.*"'::lquery; +SELECT '1.*.4|3|2.*{,4}'::lquery; +SELECT '1.*.4|3|2.*{1,}'::lquery; +SELECT '1.*.4|3|2.*{1}'::lquery; +SELECT '"qwerty"%@*.tu'::lquery; + +SELECT '1.*.4|3|"2".*{1,4}'::lquery; +SELECT '1."*".4|3|"2".*{1,4}'::lquery; +SELECT '\%\ \@'::lquery; +SELECT '"\% \@"'::lquery; + +SELECT E'\\aa.b.c.d.e'::ltree ~ 'A@.b.c.d.e'; +SELECT E'a\\a.b.c.\\d.e'::ltree ~ 'A*.b.c.d.e'; +SELECT E'a\\a.b.c.\\d.e'::ltree ~ E'A*@.b.c.d.\\e'; 
+SELECT E'a\\a.b.c.\\d.e'::ltree ~ E'A*@|\\g.b.c.d.e'; +--ltxtquery +SELECT '!"tree" & aWdf@*'::ltxtquery; +SELECT '"!tree" & aWdf@*'::ltxtquery; +SELECT E'tr\\ee'::ltree @ E'\\t\\r\\e\\e'::ltxtquery; +SELECT E'tr\\ee.awd\\fg'::ltree @ E'tre\\e & a\\Wdf@*'::ltxtquery; +SELECT 'tree & aw_qw%*'::ltxtquery; +SELECT 'tree."awdfg"'::ltree @ E'tree & a\\Wdf@*'::ltxtquery; +SELECT 'tree."awdfg"'::ltree @ E'tree & "a\\Wdf"@*'::ltxtquery; +SELECT 'tree.awdfg_qwerty'::ltree @ 'tree & aw_qw%*'::ltxtquery; +SELECT 'tree.awdfg_qwerty'::ltree @ 'tree & "aw_rw"%*'::ltxtquery; +SELECT 'tree.awdfg_qwerty'::ltree @ E'tree & "aw\\_qw"%*'::ltxtquery; +SELECT 'tree.awdfg_qwerty'::ltree @ E'tree & aw\\_qw%*'::ltxtquery; + +SELECT E'"a\\"b"'::ltxtquery; +SELECT '"a!b"'::ltxtquery; +SELECT '"a%b"'::ltxtquery; +SELECT '"a*b"'::ltxtquery; +SELECT '"a@b"'::ltxtquery; +SELECT '"a{b"'::ltxtquery; +SELECT '"a}b"'::ltxtquery; +SELECT '"a|b"'::ltxtquery; +SELECT '"a&b"'::ltxtquery; +SELECT '"a(b"'::ltxtquery; +SELECT '"a)b"'::ltxtquery; + +SELECT E'a\\"b'::ltxtquery; +SELECT E'a\\!b'::ltxtquery; +SELECT E'a\\%b'::ltxtquery; +SELECT E'a\\*b'::ltxtquery; +SELECT E'a\\@b'::ltxtquery; +SELECT E'a\\{b'::ltxtquery; +SELECT E'a\\}b'::ltxtquery; +SELECT E'a\\|b'::ltxtquery; +SELECT E'a\\&b'::ltxtquery; +SELECT E'a\\(b'::ltxtquery; +SELECT E'a\\)b'::ltxtquery; + +SELECT E'"\\"b"'::ltxtquery; +SELECT '"!b"'::ltxtquery; +SELECT '"%b"'::ltxtquery; +SELECT '"*b"'::ltxtquery; +SELECT '"@b"'::ltxtquery; +SELECT '"{b"'::ltxtquery; +SELECT '"}b"'::ltxtquery; +SELECT '"|b"'::ltxtquery; +SELECT '"&b"'::ltxtquery; +SELECT '"(b"'::ltxtquery; +SELECT '")b"'::ltxtquery; + +SELECT E'\\"b'::ltxtquery; +SELECT E'\\!b'::ltxtquery; +SELECT E'\\%b'::ltxtquery; +SELECT E'\\*b'::ltxtquery; +SELECT E'\\@b'::ltxtquery; +SELECT E'\\{b'::ltxtquery; +SELECT E'\\}b'::ltxtquery; +SELECT E'\\|b'::ltxtquery; +SELECT E'\\&b'::ltxtquery; +SELECT E'\\(b'::ltxtquery; +SELECT E'\\)b'::ltxtquery; + +SELECT E'"a\\""'::ltxtquery; +SELECT 
'"a!"'::ltxtquery; +SELECT '"a%"'::ltxtquery; +SELECT '"a*"'::ltxtquery; +SELECT '"a@"'::ltxtquery; +SELECT '"a{"'::ltxtquery; +SELECT '"a}"'::ltxtquery; +SELECT '"a|"'::ltxtquery; +SELECT '"a&"'::ltxtquery; +SELECT '"a("'::ltxtquery; +SELECT '"a)"'::ltxtquery; + +SELECT E'a\\"'::ltxtquery; +SELECT E'a\\!'::ltxtquery; +SELECT E'a\\%'::ltxtquery; +SELECT E'a\\*'::ltxtquery; +SELECT E'a\\@'::ltxtquery; +SELECT E'a\\{'::ltxtquery; +SELECT E'a\\}'::ltxtquery; +SELECT E'a\\|'::ltxtquery; +SELECT E'a\\&'::ltxtquery; +SELECT E'a\\('::ltxtquery; +SELECT E'a\\)'::ltxtquery; + +--failures +SELECT E'\\'::ltree; +SELECT E'n\\'::ltree; +SELECT '"'::ltree; +SELECT '"a'::ltree; +SELECT '""'::ltree; +SELECT 'a"b'::ltree; +SELECT E'\\"ab"'::ltree; +SELECT '"a"."a'::ltree; +SELECT '"a."a"'::ltree; +SELECT '"".a'::ltree; +SELECT 'a.""'::ltree; +SELECT '"".""'::ltree; +SELECT '""'::lquery; +SELECT '"".""'::lquery; +SELECT 'a.""'::lquery; +SELECT ' . '::ltree; +SELECT ' . '::lquery; +SELECT ' | '::lquery; + +SELECT( +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'z\z\z\z\z\z')::ltree; +SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z\z"')::ltree; + +SELECT '"'::lquery; +SELECT '"a'::lquery; +SELECT '"a"."a'::lquery; +SELECT '"a."a"'::lquery; + +SELECT E'\\"ab"'::lquery; +SELECT 'a"b'::lquery; +SELECT 'a!b'::lquery; +SELECT 'a%b'::lquery; +SELECT 'a*b'::lquery; +SELECT 'a@b'::lquery; +SELECT 'a{b'::lquery; +SELECT 'a}b'::lquery; + +SELECT 'a!'::lquery; +SELECT 'a{'::lquery; +SELECT 'a}'::lquery; + 
+SELECT '%b'::lquery; +SELECT '*b'::lquery; +SELECT '@b'::lquery; +SELECT '{b'::lquery; +SELECT '}b'::lquery; + +SELECT '!%b'::lquery; +SELECT '!*b'::lquery; +SELECT '!@b'::lquery; +SELECT '!{b'::lquery; +SELECT '!}b'::lquery; + +SELECT '"qwert"y.tu'::lquery; +SELECT 'q"wert"y"%@*.tu'::lquery; + +SELECT( +'"01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'01234567890123456789012345678901234567890123456789' || +'\z\z\z\z\z\z"')::lquery; + +SELECT 'a | ""'::ltxtquery; +SELECT '"" & ""'::ltxtquery; +SELECT 'a.""'::ltxtquery; +SELECT '"'::ltxtquery; + +SELECT '"""'::ltxtquery; +SELECT '"a'::ltxtquery; +SELECT '"a" & "a'::ltxtquery; +SELECT '"a | "a"'::ltxtquery; +SELECT '"!tree" & aWdf@*"'::ltxtquery; + +SELECT 'a"b'::ltxtquery; +SELECT 'a!b'::ltxtquery; +SELECT 'a%b'::ltxtquery; +SELECT 'a*b'::ltxtquery; +SELECT 'a@b'::ltxtquery; +SELECT 'a{b'::ltxtquery; +SELECT 'a}b'::ltxtquery; +SELECT 'a|b'::ltxtquery; +SELECT 'a&b'::ltxtquery; +SELECT 'a(b'::ltxtquery; +SELECT 'a)b'::ltxtquery; + +SELECT '"b'::ltxtquery; +SELECT '%b'::ltxtquery; +SELECT '*b'::ltxtquery; +SELECT '@b'::ltxtquery; +SELECT '{b'::ltxtquery; +SELECT '}b'::ltxtquery; +SELECT '|b'::ltxtquery; +SELECT '&b'::ltxtquery; +SELECT '(b'::ltxtquery; +SELECT ')b'::ltxtquery; + +SELECT 'a"'::ltxtquery; +SELECT 'a!'::ltxtquery; +SELECT 'a{'::ltxtquery; +SELECT 'a}'::ltxtquery; +SELECT 'a|'::ltxtquery; +SELECT 'a&'::ltxtquery; +SELECT 'a('::ltxtquery; +SELECT 'a)'::ltxtquery; + diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml index b4e07f6..f6d8541 100644 --- a/doc/src/sgml/ltree.sgml +++ b/doc/src/sgml/ltree.sgml @@ -23,14 +23,36 @@ Definitions - A label is a sequence of alphanumeric characters - and underscores (for example, in C locale the characters - A-Za-z0-9_ are allowed). Labels must be less than 256 bytes - long. 
+ A label is a sequence of characters. Labels must be + fewer than 256 characters in length. A label may contain any character supported + by PostgreSQL except \0. + If a label contains characters other than alphanumeric characters and + underscores, those characters should be escaped. + Escaping can be done either with a preceding backslash (\\) + symbol or by wrapping the whole label in double quotes ("). + Leading and trailing unescaped whitespace is stripped. - Examples: 42, Personal_Services + Examples: 42, Personal_Services, + "This is a literal", Literal\\ with\\ spaces. + + + + During conversion to the internal representation, the wrapping double quotes + and escaping backslashes are removed. During conversion from the internal + representation to text, if the label contains only alphanumeric characters + and underscores, it is printed as is. Otherwise, it is wrapped in double quotes and, + if it contains quotes or backslashes, they are escaped with backslashes. + + + + Examples: 42, "\\42", + \\4\\2, 42 and "42" + will have the same internal representation and, when + converted back from the internal representation, will become 42. + The literal abc def will turn into "abc + def". @@ -687,11 +709,13 @@ ltreetest=> SELECT ins_label(path,2,'Space') FROM test WHERE path <@ 'Top. Authors - All work was done by Teodor Sigaev (teodor@stack.net) and + The initial version was written by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov (oleg@sai.msu.su). See for additional information. Authors would like to thank Eugeny Rodichev for - helpful discussions. Comments and bug reports are welcome. + helpful discussions. The escaping syntax was implemented by Dmitry Belyavskiy + (beldmit@gmail.com) under the direction of Teodor Sigaev. + Comments and bug reports are welcome. -- 2.7.4