diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index a5b6adc4bb..2bc9060e47 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3108,6 +3108,80 @@ repeat('Pg', 4) PgPgPgPg
+
+
+
+ regexp_count
+
+ regexp_count ( string text, pattern text
+ [, start integer
+ [, flags text ] ] )
+ integer
+
+
+ Returns the number of times the POSIX regular
+ expression pattern matches in
+ the string; see
+ .
+
+
+ regexp_count('123456789012', '\d\d\d', 2)
+ 3
+
+
+
+
+
+
+ regexp_instr
+
+ regexp_instr ( string text, pattern text
+ [, start integer
+ [, N integer
+ [, endoption integer
+ [, flags text
+ [, subexpr integer ] ] ] ] ] )
+ integer
+
+
+ Returns the position within string where
+ the N'th match of the POSIX regular
+ expression pattern occurs, or zero if there is
+ no such match; see .
+
+
+ regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i')
+ 3
+
+
+ regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i', 2)
+ 5
+
+
+
+
+
+
+ regexp_like
+
+ regexp_like ( string text, pattern text
+ [, start integer
+ [, flags text ] ] )
+ boolean
+
+
+ Checks whether a match of the POSIX regular
+ expression pattern occurs
+ within string starting at
+ the start'th character; see
+ .
+
+
+ regexp_like('Hello World', 'world$', 1, 'i')
+ t
+
+
+
@@ -3117,8 +3191,9 @@ repeat('Pg', 4) PgPgPgPg
text[]
- Returns captured substrings resulting from the first match of a POSIX
- regular expression to the string; see
+ Returns captured substrings resulting from the first match of the
+ POSIX regular expression pattern to
+ the string; see
.
@@ -3136,10 +3211,11 @@ repeat('Pg', 4) PgPgPgPg
setof text[]
- Returns captured substrings resulting from the first match of a
- POSIX regular expression to the string,
- or multiple matches if the g flag is used;
- see .
+ Returns captured substrings resulting from the first match of the
+ POSIX regular expression pattern to
+ the string, or all matches if
+ the g flag is used; see
+ .
regexp_matches('foobarbequebaz', 'ba.', 'g')
@@ -3156,14 +3232,16 @@ repeat('Pg', 4) PgPgPgPg
regexp_replace
- regexp_replace ( string text, pattern text, replacement text [, flags text ] )
+ regexp_replace ( string text, pattern text, replacement text
+ [, { start integer
+ | flags text } ] )
text
- Replaces substrings resulting from the first match of a
- POSIX regular expression, or multiple substring matches
- if the g flag is used; see .
+ Replaces the substring that is the first match to the POSIX
+ regular expression pattern, or all matches
+ if the g flag is used; see
+ .
regexp_replace('Thomas', '.[mN]a.', 'M')
@@ -3171,6 +3249,26 @@ repeat('Pg', 4) PgPgPgPg
+
+
+ regexp_replace ( string text, pattern text, replacement text,
+ start integer,
+ N integer
+ [, flags text ] )
+ text
+
+
+ Replaces the substring that is the N'th
+ match to the POSIX regular expression pattern,
+ or all matches if N is zero; see
+ .
+
+
+ regexp_replace('Thomas', '.', 'X', 3, 2)
+ ThoXas
+
+
+
@@ -3213,6 +3311,35 @@ repeat('Pg', 4) PgPgPgPg
+
+
+
+ regexp_substr
+
+ regexp_substr ( string text, pattern text
+ [, start integer
+ [, N integer
+ [, flags text
+ [, subexpr integer ] ] ] ] )
+ text
+
+
+ Returns the substring within string that
+ matches the N'th occurrence of the POSIX
+ regular expression pattern,
+ or NULL if there is no such match; see
+ .
+
+
+ regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i')
+ CDEF
+
+
+ regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i', 2)
+ EF
+
+
+
@@ -5377,6 +5504,15 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL
substring
+
+ regexp_count
+
+
+ regexp_instr
+
+
+ regexp_like
+
regexp_match
@@ -5392,6 +5528,9 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL
regexp_split_to_array
+
+ regexp_substr
+
lists the available
@@ -5542,6 +5681,114 @@ substring('foobar' from 'o(.)b') o
+
+ The regexp_count function counts the number of
+ places where a POSIX regular expression pattern matches a string.
+ It has the syntax
+ regexp_count(string,
+ pattern
+ , start
+ , flags
+ ).
+ pattern is searched for
+ in string, normally from the beginning of
+ the string, but if the start parameter is
+ provided then beginning from that character index.
+ The flags parameter is an optional text
+ string containing zero or more single-letter flags that change the
+ function's behavior. For example, including i in
+ flags specifies case-insensitive matching.
+ Supported flags are described in
+ .
+
+
+
+ Some examples:
+
+regexp_count('ABCABCAXYaxy', 'A.') 3
+regexp_count('ABCABCAXYaxy', 'A.', 1, 'i') 4
+
+
+
+
+ The regexp_instr function returns the starting or
+ ending position of the N'th match of a
+ POSIX regular expression pattern to a string, or zero if there is no
+ such match. It has the syntax
+ regexp_instr(string,
+ pattern
+ , start
+ , N
+ , endoption
+ , flags
+ , subexpr
+ ).
+ pattern is searched for
+ in string, normally from the beginning of
+ the string, but if the start parameter is
+ provided then beginning from that character index.
+ If N is specified
+ then the N'th match of the pattern
+ is located, otherwise the first match is located.
+ If the endoption parameter is omitted or
+ specified as zero, the function returns the position of the first
+ character of the match. Otherwise, endoption
+ must be one, and the function returns the position of the character
+ following the match.
+ The flags parameter is an optional text
+ string containing zero or more single-letter flags that change the
+ function's behavior. Supported flags are described
+ in .
+ For a pattern containing parenthesized
+ subexpressions, subexpr is an integer
+ indicating which subexpression is of interest: the result identifies
+ the position of the substring matching that subexpression.
+ Subexpressions are numbered in the order of their leading parentheses.
+ When subexpr is omitted or zero, the result
+ identifies the position of the whole match regardless of
+ parenthesized subexpressions.
+
+
+
+ Some examples:
+
+regexp_instr('number of your street, town zip, FR', '[^,]+', 1, 2)
+ 23
+regexp_instr('ABCDEFGHI', '(c..)(...)', 1, 1, 0, 'i', 2)
+ 6
+
+
+
+
+ The regexp_like function checks whether a match
+ of a POSIX regular expression pattern occurs within a string,
+ returning boolean true or false. It has the syntax
+ regexp_like(string,
+ pattern
+ , start
+ , flags ).
+ pattern is searched for
+ in string, normally from the beginning of
+ the string, but if the start parameter is
+ provided then beginning from that character index.
+ The flags parameter is an optional text
+ string containing zero or more single-letter flags that change the
+ function's behavior. Supported flags are described
+ in .
+ This function has the same results as the ~
+ operator if no flags are specified and the search starts at the
+ beginning of the string. If only the i flag is specified, it has
+ the same results as the ~* operator, under the same condition.
+
+
+
+ Some examples:
+
+regexp_like('Hello World', 'world') false
+regexp_like('Hello World', 'world', 1, 'i') true
+
+
+
The regexp_match function returns a text array of
captured substring(s) resulting from the first match of a POSIX
@@ -5579,8 +5826,17 @@ SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
{bar,beque}
(1 row)
- In the common case where you just want the whole matching substring
- or NULL for no match, write something like
+
+
+
+
+ In the common case where you just want the whole matching substring
+ or NULL for no match, the best solution is to
+ use regexp_substr().
+ However, regexp_substr() only exists
+ in PostgreSQL version 15 and up. When
+ working in older versions, you can extract the first element
+ of regexp_match()'s result, for example:
SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
regexp_match
@@ -5588,7 +5844,8 @@ SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
barbeque
(1 row)
-
+
+
The regexp_matches function returns a set of text arrays
@@ -5650,7 +5907,13 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
It has the syntax
regexp_replace(source,
pattern, replacement
+ , start
+ , N
+
, flags ).
+ (Notice that N cannot be specified
+ unless start is, and that flags can be given
+ either without start and N or after both of them.)
The source string is returned unchanged if
there is no match to the pattern. If there is a
match, the source string is returned with the
@@ -5663,11 +5926,22 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
substring matching the entire pattern should be inserted. Write
\\ if you need to put a literal backslash in the replacement
text.
+ pattern is searched for
+ in source, normally from the beginning of
+ the string, but if the start parameter is
+ provided then beginning from that character index.
+ By default, only the first match of the pattern is replaced.
+ If N is specified and is greater than zero,
+ then the N'th match of the pattern
+ is replaced.
+ If the g flag is given, or
+ if N is specified and is zero, then all
+ matches at or after the start position are
+ replaced. (The g flag is ignored
+ when N is specified.)
The flags parameter is an optional text
string containing zero or more single-letter flags that change the
- function's behavior. Flag i specifies case-insensitive
- matching, while flag g specifies replacement of each matching
- substring rather than only the first one. Supported flags (though
+ function's behavior. Supported flags (though
not g) are
described in .
@@ -5681,6 +5955,10 @@ regexp_replace('foobarbaz', 'b..', 'X', 'g')
fooXX
regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
fooXarYXazY
+regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i')
+ X PXstgrXSQL fXnctXXn
+regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i')
+ A PostgrXSQL function
@@ -5712,7 +5990,6 @@ regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
Some examples:
-
SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo;
foo
-------
@@ -5761,11 +6038,51 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
zero-length matches that occur at the start or end of the string
or immediately after a previous match. This is contrary to the strict
definition of regexp matching that is implemented by
- regexp_match and
- regexp_matches, but is usually the most convenient behavior
+ the other regexp functions, but is usually the most convenient behavior
in practice. Other software systems such as Perl use similar definitions.
+
+ The regexp_substr function returns the substring
+ that matches a POSIX regular expression pattern,
+ or NULL if there is no match. It has the syntax
+ regexp_substr(string,
+ pattern
+ , start
+ , N
+ , flags
+ , subexpr
+ ).
+ pattern is searched for
+ in string, normally from the beginning of
+ the string, but if the start parameter is
+ provided then beginning from that character index.
+ If N is specified
+ then the N'th match of the pattern
+ is returned, otherwise the first match is returned.
+ The flags parameter is an optional text
+ string containing zero or more single-letter flags that change the
+ function's behavior. Supported flags are described
+ in .
+ For a pattern containing parenthesized
+ subexpressions, subexpr is an integer
+ indicating which subexpression is of interest: the result is the
+ substring matching that subexpression.
+ Subexpressions are numbered in the order of their leading parentheses.
+ When subexpr is omitted or zero, the result
+ is the whole match regardless of parenthesized subexpressions.
+
+
+
+ Some examples:
+
+regexp_substr('number of your street, town zip, FR', '[^,]+', 1, 2)
+ town zip
+regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2)
+ FGH
+
+
+
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a32c5c82ab..ede20a4d14 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -113,6 +113,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
/* Local functions */
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
pg_re_flags *flags,
+ int start_search,
Oid collation,
bool use_subpatterns,
bool ignore_degenerate,
@@ -629,7 +630,7 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
- PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
+ PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, 0, 1));
}
/*
@@ -646,11 +647,97 @@ textregexreplace(PG_FUNCTION_ARGS)
regex_t *re;
pg_re_flags flags;
+ /*
+ * regexp_replace() with four arguments will be preferentially resolved as
+ * this form when the fourth argument is of type UNKNOWN. However, the
+ * user might have intended to call textregexreplace_extended_no_n. If we
+ * see flags that look like an integer, emit the same error that
+ * parse_re_flags would, but add a HINT about how to fix it.
+ */
+ if (VARSIZE_ANY_EXHDR(opt) > 0)
+ {
+ char *opt_p = VARDATA_ANY(opt);
+
+ if (*opt_p >= '0' && *opt_p <= '9')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid regular expression option: \"%.*s\"",
+ pg_mblen(opt_p), opt_p),
+ errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
+ }
+
parse_re_flags(&flags, opt);
re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
- PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
+ PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, 0,
+ flags.glob ? 0 : 1));
+}
+
+/*
+ * textregexreplace_extended()
+ * Return the source string with regexp match(es) replaced by r.
+ * Args: s (source), p (pattern), r (replacement), then optional 1-based
+ * start, occurrence n (0 means all occurrences), and flags text.
+ */
+Datum
+textregexreplace_extended(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ text *r = PG_GETARG_TEXT_PP(2);
+ int start = 1; /* 1-based search start; default is the first character */
+ int n = 1; /* occurrence to replace; may be re-derived from flags below */
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5); /* optional sixth arg; presumably NULL when absent */
+ pg_re_flags re_flags;
+ regex_t *re;
+
+ /* Collect optional parameters; the shorter SQL signatures simply pass fewer args */
+ if (PG_NARGS() > 3)
+ {
+ start = PG_GETARG_INT32(3);
+ if (start <= 0) /* positions count from 1, so zero is rejected too */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+ if (PG_NARGS() > 4)
+ {
+ n = PG_GETARG_INT32(4);
+ if (n < 0) /* zero is legal: it requests replacement of all matches */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "n", n)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+
+ /* Without an explicit N, 'g' selects all matches (0), else the first (1); with N, 'g' has no effect */
+ if (PG_NARGS() <= 4)
+ n = re_flags.glob ? 0 : 1;
+
+ /* Compile the regular expression */
+ re = RE_compile_and_cache(p, re_flags.cflags, PG_GET_COLLATION());
+
+ /* Do the replacement(s); start - 1 converts to a 0-based character offset */
+ PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, start - 1, n));
+}
+
+/* Entry point for regexp_replace(text, text, text, int4); separate only to keep the opr_sanity regression test from complaining */
+Datum
+textregexreplace_extended_no_n(PG_FUNCTION_ARGS)
+{
+ return textregexreplace_extended(fcinfo);
+}
+
+/* Entry point for regexp_replace(text, text, text, int4, int4); separate only to keep the opr_sanity regression test from complaining */
+Datum
+textregexreplace_extended_no_flags(PG_FUNCTION_ARGS)
+{
+ return textregexreplace_extended(fcinfo);
}
/*
@@ -958,6 +1045,255 @@ similar_escape(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
+/*
+ * regexp_count()
+ * Return the number of matches of a pattern within a string, searching from optional 1-based start.
+ */
+Datum
+regexp_count(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1; /* 1-based character position at which the search begins */
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3); /* optional fourth arg; presumably NULL when absent */
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters; the shorter SQL signatures simply pass fewer args */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0) /* positions count from 1, so zero is rejected too */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g': this function always counts every match */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_count()")));
+ /* But we find all the matches anyway, so turn the flag on ourselves */
+ re_flags.glob = true;
+
+ /* Do the matching; start - 1 converts to a 0-based character offset */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ false, /* can ignore subexprs */
+ false, false);
+
+ PG_RETURN_INT32(matchctx->nmatches);
+}
+
+/* Entry point for regexp_count(text, text); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_count_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_count(fcinfo);
+}
+
+/* Entry point for regexp_count(text, text, int4); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_count_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_count(fcinfo);
+}
+
+/*
+ * regexp_instr()
+ * Return the 1-based position where the N'th match starts (or just past its end), or 0 if none
+ */
+Datum
+regexp_instr(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1; /* 1-based character position at which the search begins */
+ int n = 1; /* which match to report, counting from 1 */
+ int endoption = 0; /* 0: report start of match; 1: report position just past it */
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5); /* optional sixth arg; presumably NULL when absent */
+ int subexpr = 0; /* 0: whole match; k > 0: k'th parenthesized subexpression */
+ int pos; /* index into matchctx->match_locs[] */
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters; the shorter SQL signatures simply pass fewer args */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0) /* positions count from 1, so zero is rejected too */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+ if (PG_NARGS() > 3)
+ {
+ n = PG_GETARG_INT32(3);
+ if (n <= 0) /* unlike regexp_replace, zero is not accepted here */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "n", n)));
+ }
+ if (PG_NARGS() > 4)
+ {
+ endoption = PG_GETARG_INT32(4);
+ if (endoption != 0 && endoption != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "endoption", endoption)));
+ }
+ if (PG_NARGS() > 6) /* argument 5 is flags, already fetched above */
+ {
+ subexpr = PG_GETARG_INT32(6);
+ if (subexpr < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "subexpr", subexpr)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g': the match to report is chosen by n instead */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_instr()")));
+ /* But we find all the matches anyway, so that the n'th one can be picked */
+ re_flags.glob = true;
+
+ /* Do the matching; start - 1 converts to a 0-based character offset */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ (subexpr > 0), /* need submatches? */
+ false, false);
+
+ /* When n exceeds matches return 0 (includes case of no matches) */
+ if (n > matchctx->nmatches)
+ PG_RETURN_INT32(0);
+
+ /* When subexpr exceeds number of subexpressions return 0 */
+ if (subexpr > matchctx->npatterns)
+ PG_RETURN_INT32(0);
+
+ /* Select the entry to return: npatterns (start, end) pairs per match, start first */
+ pos = (n - 1) * matchctx->npatterns;
+ if (subexpr > 0)
+ pos += subexpr - 1;
+ pos *= 2;
+ if (endoption == 1)
+ pos += 1;
+
+ if (matchctx->match_locs[pos] >= 0)
+ PG_RETURN_INT32(matchctx->match_locs[pos] + 1); /* 0-based offset -> 1-based position */
+ else
+ PG_RETURN_INT32(0); /* position not identifiable */
+}
+
+/* Entry point for regexp_instr(text, text); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* Entry point for regexp_instr(text, text, int4); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_n(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* Entry point for regexp_instr(text, text, int4, int4); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_endoption(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* Entry point for regexp_instr(text, text, int4, int4, int4); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* Entry point for regexp_instr(text, text, int4, int4, int4, text); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/*
+ * regexp_like()
+ * Return true if the pattern matches within the string, searching from optional 1-based start.
+ */
+Datum
+regexp_like(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1; /* 1-based character position at which the search begins */
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3); /* optional fourth arg; presumably NULL when absent */
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters; the shorter SQL signatures simply pass fewer args */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0) /* positions count from 1, so zero is rejected too */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g'; note that here the flag is then left off */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_like()")));
+
+ /* Do the matching; start - 1 converts to a 0-based character offset */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ false, /* can ignore subexprs */
+ false, false);
+
+ PG_RETURN_BOOL((matchctx->nmatches > 0) ? true : false);
+}
+
+/* Entry point for regexp_like(text, text); separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_like_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_like(fcinfo);
+}
+
+/* Thin wrapper: regexp_like() itself defaults absent arguments; separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_like_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_like(fcinfo);
+}
+
/*
* regexp_match()
* Return the first substring(s) matching a pattern within a string.
@@ -982,7 +1318,7 @@ regexp_match(PG_FUNCTION_ARGS)
"regexp_match()"),
errhint("Use the regexp_matches function instead.")));
- matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
+ matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0,
PG_GET_COLLATION(), true, false, false);
if (matchctx->nmatches == 0)
@@ -1029,7 +1365,7 @@ regexp_matches(PG_FUNCTION_ARGS)
/* be sure to copy the input string into the multi-call ctx */
matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
- &re_flags,
+ &re_flags, 0,
PG_GET_COLLATION(),
true, false, false);
@@ -1064,24 +1400,28 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
}
/*
- * setup_regexp_matches --- do the initial matching for regexp_match
- * and regexp_split functions
+ * setup_regexp_matches --- do the initial matching for regexp_match,
+ * regexp_split, and related functions
*
* To avoid having to re-find the compiled pattern on each call, we do
* all the matching in one swoop. The returned regexp_matches_ctx contains
* the locations of all the substrings matching the pattern.
*
- * The three bool parameters have only two patterns (one for matching, one for
- * splitting) but it seems clearer to distinguish the functionality this way
- * than to key it all off one "is_split" flag. We don't currently assume that
- * fetching_unmatched is exclusive of fetching the matched text too; if it's
- * set, the conversion buffer is large enough to fetch any single matched or
- * unmatched string, but not any larger substring. (In practice, when splitting
- * the matches are usually small anyway, and it didn't seem worth complicating
- * the code further.)
+ * start_search: the character (not byte) offset in orig_str at which to
+ * begin the search. Returned positions are relative to orig_str anyway.
+ * use_subpatterns: collect data about matches to parenthesized subexpressions.
+ * ignore_degenerate: ignore zero-length matches.
+ * fetching_unmatched: caller wants to fetch unmatched substrings.
+ *
+ * We don't currently assume that fetching_unmatched is exclusive of fetching
+ * the matched text too; if it's set, the conversion buffer is large enough to
+ * fetch any single matched or unmatched string, but not any larger
+ * substring. (In practice, when splitting the matches are usually small
+ * anyway, and it didn't seem worth complicating the code further.)
*/
static regexp_matches_ctx *
setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
+ int start_search,
Oid collation,
bool use_subpatterns,
bool ignore_degenerate,
@@ -1099,7 +1439,6 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
int array_idx;
int prev_match_end;
int prev_valid_match_end;
- int start_search;
int maxlen = 0; /* largest fetch length in characters */
/* save original string --- we'll extract result substrings from it */
@@ -1142,7 +1481,6 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
/* search for the pattern, perhaps repeatedly */
prev_match_end = 0;
prev_valid_match_end = 0;
- start_search = 0;
while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
pmatch_len, pmatch))
{
@@ -1367,7 +1705,7 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
/* be sure to copy the input string into the multi-call ctx */
splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
- &re_flags,
+ &re_flags, 0,
PG_GET_COLLATION(),
false, true, true);
@@ -1422,7 +1760,7 @@ regexp_split_to_array(PG_FUNCTION_ARGS)
splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
PG_GETARG_TEXT_PP(1),
- &re_flags,
+ &re_flags, 0,
PG_GET_COLLATION(),
false, true, true);
@@ -1489,6 +1827,125 @@ build_regexp_split_result(regexp_matches_ctx *splitctx)
}
}
+/*
+ * regexp_substr()
+ * Return the substring matched by the N'th match of a pattern (or one subexpression of it), or NULL
+ */
+Datum
+regexp_substr(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1; /* 1-based character position at which the search begins */
+ int n = 1; /* which match to return, counting from 1 */
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4); /* optional fifth arg; presumably NULL when absent */
+ int subexpr = 0; /* 0: whole match; k > 0: k'th parenthesized subexpression */
+ int so, /* start offset of the selected match */
+ eo, /* end offset of the selected match */
+ pos; /* index into matchctx->match_locs[] */
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters; the shorter SQL signatures simply pass fewer args */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0) /* positions count from 1, so zero is rejected too */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+ if (PG_NARGS() > 3)
+ {
+ n = PG_GETARG_INT32(3);
+ if (n <= 0) /* unlike regexp_replace, zero is not accepted here */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "n", n)));
+ }
+ if (PG_NARGS() > 5) /* argument 4 is flags, already fetched above */
+ {
+ subexpr = PG_GETARG_INT32(5);
+ if (subexpr < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "subexpr", subexpr)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g': the match to return is chosen by n instead */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_substr()")));
+ /* But we find all the matches anyway, so that the n'th one can be picked */
+ re_flags.glob = true;
+
+ /* Do the matching; start - 1 converts to a 0-based character offset */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ (subexpr > 0), /* need submatches? */
+ false, false);
+
+ /* When n exceeds matches return NULL (includes case of no matches) */
+ if (n > matchctx->nmatches)
+ PG_RETURN_NULL();
+
+ /* When subexpr exceeds number of subexpressions return NULL */
+ if (subexpr > matchctx->npatterns)
+ PG_RETURN_NULL();
+
+ /* Select the entry to return: npatterns (start, end) pairs per match */
+ pos = (n - 1) * matchctx->npatterns;
+ if (subexpr > 0)
+ pos += subexpr - 1;
+ pos *= 2;
+ so = matchctx->match_locs[pos];
+ eo = matchctx->match_locs[pos + 1];
+
+ if (so < 0 || eo < 0)
+ PG_RETURN_NULL(); /* unidentifiable location */
+
+ PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
+ PointerGetDatum(matchctx->orig_str),
+ Int32GetDatum(so + 1), /* start: shifted from 0-based to 1-based */
+ Int32GetDatum(eo - so))); /* length of the match */
+}
+
+/* Thin wrapper: regexp_substr() itself defaults absent arguments; separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/* Thin wrapper: regexp_substr() itself defaults absent arguments; separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_n(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/* Thin wrapper: regexp_substr() itself defaults absent arguments; separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/* Thin wrapper: regexp_substr() itself defaults absent arguments; separate only to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
/*
* regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2a11b1b5d..a0bde4e352 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
/*
* replace_text_regexp
*
- * replace text that matches to regexp in src_text to replace_text.
+ * replace text that matches to regexp in src_text with replace_text.
+ *
+ * search_start: the character (not byte) offset in src_text at which to
+ * begin searching.
+ * n: if 0, replace all matches; if > 0, replace only the N'th match.
*
* Note: to avoid having to include regex.h in builtins.h, we declare
* the regexp argument as void *, but really it's regex_t *.
*/
text *
replace_text_regexp(text *src_text, void *regexp,
- text *replace_text, bool glob)
+ text *replace_text,
+ int search_start, int n)
{
text *ret_text;
regex_t *re = (regex_t *) regexp;
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
+ int nmatches = 0;
StringInfoData buf;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
pg_wchar *data;
size_t data_len;
- int search_start;
int data_pos;
char *start_ptr;
bool have_escape;
@@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
start_ptr = (char *) VARDATA_ANY(src_text);
data_pos = 0;
- search_start = 0;
while (search_start <= data_len)
{
int regexec_result;
@@ -4560,6 +4564,23 @@ replace_text_regexp(text *src_text, void *regexp,
errmsg("regular expression failed: %s", errMsg)));
}
+ /*
+ * Count matches, and decide whether to replace this match.
+ */
+ nmatches++;
+ if (n > 0 && nmatches != n)
+ {
+ /*
+ * No, so advance search_start, but not start_ptr/data_pos. (Thus,
+ * we treat the matched text as if it weren't matched, and copy it
+ * to the output later.)
+ */
+ search_start = pmatch[0].rm_eo;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
+ search_start++;
+ continue;
+ }
+
/*
* Copy the text to the left of the match position. Note we are given
* character not byte indexes.
@@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
data_pos = pmatch[0].rm_eo;
/*
- * When global option is off, replace the first instance only.
+ * If we only want to replace one occurrence, we're done.
*/
- if (!glob)
+ if (n > 0)
break;
/*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 8cd0252082..32e5d25714 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3565,6 +3565,18 @@
{ oid => '2285', descr => 'replace text using regexp',
proname => 'regexp_replace', prorettype => 'text',
proargtypes => 'text text text text', prosrc => 'textregexreplace' },
+{ oid => '9611', descr => 'replace text using regexp',
+ proname => 'regexp_replace', prorettype => 'text',
+ proargtypes => 'text text text int4 int4 text',
+ prosrc => 'textregexreplace_extended' },
+{ oid => '9612', descr => 'replace text using regexp',
+ proname => 'regexp_replace', prorettype => 'text',
+ proargtypes => 'text text text int4 int4',
+ prosrc => 'textregexreplace_extended_no_flags' },
+{ oid => '9613', descr => 'replace text using regexp',
+ proname => 'regexp_replace', prorettype => 'text',
+ proargtypes => 'text text text int4',
+ prosrc => 'textregexreplace_extended_no_n' },
{ oid => '3396', descr => 'find first match for regexp',
proname => 'regexp_match', prorettype => '_text', proargtypes => 'text text',
prosrc => 'regexp_match_no_flags' },
@@ -3579,6 +3591,61 @@
proname => 'regexp_matches', prorows => '10', proretset => 't',
prorettype => '_text', proargtypes => 'text text text',
prosrc => 'regexp_matches' },
+{ oid => '9614', descr => 'count regexp matches',
+ proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text',
+ prosrc => 'regexp_count_no_start' },
+{ oid => '9615', descr => 'count regexp matches',
+ proname => 'regexp_count', prorettype => 'int4',
+ proargtypes => 'text text int4', prosrc => 'regexp_count_no_flags' },
+{ oid => '9616', descr => 'count regexp matches',
+ proname => 'regexp_count', prorettype => 'int4',
+ proargtypes => 'text text int4 text', prosrc => 'regexp_count' },
+{ oid => '9617', descr => 'position of regexp match',
+ proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text',
+ prosrc => 'regexp_instr_no_start' },
+{ oid => '9618', descr => 'position of regexp match',
+ proname => 'regexp_instr', prorettype => 'int4',
+ proargtypes => 'text text int4', prosrc => 'regexp_instr_no_n' },
+{ oid => '9619', descr => 'position of regexp match',
+ proname => 'regexp_instr', prorettype => 'int4',
+ proargtypes => 'text text int4 int4', prosrc => 'regexp_instr_no_endoption' },
+{ oid => '9620', descr => 'position of regexp match',
+ proname => 'regexp_instr', prorettype => 'int4',
+ proargtypes => 'text text int4 int4 int4',
+ prosrc => 'regexp_instr_no_flags' },
+{ oid => '9621', descr => 'position of regexp match',
+ proname => 'regexp_instr', prorettype => 'int4',
+ proargtypes => 'text text int4 int4 int4 text',
+ prosrc => 'regexp_instr_no_subexpr' },
+{ oid => '9622', descr => 'position of regexp match',
+ proname => 'regexp_instr', prorettype => 'int4',
+ proargtypes => 'text text int4 int4 int4 text int4',
+ prosrc => 'regexp_instr' },
+{ oid => '9623', descr => 'test for regexp match',
+ proname => 'regexp_like', prorettype => 'bool', proargtypes => 'text text',
+ prosrc => 'regexp_like_no_start' },
+{ oid => '9624', descr => 'test for regexp match',
+ proname => 'regexp_like', prorettype => 'bool',
+ proargtypes => 'text text int4', prosrc => 'regexp_like_no_flags' },
+{ oid => '9630', descr => 'test for regexp match',
+ proname => 'regexp_like', prorettype => 'bool',
+ proargtypes => 'text text int4 text', prosrc => 'regexp_like' },
+{ oid => '9625', descr => 'extract substring that matches regexp',
+ proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text',
+ prosrc => 'regexp_substr_no_start' },
+{ oid => '9626', descr => 'extract substring that matches regexp',
+ proname => 'regexp_substr', prorettype => 'text',
+ proargtypes => 'text text int4', prosrc => 'regexp_substr_no_n' },
+{ oid => '9627', descr => 'extract substring that matches regexp',
+ proname => 'regexp_substr', prorettype => 'text',
+ proargtypes => 'text text int4 int4', prosrc => 'regexp_substr_no_flags' },
+{ oid => '9628', descr => 'extract substring that matches regexp',
+ proname => 'regexp_substr', prorettype => 'text',
+ proargtypes => 'text text int4 int4 text',
+ prosrc => 'regexp_substr_no_subexpr' },
+{ oid => '9629', descr => 'extract substring that matches regexp',
+ proname => 'regexp_substr', prorettype => 'text',
+ proargtypes => 'text text int4 int4 text int4', prosrc => 'regexp_substr' },
{ oid => '2088', descr => 'split string by field_sep and return field_num',
proname => 'split_part', prorettype => 'text',
proargtypes => 'text text int4', prosrc => 'split_part' },
diff --git a/src/include/utils/varlena.h b/src/include/utils/varlena.h
index 5c39723332..6645e2af13 100644
--- a/src/include/utils/varlena.h
+++ b/src/include/utils/varlena.h
@@ -34,6 +34,7 @@ extern bool SplitDirectoriesString(char *rawstring, char separator,
extern bool SplitGUCList(char *rawstring, char separator,
List **namelist);
extern text *replace_text_regexp(text *src_text, void *regexp,
- text *replace_text, bool glob);
+ text *replace_text,
+ int search_start, int n);
#endif
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 91aa819804..2359105348 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -515,6 +515,13 @@ SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
cde
(1 row)
+-- Check case where we have a match, but not a subexpression match
+SELECT SUBSTRING('foo' FROM 'foo(bar)?') IS NULL AS t;
+ t
+---
+ t
+(1 row)
+
-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
true
@@ -592,6 +599,378 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
ERROR: invalid regular expression option: "z"
+-- extended regexp_replace tests
+SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1);
+ regexp_replace
+-----------------------
+ X PostgreSQL function
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1, 2);
+ regexp_replace
+-----------------------
+ A PXstgreSQL function
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i');
+ regexp_replace
+-----------------------
+ X PXstgrXSQL fXnctXXn
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'i');
+ regexp_replace
+-----------------------
+ X PostgreSQL function
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 2, 'i');
+ regexp_replace
+-----------------------
+ A PXstgreSQL function
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i');
+ regexp_replace
+-----------------------
+ A PostgrXSQL function
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 9, 'i');
+ regexp_replace
+-----------------------
+ A PostgreSQL function
+(1 row)
+
+SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 7, 0, 'i');
+ regexp_replace
+-----------------------
+ A PostgrXSQL fXnctXXn
+(1 row)
+
+-- 'g' flag should be ignored when N is specified
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'g');
+ regexp_replace
+-----------------------
+ A PXstgreSQL function
+(1 row)
+
+-- errors
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', -1, 0, 'i');
+ERROR: invalid value for parameter "start": -1
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, -1, 'i');
+ERROR: invalid value for parameter "n": -1
+-- erroneous invocation of non-extended form
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', '1');
+ERROR: invalid regular expression option: "1"
+HINT: If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.
+-- regexp_count tests
+SELECT regexp_count('123123123123123', '(12)3');
+ regexp_count
+--------------
+ 5
+(1 row)
+
+SELECT regexp_count('123123123123', '123', 1);
+ regexp_count
+--------------
+ 4
+(1 row)
+
+SELECT regexp_count('123123123123', '123', 3);
+ regexp_count
+--------------
+ 3
+(1 row)
+
+SELECT regexp_count('123123123123', '123', 33);
+ regexp_count
+--------------
+ 0
+(1 row)
+
+SELECT regexp_count('ABCABCABCABC', 'Abc', 1, '');
+ regexp_count
+--------------
+ 0
+(1 row)
+
+SELECT regexp_count('ABCABCABCABC', 'Abc', 1, 'i');
+ regexp_count
+--------------
+ 4
+(1 row)
+
+-- errors
+SELECT regexp_count('123123123123', '123', 0);
+ERROR: invalid value for parameter "start": 0
+SELECT regexp_count('123123123123', '123', -3);
+ERROR: invalid value for parameter "start": -3
+-- regexp_like tests
+SELECT regexp_like('Steven', '^Ste(v|ph)en$');
+ regexp_like
+-------------
+ t
+(1 row)
+
+SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 1, 'n');
+ regexp_like
+-------------
+ f
+(1 row)
+
+SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 1, 's');
+ regexp_like
+-------------
+ t
+(1 row)
+
+SELECT regexp_like('abc', ' a . c ', 1, 'x');
+ regexp_like
+-------------
+ t
+(1 row)
+
+SELECT regexp_like('abc', 'a.c', 2);
+ regexp_like
+-------------
+ f
+(1 row)
+
+SELECT regexp_like('abc', 'a.c', 0); -- error
+ERROR: invalid value for parameter "start": 0
+SELECT regexp_like('abc', 'a.c', 1, 'g'); -- error
+ERROR: regexp_like() does not support the "global" option
+-- regexp_instr tests
+SELECT regexp_instr('abcdefghi', 'd.f');
+ regexp_instr
+--------------
+ 4
+(1 row)
+
+SELECT regexp_instr('abcdefghi', 'd.q');
+ regexp_instr
+--------------
+ 0
+(1 row)
+
+SELECT regexp_instr('abcabcabc', 'a.c');
+ regexp_instr
+--------------
+ 1
+(1 row)
+
+SELECT regexp_instr('abcabcabc', 'a.c', 2);
+ regexp_instr
+--------------
+ 4
+(1 row)
+
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 3);
+ regexp_instr
+--------------
+ 7
+(1 row)
+
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 4);
+ regexp_instr
+--------------
+ 0
+(1 row)
+
+SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i');
+ regexp_instr
+--------------
+ 4
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
+ regexp_instr
+--------------
+ 1
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
+ regexp_instr
+--------------
+ 1
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
+ regexp_instr
+--------------
+ 4
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
+ regexp_instr
+--------------
+ 5
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
+ regexp_instr
+--------------
+ 7
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
+ regexp_instr
+--------------
+ 0
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
+ regexp_instr
+--------------
+ 9
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
+ regexp_instr
+--------------
+ 4
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
+ regexp_instr
+--------------
+ 9
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
+ regexp_instr
+--------------
+ 7
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
+ regexp_instr
+--------------
+ 9
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
+ regexp_instr
+--------------
+ 0
+(1 row)
+
+-- Check case where we have a match, but not a subexpression match
+SELECT regexp_instr('foo', 'foo(bar)?', 1, 1, 0, '', 1);
+ regexp_instr
+--------------
+ 0
+(1 row)
+
+-- errors
+SELECT regexp_instr('abcabcabc', 'a.c', 0, 1);
+ERROR: invalid value for parameter "start": 0
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 0);
+ERROR: invalid value for parameter "n": 0
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);
+ERROR: invalid value for parameter "endoption": -1
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 2);
+ERROR: invalid value for parameter "endoption": 2
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, 'g');
+ERROR: regexp_instr() does not support the "global" option
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1);
+ERROR: invalid value for parameter "subexpr": -1
+-- regexp_substr tests
+SELECT regexp_substr('abcdefghi', 'd.f');
+ regexp_substr
+---------------
+ def
+(1 row)
+
+SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT regexp_substr('abcabcabc', 'a.c');
+ regexp_substr
+---------------
+ abc
+(1 row)
+
+SELECT regexp_substr('abcabcabc', 'a.c', 2);
+ regexp_substr
+---------------
+ abc
+(1 row)
+
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 3);
+ regexp_substr
+---------------
+ abc
+(1 row)
+
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i');
+ regexp_substr
+---------------
+ abc
+(1 row)
+
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
+ regexp_substr
+---------------
+ 12345678
+(1 row)
+
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1);
+ regexp_substr
+---------------
+ 123
+(1 row)
+
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2);
+ regexp_substr
+---------------
+ 45678
+(1 row)
+
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 3);
+ regexp_substr
+---------------
+ 56
+(1 row)
+
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
+ regexp_substr
+---------------
+ 78
+(1 row)
+
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 5) IS NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- Check case where we have a match, but not a subexpression match
+SELECT regexp_substr('foo', 'foo(bar)?', 1, 1, '', 1) IS NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- errors
+SELECT regexp_substr('abcabcabc', 'a.c', 0, 1);
+ERROR: invalid value for parameter "start": 0
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 0);
+ERROR: invalid value for parameter "n": 0
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, 'g');
+ERROR: regexp_substr() does not support the "global" option
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, '', -1);
+ERROR: invalid value for parameter "subexpr": -1
-- set so we can tell NULL from empty string
\pset null '\\N'
-- return all matches from regexp
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 2c502534c2..92837fdd14 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -171,6 +171,8 @@ SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
-- With a parenthesized subexpression, return only what matches the subexpr
SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
+-- Check case where we have a match, but not a subexpression match
+SELECT SUBSTRING('foo' FROM 'foo(bar)?') IS NULL AS t;
-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
@@ -193,6 +195,95 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
+-- extended regexp_replace tests
+SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1);
+SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1, 2);
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i');
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'i');
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 2, 'i');
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i');
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 9, 'i');
+SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 7, 0, 'i');
+-- 'g' flag should be ignored when N is specified
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'g');
+-- errors
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', -1, 0, 'i');
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, -1, 'i');
+-- erroneous invocation of non-extended form
+SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', '1');
+
+-- regexp_count tests
+SELECT regexp_count('123123123123123', '(12)3');
+SELECT regexp_count('123123123123', '123', 1);
+SELECT regexp_count('123123123123', '123', 3);
+SELECT regexp_count('123123123123', '123', 33);
+SELECT regexp_count('ABCABCABCABC', 'Abc', 1, '');
+SELECT regexp_count('ABCABCABCABC', 'Abc', 1, 'i');
+-- errors
+SELECT regexp_count('123123123123', '123', 0);
+SELECT regexp_count('123123123123', '123', -3);
+
+-- regexp_like tests
+SELECT regexp_like('Steven', '^Ste(v|ph)en$');
+SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 1, 'n');
+SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 1, 's');
+SELECT regexp_like('abc', ' a . c ', 1, 'x');
+SELECT regexp_like('abc', 'a.c', 2);
+SELECT regexp_like('abc', 'a.c', 0); -- error
+SELECT regexp_like('abc', 'a.c', 1, 'g'); -- error
+
+-- regexp_instr tests
+SELECT regexp_instr('abcdefghi', 'd.f');
+SELECT regexp_instr('abcdefghi', 'd.q');
+SELECT regexp_instr('abcabcabc', 'a.c');
+SELECT regexp_instr('abcabcabc', 'a.c', 2);
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 3);
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 4);
+SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i');
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
+-- Check case where we have a match, but not a subexpression match
+SELECT regexp_instr('foo', 'foo(bar)?', 1, 1, 0, '', 1);
+-- errors
+SELECT regexp_instr('abcabcabc', 'a.c', 0, 1);
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 0);
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 2);
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, 'g');
+SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1);
+
+-- regexp_substr tests
+SELECT regexp_substr('abcdefghi', 'd.f');
+SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t;
+SELECT regexp_substr('abcabcabc', 'a.c');
+SELECT regexp_substr('abcabcabc', 'a.c', 2);
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 3);
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t;
+SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i');
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1);
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2);
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 3);
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
+SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 5) IS NULL AS t;
+-- Check case where we have a match, but not a subexpression match
+SELECT regexp_substr('foo', 'foo(bar)?', 1, 1, '', 1) IS NULL AS t;
+-- errors
+SELECT regexp_substr('abcabcabc', 'a.c', 0, 1);
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 0);
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, 'g');
+SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, '', -1);
+
-- set so we can tell NULL from empty string
\pset null '\\N'