diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9b7e967..b2d2ed6 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1519,21 +1519,13 @@ format format(formatstr text - [, str "any" [, ...] ]) + [, formatarg "any" [, ...] ]) text Format arguments according to a format string. - This function is similar to the C function - sprintf, but only the following conversion specifications - are recognized: %s interpolates the corresponding - argument as a string; %I escapes its argument as - an SQL identifier; %L escapes its argument as an - SQL literal; %% outputs a literal %. - A conversion can reference an explicit parameter position by preceding - the conversion specifier with n$, where - n is the argument position. - See also . + This function is similar to the C function sprintf. + See . format('Hello %s, %1$s', 'World') Hello World, World @@ -2847,6 +2839,186 @@ + + <function>format</function> + + + format + + + + The function format produces formatted output according to + a format string in a similar way to the C function sprintf. + + + + +format(formatstr text [, formatarg "any" [, ...] ]) + + formatstr is a format string that specifies how the + result should be formatted. Text in the format string is copied directly + to the result, except where format specifiers are used. + Format specifiers act as placeholders in the string, allowing subsequent + function arguments to be formatted and inserted into the result. + + + + Format specifiers are introduced by a % character and take + the form + +%[parameter][flags][width]type + + + + parameter (optional) + + + An expression of the form n$ where + n is the index of the argument to use for the format + specifier's value. An index of 1 means the first argument after + formatstr. If the parameter field is + omitted, the default is to use the next argument. 
+ + +SELECT format('Testing %s, %s, %s', 'one', 'two', 'three'); +Result: Testing one, two, three + +SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three'); +Result: Testing three, two, one + + + + Note that unlike the C function sprintf defined in the + Single UNIX Specification, the format function in + PostgreSQL allows format specifiers with and without + explicit parameter fields to be mixed in the same + format string. A format specifier without a + parameter field always uses the next argument after + the last argument consumed. In addition, the + PostgreSQL format function does not + require all function arguments to be referred to in the format + string. + + +SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); +Result: Testing three, two, three + + + + + + flags (optional) + + + Additional options controlling how the format specifier's output is + formatted. Currently the only supported flag is a minus sign + (-) which will cause the format specifier's output to be + left-aligned. This has no effect unless the width + field is also specified. + + +SELECT format('|%10s|%-10s|', 'foo', 'bar'); +Result: |       foo|bar       | + + + + + + width (optional) + + + Specifies the minimum number of characters to use to + display the format specifier's output. The width may be specified + using any of the following: a positive integer; an asterisk + (*) to use the next function argument as the width; or an + expression of the form *n$ to use the + nth function argument as the width. + + + + If the width comes from a function argument, that argument is + consumed before the argument that is used for the format + specifier's value. If the width argument is negative, the result is + left aligned, as if the - flag had been specified.
+ + +SELECT format('|%10s|', 'foo'); +Result: |       foo| + +SELECT format('|%*s|', 10, 'foo'); +Result: |       foo| + +SELECT format('|%*s|', -10, 'foo'); +Result: |foo       | + +SELECT format('|%-*s|', 10, 'foo'); +Result: |foo       | + +SELECT format('|%-*s|', -10, 'foo'); +Result: |foo       | + +SELECT format('|%*2$s|', 'foo', 10, 'bar'); +Result: |       bar| + +SELECT format('|%3$*2$s|', 'foo', 10, 'bar'); +Result: |       bar| + + + + + + type (required) + + + The type of format conversion to use to produce the format + specifier's output. The following types are supported: + + + + s formats the argument value as a simple + string. A null value is treated as an empty string. + + + + + I escapes the value as an SQL identifier. It + is an error for the value to be null. + + + + + L escapes the value as an SQL literal. A null + value is displayed as the literal value NULL. + + + + + +SELECT format('Hello %s', 'World'); +Result: Hello World + +SELECT format('DROP TABLE %I', 'Foo bar'); +Result: DROP TABLE "Foo bar" + +SELECT format('SELECT %L', E'O\'Reilly'); +Result: SELECT 'O''Reilly' + + + + The %I and %L format specifiers may be used + to safely construct dynamic SQL statements. See + . + + + + + + + + In addition to the format specifiers above, the special escape sequence + %% may be used to output a literal % character.
+ + diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index e69b7dd..19b8049 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -78,7 +78,8 @@ static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static StringInfo makeStringAggState(FunctionCallInfo fcinfo); static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, - Datum value, bool isNull); + Datum value, bool isNull, + int flags, int width); static Datum text_to_array_internal(PG_FUNCTION_ARGS); static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string); @@ -3996,6 +3997,135 @@ text_reverse(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(result); } +#define FORWARD_PARSE_POINT(ptr) \ +do { \ + if (++(ptr) >= (end_ptr)) \ + ereport(ERROR, \ + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \ + errmsg("unterminated conversion specifier"))); \ +} while (0) + +/* + * Parse contiguous digits into a decimal number. + * + * Returns true if some digits could be parsed and *ptr moved to the next + * character to be parsed. The value is returned into *value. + */ +static bool +text_format_parse_digits(const char **ptr, const char *end_ptr, int *value) +{ + const char *cp = *ptr; + int wval = 0; + bool found; + + /* + * continue, only when start_ptr is less than end_ptr. + * Overrun of cp is checked in FORWARD_PARSE_POINT. + */ + while (*cp >= '0' && *cp <= '9') + { + int newnum = wval * 10 + (*cp - '0'); + + if (newnum / 10 != wval) /* overflow? */ + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("number is out of range"))); + wval = newnum; + FORWARD_PARSE_POINT(cp); + } + + found = (cp > *ptr); + *value = wval; + *ptr = cp; + + return found; +} + +#define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus in format string?
*/ + +/* + * parse format specification + * [argpos][flags][width]type + * + * Return values are, + * static const char * : Address to be parsed next. + * valarg : argument position for value to be printed. -1 means missing. + * widtharg : argument position for width. Zero means that argument position + * is not specified and -1 means missing. + * flags : flags + * width : the value for direct width specification, zero means that width + * is not specified. + */ +static const char * +text_format_parse_format(const char *start_ptr, const char *end_ptr, + int *valarg, int *widtharg, int *flags, int *width) +{ + const char *cp = start_ptr; + int n; + + /* set defaults to out parameters */ + *valarg = -1; + *widtharg = -1; + *flags = 0; + *width = 0; + + /* try to identify first number */ + if (text_format_parse_digits(&cp, end_ptr, &n)) + { + if (*cp != '$') + { + *width = n; /* The number should be width */ + return cp; + } + /* Explicit 0 for argument index is immediately refused */ + if (n == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("conversion specifies argument 0, but arguments are numbered from 1"))); + *valarg = n; /* The number was argument position */ + FORWARD_PARSE_POINT(cp); + } + + /* Check for flags, only minus is supported now. 
*/ + while (*cp == '-') + { + *flags = *flags | TEXT_FORMAT_FLAG_MINUS; + FORWARD_PARSE_POINT(cp); + } + + /* try to parse indirect width */ + if (*cp == '*') + { + FORWARD_PARSE_POINT(cp); + + if (text_format_parse_digits(&cp, end_ptr, &n)){ + /* number in this position should be closed by $ */ + if (*cp != '$') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unexpected char \"%c\".",*cp))); + FORWARD_PARSE_POINT(cp); + + /* Explicit 0 for argument index is immediately refused */ + if (n == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("conversion specifies argument 0, but arguments are numbered from 1"))); + *widtharg = n; + } + else + *widtharg = 0; /* 0 means argument position is not specified */ + + return cp; + } + + /* last possible number - width */ + if (text_format_parse_digits(&cp, end_ptr, &n)) + *width = n; + + return cp; +} + /* * Returns a formated string */ @@ -4016,6 +4146,8 @@ text_format(PG_FUNCTION_ARGS) Oid element_type = InvalidOid; Oid prev_type = InvalidOid; FmgrInfo typoutputfinfo; + FmgrInfo typoutputinfo_width; + Oid prev_type_width = InvalidOid; /* When format string is null, returns null */ if (PG_ARGISNULL(0)) @@ -4077,7 +4209,7 @@ text_format(PG_FUNCTION_ARGS) } /* Setup for main loop. */ - fmt = PG_GETARG_TEXT_PP(0); + fmt = PG_GETARG_TEXT_PP(arg++); start_ptr = VARDATA_ANY(fmt); end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt); initStringInfo(&str); @@ -4088,6 +4220,10 @@ text_format(PG_FUNCTION_ARGS) Datum value; bool isNull; Oid typid; + int valarg; + int widtharg; + int flags; + int width; /* * If it's not the start of a conversion specifier, just copy it to @@ -4099,11 +4235,7 @@ text_format(PG_FUNCTION_ARGS) continue; } - /* Did we run off the end of the string? 
*/ - if (++cp >= end_ptr) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unterminated conversion specifier"))); + FORWARD_PARSE_POINT(cp); /* Easy case: %% outputs a single % */ if (*cp == '%') @@ -4112,69 +4244,84 @@ text_format(PG_FUNCTION_ARGS) continue; } - /* - * If the user hasn't specified an argument position, we just advance - * to the next one. If they have, we must parse it. - */ - if (*cp < '0' || *cp > '9') + cp = text_format_parse_format(cp, end_ptr, + &valarg, &widtharg, &flags, &width); + + if (widtharg >= 0) { - ++arg; - if (arg <= 0) /* overflow? */ - { - /* - * Should not happen, as you can't pass billions of arguments - * to a function, but better safe than sorry. - */ + if (widtharg > 0) + /* be consistent, move ordered argument together with + * positional */ + arg = widtharg; + + if (arg >= nargs) ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("argument number is out of range"))); - } - } - else - { - bool unterminated = false; + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("too few arguments for format"))); - /* Parse digit string. */ - arg = 0; - do + if (!funcvariadic) { - int newarg = arg * 10 + (*cp - '0'); + value = PG_GETARG_DATUM(arg); + isNull = PG_ARGISNULL(arg); + typid = get_fn_expr_argtype(fcinfo->flinfo, arg); + } + else + { + value = elements[arg - 1]; + isNull = nulls[arg - 1]; + typid = element_type; + } + if (!OidIsValid(typid)) + elog(ERROR, "could not determine data type of format() input"); - if (newarg / 10 != arg) /* overflow? */ - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("argument number is out of range"))); - arg = newarg; - ++cp; - } while (cp < end_ptr && *cp >= '0' && *cp <= '9'); + arg++; /* - * If we ran off the end, or if there's not a $ next, or if the $ - * is the last character, the conversion specifier is improperly - * terminated. 
+ * we don't need to different between NULL and zero in this moment, + * NULL means ignore this width - same as zero. */ - if (cp == end_ptr || *cp != '$') - unterminated = true; + if (isNull) + width = 0; + else if (typid == INT4OID) + width = DatumGetInt32(value); + else if (typid == INT2OID) + width = DatumGetInt16(value); else { - ++cp; - if (cp == end_ptr) - unterminated = true; - } - if (unterminated) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unterminated conversion specifier"))); + char *str; - /* There's no argument 0. */ - if (arg == 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("conversion specifies argument 0, but arguments are numbered from 1"))); + /* simple IO cast to int */ + if (typid != prev_type_width) + { + Oid typoutputfunc; + bool typIsVarlena; + + getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena); + fmgr_info(typoutputfunc, &typoutputinfo_width); + prev_type_width = typid; + } + + /* Stringify. */ + str = OutputFunctionCall(&typoutputinfo_width, value); + + /* get int value */ + width = pg_atoi(str, sizeof(int32), '\0'); + pfree(str); + } } - /* Not enough arguments? Deduct 1 to avoid counting format string. */ - if (arg > nargs - 1) + /* We calculate -width later but -INT_MIN is undefined for int. */ + if (width <= INT_MIN) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("number is out of range"))); + + if (valarg >= 0) + /* be consistent, move ordered argument together with + * positional */ + arg = valarg; + + if (arg >= nargs) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("too few arguments for format"))); @@ -4195,6 +4342,8 @@ text_format(PG_FUNCTION_ARGS) if (!OidIsValid(typid)) elog(ERROR, "could not determine data type of format() input"); + arg++; + /* * Get the appropriate typOutput function, reusing previous one if * same type as previous argument. 
That's particularly useful in the @@ -4221,7 +4370,7 @@ text_format(PG_FUNCTION_ARGS) case 'I': case 'L': text_format_string_conversion(&str, *cp, &typoutputfinfo, - value, isNull); + value, isNull, flags, width); break; default: ereport(ERROR, @@ -4244,23 +4393,65 @@ text_format(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(result); } +/* + * Add spaces on begin or on end when it is necessary + */ +static void +text_format_append_string(StringInfo buf, const char *str, + int flags, int width) +{ + bool align_to_left = false; + int len; + + /* fast path */ + if (width == 0) + { + appendStringInfoString(buf, str); + return; + } + else if (width < 0 || (flags & TEXT_FORMAT_FLAG_MINUS)) + { + align_to_left = true; + if (width < 0) + width = -width; + } + + len = pg_mbstrlen(str); + if (align_to_left) + { + appendStringInfoString(buf, str); + if (len < width) + appendStringInfoSpaces(buf, width - len); + } + else + { + /* align_to_right */ + if (len < width) + appendStringInfoSpaces(buf, width - len); + appendStringInfoString(buf, str); + } +} + /* Format a %s, %I, or %L conversion. */ static void text_format_string_conversion(StringInfo buf, char conversion, FmgrInfo *typOutputInfo, - Datum value, bool isNull) + Datum value, bool isNull, + int flags, int width) { char *str; - /* Handle NULL arguments before trying to stringify the value. */ if (isNull) { - if (conversion == 'L') - appendStringInfoString(buf, "NULL"); + if (conversion == 's') + text_format_append_string(buf, "", flags, width); + else if (conversion == 'L') + text_format_append_string(buf, "NULL", flags, width); else if (conversion == 'I') ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("null values cannot be formatted as an SQL identifier"))); + return; } @@ -4271,18 +4462,18 @@ text_format_string_conversion(StringInfo buf, char conversion, if (conversion == 'I') { /* quote_identifier may or may not allocate a new string. 
*/ - appendStringInfoString(buf, quote_identifier(str)); + text_format_append_string(buf, quote_identifier(str), flags, width); } else if (conversion == 'L') { char *qstr = quote_literal_cstr(str); - appendStringInfoString(buf, qstr); + text_format_append_string(buf, qstr, flags, width); /* quote_literal_cstr() always allocates a new string */ pfree(qstr); } else - appendStringInfoString(buf, str); + text_format_append_string(buf, str, flags, width); /* Cleanup. */ pfree(str); diff --git a/src/test/regress/expected/text.out b/src/test/regress/expected/text.out index b756583..e05a1e5 100644 --- a/src/test/regress/expected/text.out +++ b/src/test/regress/expected/text.out @@ -256,12 +256,20 @@ select format('%1$s %4$s', 1, 2, 3); ERROR: too few arguments for format select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); ERROR: too few arguments for format +select format('%0$s', 'Hello'); +ERROR: conversion specifies argument 0, but arguments are numbered from 1 +select format('%*0$s', 'Hello'); +ERROR: conversion specifies argument 0, but arguments are numbered from 1 select format('%1s', 1); -ERROR: unterminated conversion specifier + format +-------- + 1 +(1 row) + select format('%1$', 1); ERROR: unterminated conversion specifier select format('%1$1', 1); -ERROR: unrecognized conversion specifier "1" +ERROR: unterminated conversion specifier -- check mix of positional and ordered placeholders select format('Hello %s %1$s %s', 'World', 'Hello again'); format @@ -328,3 +336,74 @@ from generate_series(1,200) g(i); 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200 (1 row) +-- left, right align +select format('>>%10s<<', 'Hello') +union all +select format('>>%10s<<', NULL) +union all +select format('>>%10s<<', '') +union all +select format('>>%-10s<<', '') +union all +select format('>>%-10s<<', 'Hello') +union all +select format('>>%-10s<<', NULL) +union all +select format('>>%1$10s<<', 'Hello') +union all +select format('>>%1$-10I<<', 'Hello') +union all +select format('>>%2$*1$L<<', 10, 'Hello') +union all +select format('>>%2$*1$L<<', 10, NULL) +union all +select format('>>%2$*1$L<<', -10, NULL) +union all +select format('>>%*s<<', 10, 'Hello'); + format +---------------- + >> Hello<< + >> << + >> << + >> << + >>Hello << + >> << + >> Hello<< + >>"Hello" << + >> 'Hello'<< + >> NULL<< + >>NULL << + >> Hello<< +(12 rows) + +select format('>>%*1$s<<', 10, 'Hello'); + format +---------------- + >> Hello<< +(1 row) + +select format('>>%-s<<', 'Hello'); + format +----------- + >>Hello<< +(1 row) + +-- NULL is not different to zero here +select format('>>%10L<<', NULL); + format +---------------- + >> NULL<< +(1 row) + +select format('>>%2$*1$L<<', NULL, 'Hello'); + format +------------- + >>'Hello'<< +(1 row) + +select format('>>%2$*1$L<<', 0, 'Hello'); + format +------------- + >>'Hello'<< +(1 row) + diff --git a/src/test/regress/sql/text.sql 
b/src/test/regress/sql/text.sql index a96e9f7..1c68754 100644 --- a/src/test/regress/sql/text.sql +++ b/src/test/regress/sql/text.sql @@ -78,6 +78,8 @@ select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); -- should fail select format('%1$s %4$s', 1, 2, 3); select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); +select format('%0$s', 'Hello'); +select format('%*0$s', 'Hello'); select format('%1s', 1); select format('%1$', 1); select format('%1$1', 1); @@ -97,3 +99,36 @@ select format('Hello', variadic NULL); -- variadic argument allows simulating more than FUNC_MAX_ARGS parameters select format(string_agg('%s',','), variadic array_agg(i)) from generate_series(1,200) g(i); + +-- left, right align +select format('>>%10s<<', 'Hello') +union all +select format('>>%10s<<', NULL) +union all +select format('>>%10s<<', '') +union all +select format('>>%-10s<<', '') +union all +select format('>>%-10s<<', 'Hello') +union all +select format('>>%-10s<<', NULL) +union all +select format('>>%1$10s<<', 'Hello') +union all +select format('>>%1$-10I<<', 'Hello') +union all +select format('>>%2$*1$L<<', 10, 'Hello') +union all +select format('>>%2$*1$L<<', 10, NULL) +union all +select format('>>%2$*1$L<<', -10, NULL) +union all +select format('>>%*s<<', 10, 'Hello'); + +select format('>>%*1$s<<', 10, 'Hello'); +select format('>>%-s<<', 'Hello'); + +-- NULL is not different to zero here +select format('>>%10L<<', NULL); +select format('>>%2$*1$L<<', NULL, 'Hello'); +select format('>>%2$*1$L<<', 0, 'Hello');