From d0553ec7446f0f352a281c591db228029c0946c4 Mon Sep 17 00:00:00 2001 From: jian he Date: Sat, 16 Nov 2024 15:57:59 +0800 Subject: [PATCH v8 1/1] COPY option on_error set_to_null Extend "on_error action" by introducing a new option: on_error set_to_null. Due to the current grammar, we cannot use "on_error null", so I chose on_error set_to_null. Any data type conversion error during COPY FROM will set that column's value to NULL. This only works with COPY FROM and non-binary formats. However, this respects not-null constraints: if a conversion error is converted to NULL but the column has a not-null constraint, a not-null constraint violation ERROR will be raised. This also respects not-null constraints on domains, meaning on_error set_to_null may raise an ERROR for a failed domain_in function call. Discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=BP3d1_aSfe_+Q@mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 7 +-- src/backend/commands/copy.c | 12 +++-- src/backend/commands/copyfrom.c | 39 ++++++++++---- src/backend/commands/copyfromparse.c | 40 ++++++++++++++ src/bin/psql/tab-complete.in.c | 2 +- src/include/commands/copy.h | 1 + src/test/regress/expected/copy2.out | 80 +++++++++++++++++++++++++++- src/test/regress/sql/copy2.sql | 79 +++++++++++++++++++++++++++ 8 files changed, 239 insertions(+), 21 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 8394402f09..dcbfa17a3c 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -394,12 +394,13 @@ COPY { table_name [ ( error_action value of - stop means fail the command, while - ignore means discard the input row and continue with the next one. + stop means fail the command, + ignore means discard the input row and continue with the next one, and + set_to_null means replace columns containing erroneous input values with null and move to the next row. The default is stop. 
- The ignore option is applicable only for COPY FROM + The ignore and set_to_null options are applicable only for COPY FROM when the FORMAT is text or csv. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3485ba8663..304022cd86 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) parser_errposition(pstate, def->location))); /* - * Allow "stop", or "ignore" values. + * Allow "stop", "ignore", or "set_to_null" values. */ if (pg_strcasecmp(sval, "stop") == 0) return COPY_ON_ERROR_STOP; if (pg_strcasecmp(sval, "ignore") == 0) return COPY_ON_ERROR_IGNORE; + if (pg_strcasecmp(sval, "set_to_null") == 0) + return COPY_ON_ERROR_NULL; ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -904,13 +906,13 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("only ON_ERROR STOP is allowed in BINARY mode"))); - if (opts_out->reject_limit && !opts_out->on_error) + if (opts_out->reject_limit && !(opts_out->on_error == COPY_ON_ERROR_NULL || opts_out->on_error == COPY_ON_ERROR_IGNORE)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), /*- translator: first and second %s are the names of COPY option, e.g. - * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */ - errmsg("COPY %s requires %s to be set to %s", - "REJECT_LIMIT", "ON_ERROR", "IGNORE"))); + * ON_ERROR, third and fourth are the values of the COPY option, e.g. 
IGNORE or SET_TO_NULL */ + errmsg("COPY %s requires %s to be set to %s or %s", + "REJECT_LIMIT", "ON_ERROR", "IGNORE", "SET_TO_NULL"))); } /* diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 07cbd5d22b..f00f383baa 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1003,7 +1003,7 @@ CopyFrom(CopyFromState cstate) if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull)) break; - if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && + if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) && cstate->escontext->error_occurred) { /* @@ -1018,12 +1018,28 @@ CopyFrom(CopyFromState cstate) pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED, cstate->num_errors); - if (cstate->opts.reject_limit > 0 && \ - cstate->num_errors > cstate->opts.reject_limit) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility", - (long long) cstate->opts.reject_limit))); + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + { + if (cstate->opts.reject_limit > 0 && cstate->num_errors > cstate->opts.reject_limit) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility", + (long long) cstate->opts.reject_limit))); + } + else + { + /* Provide different error msg if reject_limit is zero */ + if (cstate->opts.reject_limit == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("failed to replace column containing erroneous data with null"), + errhint("Consider specifying the REJECT LIMIT option to skip erroneous rows."))); + else if (cstate->num_errors > cstate->opts.reject_limit) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("encountered more than REJECT_LIMIT (%lld) rows with data type incompatibility", + (long 
long) cstate->opts.reject_limit))); + } /* Repeat NextCopyFrom() until no soft error occurs */ continue; @@ -1321,7 +1337,7 @@ CopyFrom(CopyFromState cstate) /* Done, clean up */ error_context_stack = errcallback.previous; - if (cstate->opts.on_error != COPY_ON_ERROR_STOP && + if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) && cstate->num_errors > 0 && cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT) ereport(NOTICE, @@ -1474,10 +1490,11 @@ BeginCopyFrom(ParseState *pstate, cstate->escontext->error_occurred = false; /* - * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other - * options later + * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL. + * We'll add other options later */ - if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE || + cstate->opts.on_error == COPY_ON_ERROR_NULL) cstate->escontext->details_wanted = false; } else diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index d1d43b53d8..142fdf3fbb 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -960,6 +960,46 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, { Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP); + /* + * We encountered an error while parsing one of attributes. + */ + if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + { + /* + * Temporary unset error_occurred. for basetype, null value + * is allowed, for domain type, we may have not-null + * constraint. we pass NULL to InputFunctionCallSafe to check + * if this type have not-null constraint or not. 
If it's + * a domain with a not-null constraint, then we have to error + * out, which is consistent with the column-level + * not-null constraint. + */ + cstate->escontext->error_occurred = false; + + if (!InputFunctionCallSafe(&in_functions[m], + NULL, + typioparams[m], + att->atttypmod, + (Node *) cstate->escontext, + &values[m])) + ereport(ERROR, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("domain %s does not allow null values", + format_type_be(typioparams[m])), + errdatatype(typioparams[m]))); + + /* If the datatype is okay with NULL, replace + * with null + */ + nulls[m] = true; + values[m] = (Datum) 0; + continue; + } + + /* + * Update the copy state counter for the number of erroneous rows, + * as we are going to return from the function. + */ cstate->num_errors++; if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE) diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index fad2277991..c2902ffc33 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id, COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT", - "ON_ERROR", "LOG_VERBOSITY"); + "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY"); /* Complete COPY FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 4002a7f538..051ca12d10 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice { COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */ COPY_ON_ERROR_IGNORE, /* ignore errors */ + COPY_ON_ERROR_NULL, /* set error field to null */ } CopyOnErrorChoice; /* diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 64ea33aeae..62cbe0c2b3 100644 --- 
a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (on_error ignore, on_error ignore); ^ +COPY x from stdin (on_error set_to_null, on_error set_to_null); +ERROR: conflicting or redundant options +LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul... + ^ COPY x from stdin (log_verbosity default, log_verbosity verbose); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb... @@ -92,6 +96,8 @@ COPY x from stdin (format BINARY, null 'x'); ERROR: cannot specify NULL in BINARY mode COPY x from stdin (format BINARY, on_error ignore); ERROR: only ON_ERROR STOP is allowed in BINARY mode +COPY x from stdin (format BINARY, on_error set_to_null); +ERROR: only ON_ERROR STOP is allowed in BINARY mode COPY x from stdin (on_error unsupported); ERROR: COPY ON_ERROR "unsupported" not recognized LINE 1: COPY x from stdin (on_error unsupported); @@ -124,12 +130,16 @@ COPY x to stdout (format BINARY, on_error unsupported); ERROR: COPY ON_ERROR cannot be used with COPY TO LINE 1: COPY x to stdout (format BINARY, on_error unsupported); ^ +COPY x to stdin (on_error set_to_null); +ERROR: COPY ON_ERROR cannot be used with COPY TO +LINE 1: COPY x to stdin (on_error set_to_null); + ^ COPY x from stdin (log_verbosity unsupported); ERROR: COPY LOG_VERBOSITY "unsupported" not recognized LINE 1: COPY x from stdin (log_verbosity unsupported); ^ COPY x from stdin with (reject_limit 1); -ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE +ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE or SET_TO_NULL COPY x from stdin with (on_error ignore, reject_limit 0); ERROR: REJECT_LIMIT (0) must be greater than zero -- too many columns in column list: should fail @@ -769,6 +779,46 @@ CONTEXT: COPY check_ign_err NOTICE: skipping row due to data 
type incompatibility at line 8 for column "k": "a" CONTEXT: COPY check_ign_err NOTICE: 6 rows were skipped due to data type incompatibility +CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0); +CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0); +CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT); +--ok +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +--ok +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +-- check inserted content +TABLE t_on_error_null; + a | b | c +----+----+---- + 11 | | 12 + 1 | 11 | +(2 rows) + +--fail. we do check domain not-null constraint +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +ERROR: domain d_int_not_null does not allow null values +CONTEXT: COPY t_on_error_null, line 1, column a: "a" +--fail. first check constraint fails, then we convert column a value to null +-- by on_error set_to_nul but column a domain type not allow null value. +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +ERROR: domain d_int_not_null does not allow null values +CONTEXT: COPY t_on_error_null, line 1, column a: "-1" +--ok. Check interaction with REJECT LIMIT +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2); +ERROR: domain d_int_not_null does not allow null values +CONTEXT: COPY t_on_error_null, line 1, column a: "-1" +-- check inserted content +TABLE t_on_error_null; + a | b | c +----+----+---- + 11 | | 12 + 1 | 11 | +(2 rows) + +--fail. 
Check interaction with REJECT LIMIT +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2); +ERROR: domain d_int_not_null does not allow null values +CONTEXT: COPY t_on_error_null, line 1, column a: "-1" -- tests for on_error option with log_verbosity and null constraint via domain CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); @@ -776,6 +826,10 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); NOTICE: skipping row due to data type incompatibility at line 2 for column "l": null input CONTEXT: COPY check_ign_err2 NOTICE: 1 row was skipped due to data type incompatibility +-- check null substitution massages. +COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose); +ERROR: domain dcheck_ign_err2 does not allow null values +CONTEXT: COPY check_ign_err2, line 2, column l: null input COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent); -- reset context choice \set SHOW_CONTEXT errors @@ -813,6 +867,28 @@ ERROR: skipped more than REJECT_LIMIT (3) rows due to data type incompatibility CONTEXT: COPY check_ign_err, line 5, column n: "" COPY check_ign_err FROM STDIN WITH (on_error ignore, reject_limit 4); NOTICE: 4 rows were skipped due to data type incompatibility +-- tests for on_error set_to_null option +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error set_to_null); +\pset null NULL +SELECT * FROM check_ign_err; + n | m | k +------+-----+------ + 1 | {1} | NULL + 2 | {2} | 1 + 3 | {3} | 2 + 4 | {4} | NULL + NULL | {5} | NULL +(5 rows) + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +ERROR: missing data for column "k" +CONTEXT: COPY check_ign_err, line 1, column m: "" +--should fail. 
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +ERROR: extra data after last expected column +CONTEXT: COPY check_ign_err, line 1: "1,{1},1,1" -- clean up DROP TABLE forcetest; DROP TABLE vistest; @@ -828,6 +904,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE t_on_error_null; +DROP DOMAIN d_int_not_null; DROP TABLE check_ign_err2; DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 45273557ce..c9884cc169 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b)); COPY x from stdin (convert_selectively (a), convert_selectively (b)); COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii'); COPY x from stdin (on_error ignore, on_error ignore); +COPY x from stdin (on_error set_to_null, on_error set_to_null); COPY x from stdin (log_verbosity default, log_verbosity verbose); -- incorrect options COPY x from stdin (format BINARY, delimiter ','); COPY x from stdin (format BINARY, null 'x'); COPY x from stdin (format BINARY, on_error ignore); +COPY x from stdin (format BINARY, on_error set_to_null); COPY x from stdin (on_error unsupported); COPY x from stdin (format TEXT, force_quote(a)); COPY x from stdin (format TEXT, force_quote *); @@ -87,6 +89,7 @@ COPY x from stdin (format TEXT, force_null *); COPY x to stdout (format CSV, force_null(a)); COPY x to stdout (format CSV, force_null *); COPY x to stdout (format BINARY, on_error unsupported); +COPY x to stdin (on_error set_to_null); COPY x from stdin (log_verbosity unsupported); COPY x from stdin with (reject_limit 1); COPY x from stdin with (on_error ignore, reject_limit 0); @@ -534,6 +537,51 @@ a {2} 2 8 {8} 8 \. 
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0); +CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0); +CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT); + +--ok +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +11 a 12 +\. + +--ok +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +1 11 d +\. + +-- check inserted content +TABLE t_on_error_null; + +--fail. we do check domain not-null constraint +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +a 11 14 +\. + +--fail. first check constraint fails, then we convert column a value to null +-- by on_error set_to_nul but column a domain type not allow null value. +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +-1 11 13 +\. + +--ok. Check interaction with REJECT LIMIT +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2); +-1 11 13 +a 11 14 +1 11 14 +\. + +-- check inserted content +TABLE t_on_error_null; + +--fail. Check interaction with REJECT LIMIT +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null, reject_limit 2); +-1 11 13 +a 11 14 +\N 11 14 +\. + -- tests for on_error option with log_verbosity and null constraint via domain CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); @@ -541,6 +589,12 @@ COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); 1 {1} 1 'foo' 2 {2} 2 \N \. +-- check null substitution massages. +COPY check_ign_err2 FROM STDIN WITH (on_error set_to_null, reject_limit 2, log_verbosity verbose); +1 {1} foo 'foo' +2 {2} 2 \N +2 {2} 2 'foooooooooooooooo' +\. COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity silent); 3 {3} 3 'bar' 4 {4} 4 \N @@ -588,6 +642,29 @@ a {7} 7 10 {10} 10 \. 
+-- tests for on_error set_to_null option +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error set_to_null); +1 {1} a +2 {2} 1 +3 {3} 2 +4 {4} b +a {5} c +\. + +\pset null NULL +SELECT * FROM check_ign_err; + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +1, +\. + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +1,{1},1,1 +\. + -- clean up DROP TABLE forcetest; DROP TABLE vistest; @@ -603,6 +680,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE t_on_error_null; +DROP DOMAIN d_int_not_null; DROP TABLE check_ign_err2; DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- 2.34.1