From 362d0a20ccdace50055e37f996f5e168f6f86873 Mon Sep 17 00:00:00 2001 From: Shlok Kyal Date: Tue, 5 Nov 2024 11:11:25 +0530 Subject: [PATCH v13] Disallow UPDATE/DELETE on table with generated column as REPLICA IDENTITY UPDATE or DELETE operations on tables with unpublished generated columns set as REPLICA IDENTITY are not permitted. This patch ensures that if an UPDATE or DELETE command is executed on such tables, an error will be thrown. With this patch, the behavior has changed for the test added in commit adedf54. Additionally, there is no other way to trigger the bug that was fixed by commit adedf54, so the test has been removed. --- src/backend/commands/publicationcmds.c | 159 +++++++++++++--------- src/backend/executor/execReplication.c | 32 ++++- src/backend/utils/cache/relcache.c | 57 ++++++-- src/include/catalog/pg_publication.h | 7 + src/include/commands/publicationcmds.h | 7 +- src/test/regress/expected/publication.out | 24 ++++ src/test/regress/sql/publication.sql | 25 ++++ src/test/subscription/t/100_bugs.pl | 16 +-- 8 files changed, 232 insertions(+), 95 deletions(-) diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index 031c84ec29..d2b4c8e9a6 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, } /* - * Check if all columns referenced in the REPLICA IDENTITY are covered by - * the column list. + * Check for invalid columns in the publication table definition. * - * Returns true if any replica identity column is not covered by column list. + * This function evaluates two conditions: + * + * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered + * by the column list. If any column is missing, *invalid_column_list is set + * to true. + * + * 2. Ensures that the REPLICA IDENTITY does not contain unpublished generated + * columns. If an unpublished generated column is found, + * *unpublished_gen_col is set to true. + * + * Returns true if any of the above conditions are not met. */ bool -pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, - bool pubviaroot) +pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, + bool pubviaroot, bool pubgencols, + bool *invalid_column_list, + bool *unpublished_gen_col) { - HeapTuple tuple; Oid relid = RelationGetRelid(relation); Oid publish_as_relid = RelationGetRelid(relation); - bool result = false; - Datum datum; - bool isnull; + Bitmapset *idattrs; + Bitmapset *columns = NULL; + TupleDesc desc = RelationGetDescr(relation); + Publication *pub; + int x; + + *invalid_column_list = false; + *unpublished_gen_col = false; /* * For a partition, if pubviaroot is true, find the topmost ancestor that @@ -368,80 +383,96 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor publish_as_relid = relid; } - tuple = SearchSysCache2(PUBLICATIONRELMAP, - ObjectIdGetDatum(publish_as_relid), - ObjectIdGetDatum(pubid)); + /* Fetch the column list */ + pub = GetPublication(pubid); + check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns); - if (!HeapTupleIsValid(tuple)) - return false; + if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) + { + /* With REPLICA IDENTITY FULL, no column list is allowed. */ + *invalid_column_list = (columns != NULL); - datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple, - Anum_pg_publication_rel_prattrs, - &isnull); + /* + * REPLICA IDENTITY can be FULL only if there is no column list for + * publication. When REPLICA IDENTITY is FULL and the relation + * includes a generated column, but the publish_generated_columns + * option is set to false, this scenario is invalid. + */ + if (!pubgencols && relation->rd_att->constr && + relation->rd_att->constr->has_generated_stored) + *unpublished_gen_col = true; - if (!isnull) - { - int x; - Bitmapset *idattrs; - Bitmapset *columns = NULL; + if (*unpublished_gen_col && *invalid_column_list) + return true; + } - /* With REPLICA IDENTITY FULL, no column list is allowed. */ - if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) - result = true; + /* Remember columns that are part of the REPLICA IDENTITY */ + idattrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_IDENTITY_KEY); - /* Transform the column list datum to a bitmapset. */ - columns = pub_collist_to_bitmapset(NULL, datum, NULL); + /* + * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset + * (to handle system columns the usual way), while column list does not + * use offset, so we can't do bms_is_subset(). Instead, we have to loop + * over the idattrs and check all of them are in the list. + */ + x = -1; + while ((x = bms_next_member(idattrs, x)) >= 0) + { + AttrNumber attnum = (x + FirstLowInvalidHeapAttributeNumber); + Form_pg_attribute att = TupleDescAttr(desc, attnum - 1); - /* Remember columns that are part of the REPLICA IDENTITY */ - idattrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_IDENTITY_KEY); + Assert(!att->attisdropped); - /* - * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are - * offset (to handle system columns the usual way), while column list - * does not use offset, so we can't do bms_is_subset(). Instead, we - * have to loop over the idattrs and check all of them are in the - * list. - */ - x = -1; - while ((x = bms_next_member(idattrs, x)) >= 0) + if (columns == NULL) { - AttrNumber attnum = (x + FirstLowInvalidHeapAttributeNumber); - /* - * If pubviaroot is true, we are validating the column list of the - * parent table, but the bitmap contains the replica identity - * information of the child table. The parent/child attnums may - * not match, so translate them to the parent - get the attname - * from the child, and look it up in the parent. + * Check if pubgencols is false and generated column is part of + * REPLICA IDENTITY */ - if (pubviaroot) + if (!pubgencols) { - /* attribute name in the child table */ - char *colname = get_attname(relid, attnum, false); + *unpublished_gen_col |= att->attgenerated; - /* - * Determine the attnum for the attribute name in parent (we - * are using the column list defined on the parent). - */ - attnum = get_attnum(publish_as_relid, colname); + /* Break the loop if unpublished generated columns exist. */ + if (*unpublished_gen_col) + break; } - /* replica identity column, not covered by the column list */ - if (!bms_is_member(attnum, columns)) - { - result = true; - break; - } + /* Skip validating the column list since it is not defined */ + continue; + } + + /* + * If pubviaroot is true, we are validating the column list of the + * parent table, but the bitmap contains the replica identity + * information of the child table. The parent/child attnums may not + * match, so translate them to the parent - get the attname from the + * child, and look it up in the parent. + */ + if (pubviaroot) + { + /* attribute name in the child table */ + char *colname = get_attname(relid, attnum, false); + + /* + * Determine the attnum for the attribute name in parent (we are + * using the column list defined on the parent). + */ + attnum = get_attnum(publish_as_relid, colname); } - bms_free(idattrs); - bms_free(columns); + /* replica identity column, not covered by the column list */ + *invalid_column_list |= !bms_is_member(attnum, columns); + + if (*invalid_column_list && *unpublished_gen_col) + break; } - ReleaseSysCache(tuple); + bms_free(columns); + bms_free(idattrs); - return result; + return *invalid_column_list || *unpublished_gen_col; } /* check_functions_in_node callback */ diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 54025c9f15..227a8aeea0 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -785,10 +785,22 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) return; /* - * It is only safe to execute UPDATE/DELETE when all columns, referenced - * in the row filters from publications which the relation is in, are - * valid - i.e. when all referenced columns are part of REPLICA IDENTITY - * or the table does not publish UPDATEs or DELETEs. + * It is only safe to execute UPDATE/DELETE when: + * + * 1. All columns, referenced in the row filters from publications which + * the relation is in, are valid - i.e. when all referenced columns are + * part of REPLICA IDENTITY or the table does not publish UPDATEs or + * DELETEs. + * + * 2. All columns, referenced in the column lists from publications which + * the relation is in, are valid - i.e. when all referenced columns are + * part of REPLICA IDENTITY or the table does not publish UPDATEs or + * DELETEs. + * + * 3. All generated columns in REPLICA IDENTITY of the relation, for all + * the publications which the relation is in, are valid - i.e. when + * unpublished generated columns are not part of REPLICA IDENTITY or the + * table does not publish UPDATEs or DELETEs. * * XXX We could optimize it by first checking whether any of the * publications have a row filter for this relation. If not and relation @@ -809,6 +821,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) errmsg("cannot update table \"%s\"", RelationGetRelationName(rel)), errdetail("Column list used by the publication does not cover the replica identity."))); + else if (cmd == CMD_UPDATE && !pubdesc.gencols_valid_for_update) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("cannot update table \"%s\"", + RelationGetRelationName(rel)), + errdetail("Replica identity consists of an unpublished generated column."))); else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), @@ -821,6 +839,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) errmsg("cannot delete from table \"%s\"", RelationGetRelationName(rel)), errdetail("Column list used by the publication does not cover the replica identity."))); + else if (cmd == CMD_DELETE && !pubdesc.gencols_valid_for_delete) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("cannot delete from table \"%s\"", + RelationGetRelationName(rel)), + errdetail("Replica identity consists of an unpublished generated column."))); /* If relation has replica identity we are always good. */ if (OidIsValid(RelationGetReplicaIndex(rel))) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d0892cee24..9205ea86c4 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -5706,12 +5706,19 @@ RelationGetExclusionInfo(Relation indexRelation, * Get the publication information for the given relation. * * Traverse all the publications which the relation is in to get the - * publication actions and validate the row filter expressions for such - * publications if any. We consider the row filter expression as invalid if it - * references any column which is not part of REPLICA IDENTITY. + * publication actions and validate: + * 1. The row filter expressions for such publications if any. We consider the + * row filter expression as invalid if it references any column which is not + * part of REPLICA IDENTITY. + * 2. The column list for such publication if any. We consider column list as + * invalid if it references any column which is not part of REPLICA IDENTITY. + * 3. The generated columns of the relation for such publications. We consider + * any reference of an unpublished generated column in REPLICA IDENTITY as + * invalid. * * To avoid fetching the publication information repeatedly, we cache the - * publication actions and row filter validation information. + * publication actions, row filter validation information, column list + * validation information and generated column validation information. */ void RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) @@ -5734,6 +5741,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) pubdesc->rf_valid_for_delete = true; pubdesc->cols_valid_for_update = true; pubdesc->cols_valid_for_delete = true; + pubdesc->gencols_valid_for_update = true; + pubdesc->gencols_valid_for_delete = true; return; } @@ -5748,6 +5757,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) pubdesc->rf_valid_for_delete = true; pubdesc->cols_valid_for_update = true; pubdesc->cols_valid_for_delete = true; + pubdesc->gencols_valid_for_update = true; + pubdesc->gencols_valid_for_delete = true; /* Fetch the publication membership info. */ puboids = GetRelationPublications(relid); @@ -5777,6 +5788,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) Oid pubid = lfirst_oid(lc); HeapTuple tup; Form_pg_publication pubform; + bool invalid_column_list; + bool unpublished_gen_col; tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); @@ -5811,18 +5824,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) /* * Check if all columns are part of the REPLICA IDENTITY index or not. * - * If the publication is FOR ALL TABLES then it means the table has no - * column list and we can skip the validation. + * Check if all generated columns included in the REPLICA IDENTITY are + * published. */ - if (!pubform->puballtables && - (pubform->pubupdate || pubform->pubdelete) && - pub_collist_contains_invalid_column(pubid, relation, ancestors, - pubform->pubviaroot)) + if ((pubform->pubupdate || pubform->pubdelete) && + pub_contains_invalid_column(pubid, relation, ancestors, + pubform->pubviaroot, + pubform->pubgencols, + &invalid_column_list, + &unpublished_gen_col)) { if (pubform->pubupdate) - pubdesc->cols_valid_for_update = false; + { + pubdesc->cols_valid_for_update = !invalid_column_list; + pubdesc->gencols_valid_for_update = !unpublished_gen_col; + } + if (pubform->pubdelete) - pubdesc->cols_valid_for_delete = false; + { + pubdesc->cols_valid_for_delete = !invalid_column_list; + pubdesc->gencols_valid_for_delete = !unpublished_gen_col; + } } ReleaseSysCache(tup); @@ -5846,6 +5868,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate && !pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete) break; + + /* + * If we know everything is replicated and replica identity has an + * unpublished generated column, there is no point to check for other + * publications. + */ + if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate && + pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate && + !pubdesc->gencols_valid_for_update && + !pubdesc->gencols_valid_for_delete) + break; } if (relation->rd_pubdesc) diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h index 9a83a72d6b..e167b34461 100644 --- a/src/include/catalog/pg_publication.h +++ b/src/include/catalog/pg_publication.h @@ -98,6 +98,13 @@ typedef struct PublicationDesc */ bool cols_valid_for_update; bool cols_valid_for_delete; + + /* + * true if all generated columns which are part of replica identity are + * published or the publication actions do not include UPDATE or DELETE. + */ + bool gencols_valid_for_update; + bool gencols_valid_for_delete; } PublicationDesc; typedef struct Publication diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h index 5487c571f6..c200c19c6d 100644 --- a/src/include/commands/publicationcmds.h +++ b/src/include/commands/publicationcmds.h @@ -33,7 +33,10 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId); extern void InvalidatePublicationRels(List *relids); extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, bool pubviaroot); -extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation, - List *ancestors, bool pubviaroot); +extern bool pub_contains_invalid_column(Oid pubid, Relation relation, + List *ancestors, bool pubviaroot, + bool pubgencols, + bool *invalid_column_list, + bool *unpublished_gen_col); #endif /* PUBLICATIONCMDS_H */ diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 5de2d64d01..12d0611a7c 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -672,6 +672,30 @@ DROP TABLE rf_tbl_abcd_pk; DROP TABLE rf_tbl_abcd_nopk; DROP TABLE rf_tbl_abcd_part_pk; -- ====================================================== +-- ====================================================== +-- test with generated column +SET client_min_messages = 'ERROR'; +CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL); +CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b); +ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx; +-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +ERROR: cannot update table "testpub_gencol" +DETAIL: Replica identity consists of an unpublished generated column. +-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL +ALTER TABLE testpub_gencol REPLICA IDENTITY FULL; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +ERROR: cannot update table "testpub_gencol" +DETAIL: Replica identity consists of an unpublished generated column. +DROP PUBLICATION pub_gencol; +-- ok - generated column "b" is published and is part of REPLICA IDENTITY +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true); +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +DROP PUBLICATION pub_gencol; +DROP TABLE testpub_gencol; +RESET client_min_messages; +-- ====================================================== -- fail - duplicate tables are not allowed if that table has any column lists SET client_min_messages = 'ERROR'; CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert'); diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 48e68bcca2..a29587b45d 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -396,6 +396,31 @@ DROP TABLE rf_tbl_abcd_nopk; DROP TABLE rf_tbl_abcd_part_pk; -- ====================================================== +-- ====================================================== +-- test with generated column +SET client_min_messages = 'ERROR'; +CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL); +CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b); +ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx; + +-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; + +-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL +ALTER TABLE testpub_gencol REPLICA IDENTITY FULL; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +DROP PUBLICATION pub_gencol; + +-- ok - generated column "b" is published and is part of REPLICA IDENTITY +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true); +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +DROP PUBLICATION pub_gencol; + +DROP TABLE testpub_gencol; +RESET client_min_messages; +-- ====================================================== + -- fail - duplicate tables are not allowed if that table has any column lists SET client_min_messages = 'ERROR'; CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert'); diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index cb36ca7b16..794b928f50 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch"); $node_publisher->stop('fast'); $node_subscriber->stop('fast'); -# The bug was that when the REPLICA IDENTITY FULL is used with dropped or -# generated columns, we fail to apply updates and deletes +# The bug was that when the REPLICA IDENTITY FULL is used with dropped +# we fail to apply updates and deletes $node_publisher->rotate_logfile(); $node_publisher->start(); @@ -389,18 +389,14 @@ $node_publisher->safe_psql( 'postgres', qq( CREATE TABLE dropped_cols (a int, b_drop int, c int); ALTER TABLE dropped_cols REPLICA IDENTITY FULL; - CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int); - ALTER TABLE generated_cols REPLICA IDENTITY FULL; - CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols; + CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols; -- some initial data INSERT INTO dropped_cols VALUES (1, 1, 1); - INSERT INTO generated_cols (a, c) VALUES (1, 1); )); $node_subscriber->safe_psql( 'postgres', qq( CREATE TABLE dropped_cols (a int, b_drop int, c int); - CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int); )); $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; @@ -421,7 +417,6 @@ $node_subscriber->safe_psql( $node_publisher->safe_psql( 'postgres', qq( UPDATE dropped_cols SET a = 100; - UPDATE generated_cols SET a = 100; )); $node_publisher->wait_for_catchup('sub_dropped_cols'); @@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql( qq(1), 'replication with RI FULL and dropped columns'); -is( $node_subscriber->safe_psql( - 'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"), - qq(1), - 'replication with RI FULL and generated columns'); - $node_publisher->stop('fast'); $node_subscriber->stop('fast'); -- 2.34.1