From 030f30d330dba3a6c3ff3f9561348375d30a1486 Mon Sep 17 00:00:00 2001 From: Asim R P Date: Fri, 20 Sep 2019 17:31:25 +0530 Subject: [PATCH v13 2/3] Tests to replay create database operation on standby The tests demonstrate that a standby fails to replay a create database WAL record during crash recovery, if one or more of the underlying directories are missing from the file system. This can happen if a drop tablespace or drop database WAL record has been replayed in archive recovery, before a crash. And then the create database record happens to be replayed again during crash recovery. The failures indicate bugs that need to be fixed. The first test, TEST 4, performs several DDL operations resulting in a database directory being removed, along with a few create database operations. It expects crash recovery to succeed because for each missing directory encountered during create database replay, a matching drop tablespace or drop database WAL record is found later. The second test, TEST 5, validates that a standby rightfully aborts replay during archive recovery, if a missing directory is encountered when replaying a create database WAL record. These tests have been proposed and implemented in various ways by Alexandra Wang, Anastasia Lubennikova, Kyotaro Horiguchi, Paul Guo and me. 
--- src/test/recovery/t/011_crash_recovery.pl | 162 +++++++++++++++++++++- 1 file changed, 161 insertions(+), 1 deletion(-) diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl index d7806e6671..a4e1fcb5dc 100644 --- a/src/test/recovery/t/011_crash_recovery.pl +++ b/src/test/recovery/t/011_crash_recovery.pl @@ -9,9 +9,10 @@ use warnings; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; +use File::Path qw(rmtree); use Config; -plan tests => 3; +plan tests => 5; my $node = PostgreSQL::Test::Cluster->new('primary'); $node->init(allows_streaming => 1); @@ -62,3 +63,162 @@ is($node->safe_psql('postgres', qq[SELECT pg_xact_status('$xid');]), $stdin .= "\\q\n"; $tx->finish; # wait for psql to quit gracefully + +# TEST 4 +# +# Ensure that a missing tablespace directory during crash recovery on +# a standby is handled correctly. The standby should finish crash +# recovery successfully because a matching drop database record is +# found in the WAL. The following scenarios are covered: +# +# 1. Create a database against a user-defined tablespace then drop the +# database. +# +# 2. Create a database against a user-defined tablespace then drop the +# database and the tablespace. +# +# 3. Move a database from source tablespace to target tablespace then +# drop the source tablespace. +# +# 4. Create a database from another database as template then drop the +# template database. 
+#
+# All four scenarios are driven by one SQL batch sent to the primary below.
+
+my $node_master = PostgreSQL::Test::Cluster->new('master2');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Create the primary's tablespace directories, tablespaces and template_db
+my $dropme_ts_master1 = PostgreSQL::Test::Utils::tempdir();
+$dropme_ts_master1 = PostgreSQL::Test::Utils::perl2host($dropme_ts_master1);
+my $dropme_ts_master2 = PostgreSQL::Test::Utils::tempdir();
+$dropme_ts_master2 = PostgreSQL::Test::Utils::perl2host($dropme_ts_master2);
+my $source_ts_master = PostgreSQL::Test::Utils::tempdir();
+$source_ts_master = PostgreSQL::Test::Utils::perl2host($source_ts_master);
+my $target_ts_master = PostgreSQL::Test::Utils::tempdir();
+$target_ts_master = PostgreSQL::Test::Utils::perl2host($target_ts_master);
+
+$node_master->safe_psql('postgres',
+	qq[CREATE TABLESPACE dropme_ts1 location '$dropme_ts_master1';
+	   CREATE TABLESPACE dropme_ts2 location '$dropme_ts_master2';
+	   CREATE TABLESPACE source_ts location '$source_ts_master';
+	   CREATE TABLESPACE target_ts location '$target_ts_master';
+	   CREATE DATABASE template_db IS_TEMPLATE = true;]);
+
+my $dropme_ts_standby1 = PostgreSQL::Test::Utils::tempdir();
+$dropme_ts_standby1 = PostgreSQL::Test::Utils::perl2host($dropme_ts_standby1);
+my $dropme_ts_standby2 = PostgreSQL::Test::Utils::tempdir();
+$dropme_ts_standby2 = PostgreSQL::Test::Utils::perl2host($dropme_ts_standby2);
+my $source_ts_standby = PostgreSQL::Test::Utils::tempdir();
+$source_ts_standby = PostgreSQL::Test::Utils::perl2host($source_ts_standby);
+my $target_ts_standby = PostgreSQL::Test::Utils::tempdir();
+$target_ts_standby = PostgreSQL::Test::Utils::perl2host($target_ts_standby);
+
+# Take a base backup, remapping each tablespace to its standby directory
+my $backup_name = 'my_backup';
+my $ts_mapping = [ "--tablespace-mapping=$dropme_ts_master1=$dropme_ts_standby1",
+	"--tablespace-mapping=$dropme_ts_master2=$dropme_ts_standby2",
+	"--tablespace-mapping=$source_ts_master=$source_ts_standby",
+	"--tablespace-mapping=$target_ts_master=$target_ts_standby" ];
+$node_master->backup($backup_name, backup_options => $ts_mapping);
+
+my $node_standby = PostgreSQL::Test::Cluster->new('standby2');
+$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# Wait until the primary reports exactly one replication connection
+$node_master->poll_query_until(
+	'postgres', 'SELECT count(*) = 1 FROM pg_stat_replication');
+
+# Replay to the primary's write LSN, then restartpoint after tablespace creation
+$node_master->wait_for_catchup($node_standby, 'replay',
+	$node_master->lsn('write'));
+$node_standby->safe_psql('postgres', 'CHECKPOINT');
+
+# Do immediate shutdown just after a sequence of CREATE DATABASE / DROP
+# DATABASE / DROP TABLESPACE.  This causes CREATE DATABASE WAL records
+# to be applied to already-removed directories.
+$node_master->safe_psql('postgres',
+	q[CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts1;
+	  CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts2;
+	  CREATE DATABASE moveme_db TABLESPACE source_ts;
+	  ALTER DATABASE moveme_db SET TABLESPACE target_ts;
+	  CREATE DATABASE newdb TEMPLATE template_db;
+	  ALTER DATABASE template_db IS_TEMPLATE = false;
+	  DROP DATABASE dropme_db1;
+	  DROP DATABASE dropme_db2; DROP TABLESPACE dropme_ts2;
+	  DROP TABLESPACE source_ts;
+	  DROP DATABASE template_db;]);
+$node_master->wait_for_catchup($node_standby, 'replay',
+	$node_master->lsn('write'));
+$node_standby->stop('immediate');
+
+# Should restart ignoring directory creation error.
+is($node_standby->start(fail_ok => 1), 1, 'standby completes crash recovery');
+
+# TEST 5
+#
+# Ensure that a missing tablespace directory during create database
+# replay immediately causes panic if the standby has already reached
+# consistent state (archive recovery is in progress).
+
+$node_master = PostgreSQL::Test::Cluster->new('master3');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Create a tablespace on the primary and a database that lives in it
+my $ts_master = PostgreSQL::Test::Utils::tempdir();
+$ts_master = PostgreSQL::Test::Utils::perl2host($ts_master);
+$node_master->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$ts_master'");
+$node_master->safe_psql('postgres', "CREATE DATABASE db1 TABLESPACE ts1");
+
+my $ts_standby = PostgreSQL::Test::Utils::tempdir("standby");
+$ts_standby = PostgreSQL::Test::Utils::perl2host($ts_standby);
+
+# Take a base backup with the tablespace remapped to $ts_standby
+$backup_name = 'my_backup';
+$node_master->backup($backup_name,
+	backup_options =>
+	  [ "--tablespace-mapping=$ts_master=$ts_standby" ]);
+$node_standby = PostgreSQL::Test::Cluster->new('standby3');
+$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# Make sure standby reached consistency and starts accepting connections
+$node_standby->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Remove standby tablespace directory so it will be missing when
+# replay resumes.
+#
+# The tablespace mapping is lost when the standby node is initialized
+# from basebackup because RecursiveCopy::copypath creates a new temp
+# directory for each tablespace symlink found in backup.  We must
+# obtain the correct tablespace directory by querying standby.
+$ts_standby = $node_standby->safe_psql(
+	'postgres',
+	"select pg_tablespace_location(oid) from pg_tablespace where spcname = 'ts1'");
+rmtree($ts_standby);
+
+# Create a database in the tablespace and a table in default tablespace
+$node_master->safe_psql('postgres',
+	q[CREATE TABLE should_not_replay_insertion(a int);
+	  CREATE DATABASE db2 WITH TABLESPACE ts1;
+	  INSERT INTO should_not_replay_insertion VALUES (1);]);
+
+# The standby must fail rather than silently skip the CREATE DATABASE record
+if ($node_master->poll_query_until(
+		'postgres',
+		'SELECT count(*) = 0 FROM pg_stat_replication',
+		't') == 1)
+{
+	pass('standby failed as expected');
+	# The standby is known to have failed.  Clearing its recorded pid
+	# keeps PostgreSQL::Test::Cluster from raising an error when it
+	# tries to stop the node during the teardown sequence.
+	$node_standby->{_pid} = undef;
+}
+else
+{
+	fail('standby did not fail before the poll timeout');
+}
-- 
2.27.0