From 32e2c106aee30202f5731a163a6e2f1a88a6d06b Mon Sep 17 00:00:00 2001 From: Asim R P Date: Fri, 20 Sep 2019 17:34:19 +0530 Subject: [PATCH v8 2/3] Tests to replay create database operation on standby The tests demonstrate that standby fails to replay a create database WAL record during crash recovery, if one or more of underlying directories are missing from the file system. This can happen if a drop tablespace or drop database WAL record has been replayed in archive recovery, before a crash. And then the create database record happens to be replayed again during crash recovery. The failures indicate bugs that need to be fixed. The first test, TEST 4, performs several DDL operations resulting in a database directory being removed, along with a few create database operations. It expects crash recovery to succeed because for each missing directory encountered during create database replay, a matching drop tablespace or drop database WAL record is found later. Second test, TEST 5, validates that a standby rightfully aborts replay during archive recovery, if a missing directory is encountered when replaying create database WAL record. These tests have been proposed and implemented in various ways by Alexandra, Anastasia, Kyotaro, Paul and me. --- src/test/perl/PostgresNode.pm | 34 ++++- src/test/recovery/t/011_crash_recovery.pl | 152 +++++++++++++++++++++- 2 files changed, 178 insertions(+), 8 deletions(-) diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 3cae483ddb..e6e7ea505d 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -546,13 +546,22 @@ target server since it isn't done by default. sub backup { - my ($self, $backup_name) = @_; + my ($self, $backup_name, %params) = @_; my $backup_path = $self->backup_dir . '/' . 
$backup_name; my $name = $self->name; + my @rest = (); + + if (defined $params{tablespace_mappings}) + { + my @ts_mappings = split(/,/, $params{tablespace_mappings}); + foreach my $elem (@ts_mappings) { + push(@rest, '--tablespace-mapping='.$elem); + } + } print "# Taking pg_basebackup $backup_name from node \"$name\"\n"; TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-h', - $self->host, '-p', $self->port, '--no-sync'); + $self->host, '-p', $self->port, '--no-sync', @rest); print "# Backup finished\n"; return; } @@ -1640,13 +1649,24 @@ Returns 1 if successful, 0 if timed out. sub poll_query_until { - my ($self, $dbname, $query, $expected) = @_; + my ($self, $dbname, $query, $params) = @_; + my $expected; - $expected = 't' unless defined($expected); # default value + # Be backwards-compatible + if (defined $params and ref $params eq '') + { + $params = { + expected => $params, + timeout => 180 + }; + } + + $params->{expected} = 't' unless defined($params->{expected}); + $params->{timeout} = 180 unless defined($params->{timeout}); my $cmd = [ 'psql', '-XAt', '-c', $query, '-d', $self->connstr($dbname) ]; my ($stdout, $stderr); - my $max_attempts = 180 * 10; + my $max_attempts = $params->{timeout} * 10; my $attempts = 0; while ($attempts < $max_attempts) @@ -1656,7 +1676,7 @@ sub poll_query_until chomp($stdout); $stdout =~ s/\r//g if $TestLib::windows_os; - if ($stdout eq $expected) + if ($stdout eq $params->{expected}) { return 1; } @@ -1674,7 +1694,7 @@ sub poll_query_until diag qq(poll_query_until timed out executing this query: $query expecting this output: -$expected +$params->{expected} last actual query output: $stdout with stderr: diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl index 526a3481fb..013d3d5b0c 100644 --- a/src/test/recovery/t/011_crash_recovery.pl +++ b/src/test/recovery/t/011_crash_recovery.pl @@ -6,6 +6,7 @@ use warnings; use PostgresNode; use TestLib; use Test::More; +use 
File::Path qw(rmtree); use Config; if ($Config{osname} eq 'MSWin32') { @@ -15,7 +16,7 @@ if ($Config{osname} eq 'MSWin32') } else { - plan tests => 3; + plan tests => 5; } my $node = get_new_node('master'); @@ -66,3 +67,152 @@ is($node->safe_psql('postgres', qq[SELECT txid_status('$xid');]), 'aborted', 'xid is aborted after crash'); $tx->kill_kill; + +# TEST 4 +# +# Ensure that a missing tablespace directory during crash recovery on +# a standby is handled correctly. The standby should finish crash +# recovery successfully because a matching drop database record is +# found in the WAL. The following scenarios are covered: +# +# 1. Create a database against a user-defined tablespace then drop the +# tablespace. +# +# 2. Move a database from source tablespace to target tablespace then +# drop the source tablespace. +# +# 3. Create a database from another database as template then drop the +# template database. + +my $node_master = get_new_node('master2'); +$node_master->init(allows_streaming => 1); +$node_master->start; + +# Create tablespace +my $dropme_ts_master = TestLib::tempdir; +$dropme_ts_master = TestLib::perl2host($dropme_ts_master); +my $source_ts_master = TestLib::tempdir; +$source_ts_master = TestLib::perl2host($source_ts_master); +my $target_ts_master = TestLib::tempdir; +$target_ts_master = TestLib::perl2host($target_ts_master); + +$node_master->safe_psql('postgres', + qq[CREATE TABLESPACE dropme_ts location '$dropme_ts_master'; + CREATE TABLESPACE source_ts location '$source_ts_master'; + CREATE TABLESPACE target_ts location '$target_ts_master'; + CREATE DATABASE template_db IS_TEMPLATE = true;]); + +my $dropme_ts_standby = TestLib::tempdir; +$dropme_ts_standby = TestLib::perl2host($dropme_ts_standby); +my $source_ts_standby = TestLib::tempdir; +$source_ts_standby = TestLib::perl2host($source_ts_standby); +my $target_ts_standby = TestLib::tempdir; +$target_ts_standby = TestLib::perl2host($target_ts_standby); + +# Take backup +my $backup_name = 
'my_backup'; +my $ts_mapping = "$dropme_ts_master=$dropme_ts_standby," . + "$source_ts_master=$source_ts_standby," . + "$target_ts_master=$target_ts_standby"; +$node_master->backup($backup_name, tablespace_mappings => $ts_mapping); + +my $node_standby = get_new_node('standby2'); +$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1); +$node_standby->start; + +# Make sure connection is made +$node_master->poll_query_until( + 'postgres', 'SELECT count(*) = 1 FROM pg_stat_replication'); + +# Make sure to perform restartpoint after tablespace creation +$node_master->wait_for_catchup($node_standby, 'replay', + $node_master->lsn('replay')); +$node_standby->safe_psql('postgres', 'CHECKPOINT'); + +# Do immediate shutdown just after a sequence of CREATE DATABASE / DROP +# DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records +# to be applied to already-removed directories. +$node_master->safe_psql('postgres', + q[CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts; + CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts; + CREATE DATABASE moveme_db TABLESPACE source_ts; + ALTER DATABASE moveme_db SET TABLESPACE target_ts; + DROP DATABASE dropme_db1; + CREATE DATABASE newdb TEMPLATE template_db; + ALTER DATABASE template_db IS_TEMPLATE = false; + DROP TABLESPACE source_ts; + DROP DATABASE dropme_db2; + DROP TABLESPACE dropme_ts; + DROP DATABASE template_db;]); +$node_master->wait_for_catchup($node_standby, 'replay', + $node_master->lsn('replay')); +$node_standby->stop('immediate'); + +# Should restart ignoring directory creation error. +is($node_standby->start(fail_ok => 1), 1); + +# TEST 5 +# +# Ensure that a missing tablespace directory during create database +# replay immediately causes panic if the standby has already reached +# consistent state (archive recovery is in progress). 
+ +$node_master = get_new_node('master3'); +$node_master->init(allows_streaming => 1); +$node_master->start; + +# Create tablespace +my $ts_master = TestLib::tempdir; +$ts_master = TestLib::perl2host($ts_master); +$node_master->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$ts_master'"); +$node_master->safe_psql('postgres', "CREATE DATABASE db1 TABLESPACE ts1"); + +my $ts_standby = TestLib::tempdir("standby"); +$ts_standby = TestLib::perl2host($ts_standby); + +# Take backup +$backup_name = 'my_backup'; +$node_master->backup($backup_name, + tablespace_mappings => + "$ts_master=$ts_standby"); +$node_standby = get_new_node('standby3'); +$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1); +$node_standby->start; + +# Make sure standby reached consistency and starts accepting connections +$node_standby->poll_query_until('postgres', 'SELECT 1', '1'); + +# Remove standby tablespace directory so it will be missing when +# replay resumes. +# +# The tablespace mapping is lost when the standby node is initialized +# from basebackup because RecursiveCopy::copypath creates a new temp +# directory for each tablespace symlink found in backup. We must +# obtain the correct tablespace directory by querying standby. +$ts_standby = $node_standby->safe_psql( + 'postgres', + "select pg_tablespace_location(oid) from pg_tablespace where spcname = 'ts1'"); +rmtree($ts_standby); + +# Create a database in the tablespace and a table in default tablespace +$node_master->safe_psql('postgres', + q[CREATE TABLE should_not_replay_insertion(a int); + CREATE DATABASE db2 WITH TABLESPACE ts1; + INSERT INTO should_not_replay_insertion VALUES (1);]); + +# Standby should fail and should not silently skip replaying the wal +if ($node_master->poll_query_until( + 'postgres', + 'SELECT count(*) = 0 FROM pg_stat_replication', + { timeout => 5 }) == 1) +{ + pass('standby failed as expected'); + # We know that the standby has failed. 
Setting its pid to + # undefined avoids error when PostgresNode module tries to stop the + # standby node as part of tear_down sequence. + $node_standby->{_pid} = undef; +} +else +{ + fail('standby did not fail within 5 seconds'); +} -- 2.20.1