From 9b4e41c91312d8a79a03c929224cd4dfe99e7802 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Mon, 06 Aug 2023 15:56:39 +0100 Subject: [PATCH] Be more picky with WAL segment deletion in pg_rewind Make pg_rewind to be a bit wiser in terms of creating filemap: preserve on the target all WAL segments that contain records between the last common checkpoint and the point of divergence. Co-authored-by: Polina Bungina --- src/bin/pg_rewind/filemap.c | 62 +++++++++++++++++- src/bin/pg_rewind/filemap.h | 3 + src/bin/pg_rewind/parsexlog.c | 22 +++++++ src/bin/pg_rewind/pg_rewind.c | 3 + src/bin/pg_rewind/t/010_keep_recycled_wals.pl | 65 +++++++++++++++++++ 5 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 src/bin/pg_rewind/t/010_keep_recycled_wals.pl diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index ecadd69dc5..ebaebfb149 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -64,6 +64,27 @@ static file_entry_t *lookup_filehash_entry(const char *path); static int final_filemap_cmp(const void *a, const void *b); static bool check_file_excluded(const char *path, bool is_source); +typedef struct skipwal_t { + const char *path; + uint32 status; +} skipwal_t; + +#define SH_PREFIX keepwalhash +#define SH_ELEMENT_TYPE skipwal_t +#define SH_KEY_TYPE const char * +#define SH_KEY path +#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) +#define SH_SCOPE static inline +#define SH_RAW_ALLOCATOR pg_malloc0 +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +static keepwalhash_hash *keepwalhash = NULL; + +static bool keepwalhash_entry_exists(const char *path); + /* * Definition of one element part of an exclusion list, used to exclude * contents when rewinding. "name" is the name of the file or path to @@ -207,6 +228,35 @@ lookup_filehash_entry(const char *path) return filehash_lookup(filehash, path); } +/* Initialize a hash table to store WAL file names that must be kept */ +void +keepwalhash_init(void) +{ + keepwalhash = keepwalhash_create(FILEHASH_INITIAL_SIZE, NULL); +} + +/* Prevent a given file deletion during rewind */ +void +insert_keepwalhash_entry(const char *path) +{ + skipwal_t *entry; + bool found; + + /* Should only be called with keepwalhash initialized */ + Assert(keepwalhash); + + entry = keepwalhash_insert(keepwalhash, path, &found); + + if (!found) + entry->path = pg_strdup(path); +} + +static bool +keepwalhash_entry_exists(const char *path) +{ + return keepwalhash_lookup(keepwalhash, path) != NULL; +} + /* * Callback for processing source file list. * @@ -682,7 +732,16 @@ decide_file_action(file_entry_t *entry) } else if (entry->target_exists && !entry->source_exists) { - /* File exists in target, but not source. Remove it. */ + /* File exists in target, but not source. */ + + if (keepwalhash_entry_exists(path)) + { + /* This is a WAL file that should be kept. */ + pg_log_debug("Not removing %s because it is required for recovery", path); + return FILE_ACTION_NONE; + } + + /* Otherwise remove an unexpected file. */ return FILE_ACTION_REMOVE; } else if (!entry->target_exists && !entry->source_exists) @@ -818,7 +877,6 @@ decide_file_actions(void) return filemap; } - /* * Helper function for filemap hash table. */ diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h index 988d4590e0..611b8cf49e 100644 --- a/src/bin/pg_rewind/filemap.h +++ b/src/bin/pg_rewind/filemap.h @@ -110,4 +110,7 @@ extern filemap_t *decide_file_actions(void); extern void calculate_totals(filemap_t *filemap); extern void print_filemap(filemap_t *filemap); +extern void keepwalhash_init(void); +extern void insert_keepwalhash_entry(const char *path); + #endif /* FILEMAP_H */ diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 0233ece88b..b89b65077d 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -176,6 +176,10 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, char *errormsg; XLogPageReadPrivate private; + /* Track WAL segments opened while searching a checkpoint */ + XLogSegNo segno = 0; + TimeLineID tli = 0; + /* * The given fork pointer points to the end of the last common record, * which is not necessarily the beginning of the next record, if the @@ -217,6 +221,24 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, LSN_FORMAT_ARGS(searchptr)); } + /* We are trying to detect if the new WAL file was opened */ + if (xlogreader->seg.ws_tli != tli || xlogreader->seg.ws_segno != segno) + { + char xlogfname[MAXFNAMELEN]; + + tli = xlogreader->seg.ws_tli; + segno = xlogreader->seg.ws_segno; + + snprintf(xlogfname, MAXPGPATH, XLOGDIR "/"); + XLogFileName(xlogfname + strlen(xlogfname), + xlogreader->seg.ws_tli, + xlogreader->seg.ws_segno, WalSegSz); + + /* Make sure pg_rewind doesn't remove this file, because it is + * required for postgres to start after rewind. */ + insert_keepwalhash_entry(xlogfname); + } + /* * Check if it is a checkpoint record. This checkpoint record needs to * be the latest checkpoint before WAL forked and not the checkpoint diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index bfd44a284e..bfbc65c111 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -455,6 +455,9 @@ main(int argc, char **argv) exit(0); } + /* Hash to memorize WAL files that should be kept */ + keepwalhash_init(); + findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex, &chkptrec, &chkpttli, &chkptredo, restore_command); pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u", diff --git a/src/bin/pg_rewind/t/010_keep_recycled_wals.pl b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl new file mode 100644 index 0000000000..fcc3eec48a --- /dev/null +++ b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl @@ -0,0 +1,65 @@ + +# Copyright (c) 2021-2023, PostgreSQL Global Development Group + +# +# Test situation where a target data directory contains +# WAL files that were already recycled by the new primary. +# + +use strict; +use warnings; +use PostgreSQL::Test::Utils; +use Test::More; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +RewindTest::setup_cluster(); +$node_primary->enable_archiving(); +RewindTest::start_primary(); + +RewindTest::create_standby(); +$node_standby->enable_restoring($node_primary, 0); +$node_standby->reload(); + +RewindTest::primary_psql("CHECKPOINT"); # last common checkpoint + +# We use "perl -e 'exit(1)'" as an alternative to "false", because the last one +# might not be available on Windows, but we want to run tests cross-platform. +my $false = "$^X -e 'exit(1)'"; +$node_primary->append_conf( + 'postgresql.conf', qq( +archive_command = '$false' +)); +$node_primary->reload(); + +# advance WAL on the primary; WAL segment will never make it to the archive +RewindTest::primary_psql("CREATE TABLE t(a int)"); +RewindTest::primary_psql("INSERT INTO t VALUES(0)"); +RewindTest::primary_psql("SELECT pg_switch_wal()"); + +RewindTest::promote_standby; + +# new primary loses diverging WAL segment +RewindTest::standby_psql("INSERT INTO t values(0)"); +RewindTest::standby_psql("SELECT pg_switch_wal()"); + +$node_standby->stop(); +$node_primary->stop(); + +my ($stdout, $stderr) = run_command( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $node_standby->data_dir, + '--target-pgdata', $node_primary->data_dir, + '--no-sync', + ]); + +like( + $stderr, + qr/Not removing pg_wal.* because it is required for recovery/, + "some WAL files were skipped"); + +done_testing(); -- 2.34.1