From 38edc07c49970ad0ac1818284f2e59e26591b3a4 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Sun, 6 Aug 2023 15:56:39 +0100 Subject: [PATCH v9] Be more picky with WAL segment deletion in pg_rewind Make pg_rewind to be a bit wiser in terms of creating filemap: preserve on the target all WAL segments that contain records between the last common checkpoint and the point of divergence. Co-authored-by: Polina Bungina --- src/bin/pg_rewind/filemap.c | 62 +++++++++++++++++- src/bin/pg_rewind/filemap.h | 3 + src/bin/pg_rewind/meson.build | 1 + src/bin/pg_rewind/parsexlog.c | 24 +++++++ src/bin/pg_rewind/pg_rewind.c | 3 + src/bin/pg_rewind/t/010_keep_recycled_wals.pl | 65 +++++++++++++++++++ 6 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 src/bin/pg_rewind/t/010_keep_recycled_wals.pl diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 4458324c9d8..b357c28338a 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -63,6 +63,28 @@ static file_entry_t *lookup_filehash_entry(const char *path); static int final_filemap_cmp(const void *a, const void *b); static bool check_file_excluded(const char *path, bool is_source); +typedef struct skipwal_t +{ + const char *path; + uint32 status; +} skipwal_t; + +#define SH_PREFIX keepwalhash +#define SH_ELEMENT_TYPE skipwal_t +#define SH_KEY_TYPE const char * +#define SH_KEY path +#define SH_HASH_KEY(tb, key) hash_string(key) +#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) +#define SH_SCOPE static inline +#define SH_RAW_ALLOCATOR pg_malloc0 +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +static keepwalhash_hash * keepwalhash = NULL; + +static bool keepwalhash_entry_exists(const char *path); + /* * Definition of one element part of an exclusion list, used to exclude * contents when rewinding. "name" is the name of the file or path to @@ -206,6 +228,35 @@ lookup_filehash_entry(const char *path) return filehash_lookup(filehash, path); } +/* Initialize a hash table to store WAL file names that must be kept */ +void +keepwalhash_init(void) +{ + keepwalhash = keepwalhash_create(FILEHASH_INITIAL_SIZE, NULL); +} + +/* Prevent a given file deletion during rewind */ +void +insert_keepwalhash_entry(const char *path) +{ + skipwal_t *entry; + bool found; + + /* Should only be called with keepwalhash initialized */ + Assert(keepwalhash); + + entry = keepwalhash_insert(keepwalhash, path, &found); + + if (!found) + entry->path = pg_strdup(path); +} + +static bool +keepwalhash_entry_exists(const char *path) +{ + return keepwalhash_lookup(keepwalhash, path) != NULL; +} + /* * Callback for processing source file list. * @@ -685,7 +736,16 @@ decide_file_action(file_entry_t *entry) } else if (entry->target_exists && !entry->source_exists) { - /* File exists in target, but not source. Remove it. */ + /* File exists in target, but not source. */ + + if (keepwalhash_entry_exists(path)) + { + /* This is a WAL file that should be kept. */ + pg_log_debug("Not removing %s because it is required for recovery", path); + return FILE_ACTION_NONE; + } + + /* Otherwise remove an unexpected file. */ return FILE_ACTION_REMOVE; } else if (!entry->target_exists && !entry->source_exists) diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h index 007e0f17cf4..0cb6fcae00c 100644 --- a/src/bin/pg_rewind/filemap.h +++ b/src/bin/pg_rewind/filemap.h @@ -110,4 +110,7 @@ extern filemap_t *decide_file_actions(void); extern void calculate_totals(filemap_t *filemap); extern void print_filemap(filemap_t *filemap); +extern void keepwalhash_init(void); +extern void insert_keepwalhash_entry(const char *path); + #endif /* FILEMAP_H */ diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build index e0f88bde221..200ebf84eb9 100644 --- a/src/bin/pg_rewind/meson.build +++ b/src/bin/pg_rewind/meson.build @@ -43,6 +43,7 @@ tests += { 't/007_standby_source.pl', 't/008_min_recovery_point.pl', 't/009_growing_files.pl', + 't/010_keep_recycled_wals.pl', ], }, } diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 22f7351fdcd..7329c06d8fa 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -176,6 +176,10 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, char *errormsg; XLogPageReadPrivate private; + /* Track WAL segments opened while searching a checkpoint */ + XLogSegNo segno = 0; + TimeLineID tli = 0; + /* * The given fork pointer points to the end of the last common record, * which is not necessarily the beginning of the next record, if the @@ -217,6 +221,26 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, LSN_FORMAT_ARGS(searchptr)); } + /* We are trying to detect if the new WAL file was opened */ + if (xlogreader->seg.ws_tli != tli || xlogreader->seg.ws_segno != segno) + { + char xlogfname[MAXFNAMELEN]; + + tli = xlogreader->seg.ws_tli; + segno = xlogreader->seg.ws_segno; + + snprintf(xlogfname, MAXPGPATH, XLOGDIR "/"); + XLogFileName(xlogfname + strlen(xlogfname), + xlogreader->seg.ws_tli, + xlogreader->seg.ws_segno, WalSegSz); + + /* + * Make sure pg_rewind doesn't remove this file, because it is + * required for postgres to start after rewind. + */ + insert_keepwalhash_entry(xlogfname); + } + /* * Check if it is a checkpoint record. This checkpoint record needs to * be the latest checkpoint before WAL forked and not the checkpoint diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 0841ab4135b..48c11417b23 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -455,6 +455,9 @@ main(int argc, char **argv) exit(0); } + /* Hash to memorize WAL files that should be kept */ + keepwalhash_init(); + findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex, &chkptrec, &chkpttli, &chkptredo, restore_command); pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u", diff --git a/src/bin/pg_rewind/t/010_keep_recycled_wals.pl b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl new file mode 100644 index 00000000000..65caaf2faa2 --- /dev/null +++ b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl @@ -0,0 +1,65 @@ + +# Copyright (c) 2021-2024, PostgreSQL Global Development Group + +# +# Test situation where a target data directory contains +# WAL files that were already recycled by the new primary. +# + +use strict; +use warnings; +use PostgreSQL::Test::Utils; +use Test::More; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +RewindTest::setup_cluster(); +$node_primary->enable_archiving(); +RewindTest::start_primary(); + +RewindTest::create_standby(); +$node_standby->enable_restoring($node_primary, 0); +$node_standby->reload(); + +RewindTest::primary_psql("CHECKPOINT"); # last common checkpoint + +# We use "perl -e 'exit(1)'" as an alternative to "false", because the last one +# might not be available on Windows, but we want to run tests cross-platform. +my $false = "$^X -e 'exit(1)'"; +$node_primary->append_conf( + 'postgresql.conf', qq( +archive_command = '$false' +)); +$node_primary->reload(); + +# advance WAL on the primary; WAL segment will never make it to the archive +RewindTest::primary_psql("CREATE TABLE t(a int)"); +RewindTest::primary_psql("INSERT INTO t VALUES(0)"); +RewindTest::primary_psql("SELECT pg_switch_wal()"); + +RewindTest::promote_standby; + +# new primary loses diverging WAL segment +RewindTest::standby_psql("INSERT INTO t values(0)"); +RewindTest::standby_psql("SELECT pg_switch_wal()"); + +$node_standby->stop(); +$node_primary->stop(); + +my ($stdout, $stderr) = run_command( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $node_standby->data_dir, + '--target-pgdata', $node_primary->data_dir, + '--no-sync', + ]); + +like( + $stderr, + qr/Not removing pg_wal.* because it is required for recovery/, + "some WAL files were skipped"); + +done_testing(); -- 2.45.2