From 147f616565bc652950bcd928a870cd3aa3dfb805 Mon Sep 17 00:00:00 2001 From: Jehan-Guillaume de Rorthais Date: Wed, 1 Apr 2020 16:36:06 +0200 Subject: [PATCH 1/2] Fix WAL retention during production crash recovery During crash recovery of a production cluster with archive_mode=on, XLogArchiveCheckDone() was considering the cluster as inRecovery without archive_mode=always. Because of this non-arcived WAL and related .ready files were recycled or removed. --- src/backend/access/transam/xlog.c | 45 +++++++++++++++++++----- src/backend/access/transam/xlogarchive.c | 21 +++++++---- src/include/access/xlog.h | 23 ++++++++++++ 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index abf954ba39..a7a5ce6ef4 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -220,8 +220,8 @@ static TimeLineID receiveTLI = 0; static bool lastFullPageWrites; /* - * Local copy of SharedRecoveryInProgress variable. True actually means "not - * known, need to check the shared state". + * This is false when SharedRecoveryState is not ARCHIVE_RECOVERING. + * True actually means "not known, need to check the shared state". */ static bool LocalRecoveryInProgress = true; @@ -652,10 +652,10 @@ typedef struct XLogCtlData TimeLineID PrevTimeLineID; /* - * SharedRecoveryInProgress indicates if we're still in crash or archive + * SharedRecoveryState indicates if we're either in crash or archive * recovery. Protected by info_lck. */ - bool SharedRecoveryInProgress; + RecoveryState SharedRecoveryState; /* * SharedHotStandbyActive indicates if we allow hot standby queries to be @@ -4407,6 +4407,11 @@ ReadRecord(XLogReaderState *xlogreader, int emode, updateMinRecoveryPoint = true; UpdateControlFile(); + + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = ARCHIVE_RECOVERING; + SpinLockRelease(&XLogCtl->info_lck); + LWLockRelease(ControlFileLock); CheckRecoveryConsistency(); @@ -5139,11 +5144,20 @@ XLOGShmemInit(void) * in additional info.) */ XLogCtl->XLogCacheBlck = XLOGbuffers - 1; - XLogCtl->SharedRecoveryInProgress = true; XLogCtl->SharedHotStandbyActive = false; XLogCtl->SharedPromoteIsTriggered = false; XLogCtl->WalWriterSleeping = false; + switch (ControlFile->state) + { + case DB_SHUTDOWNED: + case DB_SHUTDOWNED_IN_RECOVERY: + XLogCtl->SharedRecoveryState = ARCHIVE_RECOVERING; + break; + default: + XLogCtl->SharedRecoveryState = CRASH_RECOVERING; + } + SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->info_lck); SpinLockInit(&XLogCtl->ulsn_lck); @@ -7884,7 +7898,7 @@ StartupXLOG(void) ControlFile->time = (pg_time_t) time(NULL); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->SharedRecoveryInProgress = false; + XLogCtl->SharedRecoveryState = NOT_IN_RECOVERY; SpinLockRelease(&XLogCtl->info_lck); UpdateControlFile(); @@ -8030,7 +8044,7 @@ RecoveryInProgress(void) */ volatile XLogCtlData *xlogctl = XLogCtl; - LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress; + LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != NOT_IN_RECOVERY); /* * Initialize TimeLineID and RedoRecPtr when we discover that recovery @@ -8042,8 +8056,8 @@ RecoveryInProgress(void) { /* * If we just exited recovery, make sure we read TimeLineID and - * RedoRecPtr after SharedRecoveryInProgress (for machines with - * weak memory ordering). + * RedoRecPtr after SharedRecoveryState (for machines with weak + * memory ordering). */ pg_memory_barrier(); InitXLOGAccess(); @@ -8059,6 +8073,19 @@ RecoveryInProgress(void) } } +/* + * Returns current recovery state from shared memory. + * + * See details about RecoveryState in xlog.h + */ +RecoveryState +GetRecoveryState(void) +{ + volatile XLogCtlData *xlogctl = XLogCtl; + + return xlogctl->SharedRecoveryState; +} + /* * Is HotStandby active yet? This is only important in special backends * since normal backends won't ever be able to connect until this returns diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index d62c12310a..b207827651 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -572,18 +572,25 @@ XLogArchiveCheckDone(const char *xlog) { char archiveStatusPath[MAXPGPATH]; struct stat stat_buf; - bool inRecovery = RecoveryInProgress(); + RecoveryState recoveryState = GetRecoveryState(); + + /* The file is always deletable if archive_mode is "off". */ + if ( ! XLogArchivingActive() ) + return true; /* - * The file is always deletable if archive_mode is "off". On standbys - * archiving is disabled if archive_mode is "on", and enabled with - * "always". On a primary, archiving is enabled if archive_mode is "on" - * or "always". + * On standbys, the file is deletable if archive_mode is not + * "always". */ - if (!((XLogArchivingActive() && !inRecovery) || - (XLogArchivingAlways() && inRecovery))) + if ( recoveryState == ARCHIVE_RECOVERING && !XLogArchivingAlways() ) return true; + /* + * We are either: + * * a primary with archive_mode set to "on" or "always" + * * a standby with archive_mode set to "always". + */ + /* First check for .done --- this means archiver is done with it */ StatusFilePath(archiveStatusPath, xlog, ".done"); if (stat(archiveStatusPath, &stat_buf) == 0) diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 7412caa5f2..601ae34dd8 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -165,6 +165,28 @@ typedef enum WalLevel WAL_LEVEL_LOGICAL } WalLevel; +/* + * Recovery state. + * + * There's two different reasons to recover WAL: when standby mode is requested + * or after a crash to catchup with consistency. + * + * CRASH_RECOVERING is set when recovering after a crash. + * ARCHIVE_RECOVERING is set when archive recovery or standby is requested. + * NOT_IN_RECOVERY means the cluster is in production. + * + * Note that even if archive recovery or standby is requested, recovery state + * starts first with CRASH_RECOVERING before switching to ARCHIVE_RECOVERING if: + * - the control file shows evidence the cluster crashed based on its previous state + * - there's no backup label file + */ +typedef enum RecoveryState +{ + NOT_IN_RECOVERY = 0, /* currently in production */ + CRASH_RECOVERING, /* recovering from a crash */ + ARCHIVE_RECOVERING /* recovering archives as requested */ +} RecoveryState; + extern PGDLLIMPORT int wal_level; /* Is WAL archiving enabled (always or only while server is running normally)? */ @@ -279,6 +301,7 @@ extern const char *xlog_identify(uint8 info); extern void issue_xlog_fsync(int fd, XLogSegNo segno); extern bool RecoveryInProgress(void); +extern RecoveryState GetRecoveryState(void); extern bool HotStandbyActive(void); extern bool HotStandbyActiveInReplay(void); extern bool XLogInsertAllowed(void); -- 2.20.1