diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 3dbf6b9..4ac3d69 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -213,6 +213,10 @@ typedef struct MultiXactStateData
 	MultiXactId multiStopLimit;
 	MultiXactId multiWrapLimit;
 
+	/* support for members anti-wraparound measures (see DetermineSafeOldestOffset) */
+	MultiXactOffset offsetWarnLimit;
+	MultiXactOffset offsetStopLimit;
+
 	/*
 	 * Per-backend data starts here. We have two arrays stored in the area
 	 * immediately following the MultiXactStateData struct. Each is indexed by
@@ -341,6 +345,8 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
 						MultiXactOffset offset2);
 static void ExtendMultiXactOffset(MultiXactId multi);
 static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
+static MultiXactOffset read_offset_for_multi(MultiXactId multi);
+static void DetermineSafeOldestOffset(MultiXactId oldestMXact);
 static void WriteMZeroPageXlogRec(int pageno, uint8 info);
 
 
@@ -902,6 +908,41 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 }
 
 /*
+ * Returns true if adding "distance" to "start" would reach or move past
+ * "boundary", using modulo arithmetic.
+ */
+static bool
+MultiXactOffsetWouldWrap(MultiXactOffset boundary,
+						 MultiXactOffset start,
+						 int distance)
+{
+	MultiXactOffset finish = start + distance;
+	Assert(distance >= 0);
+	if (start < boundary)
+		/*------------------------------------------------------------
+		 * S = start, B = boundary, F = finish (start + distance)
+		 * <----S----B---->
+		 *
+		 * [---)            = F wrapped past B (and UINT_MAX)
+		 *      [---)       = F OK
+		 *           [----] = F wrapped past B
+		 *------------------------------------------------------------
+		 */
+		return finish >= boundary || finish < start;
+	else
+		/*------------------------------------------------------------
+		 * S = start, B = boundary, F = finish (start + distance)
+		 * <----B----S---->
+		 *
+		 * [---)            = F OK
+		 *      [---)       = F wrapped past B (and UINT_MAX)
+		 *           [----] = F OK
+		 *------------------------------------------------------------
+		 */
+		return finish >= boundary && finish < start;
+}
+
+/*
  * GetNewMultiXactId
  *		Get the next MultiXactId.
  *
@@ -967,7 +1008,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 
 		/*
 		 * To avoid swamping the postmaster with signals, we issue the autovac
-		 * request only once per 64K transaction starts. This still gives
+		 * request only once per 64K multis generated. This still gives
 		 * plenty of chances before we get into real trouble.
 		 */
 		if (IsUnderPostmaster && (result % 65536) == 0)
@@ -1043,6 +1084,36 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	else
 		*offset = nextOffset;
 
+	/*
+	 * Protect against overrun of the members space as well, with the following
+	 * rules:
+	 *
+	 * - if we're past offsetWarnLimit, emit a warning.
+	 * - if we're past offsetStopLimit, refuse to generate more multis.
+	 *
+	 * Note we haven't updated the shared state yet, so if we fail at this
+	 * point, the multixact ID we grabbed can still be used by the next guy.
+	 *
+	 * Note that there is no point in forcing autovacuum runs here; the
+	 * multixact freeze settings would have to be reduced for that to have any
+	 * effect, and we can't do that from here.
+	 */
+	if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, nmembers))
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("database is not accepting commands that generate new MultiXactIds to avoid \"members\" wraparound data loss in database with OID %u",
+						MultiXactState->oldestMultiXactDB),
+				 errhint("Execute a database-wide VACUUM in that database, with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.")));
+	else if (MultiXactOffsetWouldWrap(MultiXactState->offsetWarnLimit, nextOffset, nmembers))
+		ereport(WARNING,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg_plural("database with OID %u must be vacuumed before %u more multixact member is used",
+							   "database with OID %u must be vacuumed before %u more multixact members are used",
+							   MultiXactState->offsetStopLimit - nextOffset - nmembers,	/* members left once this multi is allocated */
+							   MultiXactState->oldestMultiXactDB,
+							   MultiXactState->offsetStopLimit - nextOffset - nmembers),
+				 errhint("Execute a database-wide VACUUM in that database, with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.")));
+
 	ExtendMultiXactMember(nextOffset, nmembers);
 
 	/*
@@ -1899,6 +1970,12 @@ StartupMultiXact(void)
 	 */
 	pageno = MXOffsetToMemberPage(offset);
 	MultiXactMemberCtl->shared->latest_page_number = pageno;
+
+	/*
+	 * compute the oldest member we need to keep around to avoid old member
+	 * data overrun.
+	 */
+	DetermineSafeOldestOffset(MultiXactState->oldestMultiXactId);
 }
 
 /*
@@ -2099,7 +2176,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 	 *
 	 * Note: This differs from the magic number used in
 	 * SetTransactionIdLimit() since vacuum itself will never generate new
-	 * multis.
+	 * multis. XXX actually it does, if it needs to freeze old multis.
 	 */
 	multiStopLimit = multiWrapLimit - 100;
 	if (multiStopLimit < FirstMultiXactId)
@@ -2142,6 +2219,8 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 	curMulti = MultiXactState->nextMXact;
 	LWLockRelease(MultiXactGenLock);
 
+	DetermineSafeOldestOffset(oldest_datminmxid);
+
 	/* Log the info */
 	ereport(DEBUG1,
 	 (errmsg("MultiXactId wrap limit is %u, limited by database with OID %u",
@@ -2228,13 +2307,16 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
 
 /*
  * Update our oldestMultiXactId value, but only if it's more recent than
- * what we had.
+ * what we had. However, even if not, always update the oldest multixact
+ * offset limit.
  */
 void
 MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
 {
 	if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
 		SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
+	else
+		DetermineSafeOldestOffset(oldestMulti);
 }
 
 /*
@@ -2402,6 +2484,93 @@ GetOldestMultiXactId(void)
 }
 
 /*
+ * Read the offset of the first member of the given multixact.
+ */
+static MultiXactOffset
+read_offset_for_multi(MultiXactId multi)
+{
+	MultiXactOffset offset;
+	int			pageno;
+	int			entryno;
+	int			slotno;
+	MultiXactOffset *offptr;
+
+	pageno = MultiXactIdToOffsetPage(multi);
+	entryno = MultiXactIdToOffsetEntry(multi);
+
+	/* lock is acquired by SimpleLruReadPage_ReadOnly */
+	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
+	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	offptr += entryno;
+	offset = *offptr;
+	LWLockRelease(MultiXactOffsetControlLock);
+
+	return offset;
+}
+
+/*
+ * Based on the given oldest MultiXactId, determine what's the oldest member
+ * offset and install the limit info in MultiXactState, where it can be used to
+ * prevent overrun of old data in the members SLRU area.
+ */
+static void
+DetermineSafeOldestOffset(MultiXactId oldestMXact)
+{
+	MultiXactOffset oldestOffset;
+
+	/*
+	 * Can't do this while initdb'ing or in the startup process while replaying
+	 * WAL: the segment file to read might have not yet been created, or
+	 * already been removed.
+	 */
+	if (IsBootstrapProcessingMode() || InRecovery)
+		return;
+
+	/*
+	 * We determine the safe upper bound for offsets of new xacts by reading
+	 * the offset of the oldest multixact, and going back one segment. This
+	 * way, the sequence of multixact member segments will always have a
+	 * one-segment hole at a minimum. We start spewing warnings 19 segments
+	 * before that.
+	 */
+	oldestOffset = read_offset_for_multi(oldestMXact);
+	/* move back to the start of the segment containing oldestOffset */
+	oldestOffset -= oldestOffset % (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
+
+	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+	/* always leave one segment before the wraparound point */
+	MultiXactState->offsetStopLimit = oldestOffset -
+		(MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
+	/* start complaining 20 segments before the wraparound point */
+	MultiXactState->offsetWarnLimit = oldestOffset -
+		(MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * 20);
+	LWLockRelease(MultiXactGenLock);
+}
+
+/*
+ * Returns an instantaneous snapshot of the current number of members in the
+ * members SLRU area.
+ */
+MultiXactOffset
+ReadMultiXactMemberCount(void)
+{
+	MultiXactOffset nextOffset;
+	MultiXactOffset oldestOffset;
+	MultiXactId oldestMultiXactId;
+
+	LWLockAcquire(MultiXactGenLock, LW_SHARED);
+	nextOffset = MultiXactState->nextOffset;
+	oldestMultiXactId = MultiXactState->oldestMultiXactId;
+	LWLockRelease(MultiXactGenLock);
+	/*
+	 * TODO: In future, could oldestMultiXactMemberOffset be stored in shmem,
+	 * pg_controldata, alongside oldestMultiXactId?
+	 */
+	oldestOffset = read_offset_for_multi(oldestMultiXactId);
+	return nextOffset - oldestOffset;
+}
+
+/*
  * SlruScanDirectory callback.
  *		This callback deletes segments that are outside the range determined by
  *		the given page numbers.
@@ -2533,26 +2702,7 @@ TruncateMultiXact(void)
 	 * First, compute the safe truncation point for MultiXactMember. This is
 	 * the starting offset of the oldest multixact.
 	 */
-	{
-		int			pageno;
-		int			slotno;
-		int			entryno;
-		MultiXactOffset *offptr;
-
-		/* lock is acquired by SimpleLruReadPage_ReadOnly */
-
-		pageno = MultiXactIdToOffsetPage(oldestMXact);
-		entryno = MultiXactIdToOffsetEntry(oldestMXact);
-
-		slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
-											oldestMXact);
-		offptr = (MultiXactOffset *)
-			MultiXactOffsetCtl->shared->page_buffer[slotno];
-		offptr += entryno;
-		oldestOffset = *offptr;
-
-		LWLockRelease(MultiXactOffsetControlLock);
-	}
+	oldestOffset = read_offset_for_multi(oldestMXact);
 
 	/*
 	 * To truncate MultiXactMembers, we need to figure out the active page
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index ad49964..8e18f85 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -397,6 +397,12 @@ AuxiliaryProcessMain(int argc, char *argv[])
 			proc_exit(1);		/* should never return */
 
 		case BootstrapProcess:
+			/*
+			 * Enter bootstrap mode before BootStrapXLOG() runs, so that code
+			 * reached from there can test IsBootstrapProcessingMode().
+			 * NOTE(review): presumably for DetermineSafeOldestOffset -- confirm.
+			 */
+			SetProcessingMode(BootstrapProcessing);
 			bootstrap_signals();
 			BootStrapXLOG();
 			BootstrapModeMain();
@@ -459,8 +465,7 @@ BootstrapModeMain(void)
 	int			i;
 
 	Assert(!IsUnderPostmaster);
-
-	SetProcessingMode(BootstrapProcessing);
+	Assert(IsBootstrapProcessingMode());
 
 	/*
 	 * Do backend-like initialization for bootstrap mode
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7ead161..3502a7f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -601,7 +601,11 @@ vacuum_set_xid_limits(Relation rel,
 		if (freezetable < 0)
 			freezetable = vacuum_multixact_freeze_table_age;
-		freezetable = Min(freezetable,
-						  autovacuum_multixact_freeze_max_age * 0.95);
+		{
+			/* evaluate once: Min() would expand the function call twice */
+			int			max_table_age = autovacuum_multixact_freeze_max_age_adjusted() * 0.95;
+
+			freezetable = Min(freezetable, max_table_age);
+		}
 		Assert(freezetable >= 0);
 
 		/*
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index be4cd1d..071d492 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1052,6 +1052,34 @@ db_comparator(const void *a, const void *b)
 }
 
 /*
+ * Returns autovacuum_multixact_freeze_max_age, adjusted down to prevent excessive
+ * use of addressable multixact member space if required.
+ *
+ * The goal is to avoid the situation where new multixacts can't be created
+ * because the offsets used to address pg_multixact/members would wrap around.
+ *
+ * If less than "safe fraction" of the total available member space is used,
+ * then we make no adjustment. If more than the "excessive fraction" of the
+ * total available member space is used, then we use a freeze max age of zero
+ * to trigger aggressive vacuuming. In between, we scale the given freeze max
+ * age down linearly, so that vacuuming becomes more aggressive as the member
+ * SLRU grows.
+ */
+int
+autovacuum_multixact_freeze_max_age_adjusted(void)
+{
+	/* TODO: where to get these constants? */
+	const double safe_fraction = 0.25;
+	const double excessive_fraction = 0.75;
+	double		used_fraction = ReadMultiXactMemberCount() / (double) MaxMultiXactOffset;
+	/* TODO: change this to consider the offset SLRU size as well, as described by Robert? */
+	double		scale_factor =
+		(excessive_fraction - used_fraction) / (excessive_fraction - safe_fraction);
+	scale_factor = Min(Max(scale_factor, 0.0), 1.0); /* Clamp value in range 0...1. */
+	return (int) (autovacuum_multixact_freeze_max_age * scale_factor);
+}
+
+/*
  * do_start_worker
  *
  * Bare-bones procedure for starting an autovacuum worker from the launcher.
@@ -1118,7 +1146,7 @@ do_start_worker(void)
 
 	/* Also determine the oldest datminmxid we will consider. */
 	recentMulti = ReadNextMultiXactId();
-	multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
+	multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age_adjusted();
 	if (multiForceLimit < FirstMultiXactId)
 		multiForceLimit -= FirstMultiXactId;
 
@@ -1931,7 +1959,10 @@ do_autovacuum(void)
 	{
 		default_freeze_min_age = vacuum_freeze_min_age;
 		default_freeze_table_age = vacuum_freeze_table_age;
-		default_multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
+		/* call once: Min() would expand the function call twice */
+		default_multixact_freeze_min_age = autovacuum_multixact_freeze_max_age_adjusted();
+		if (default_multixact_freeze_min_age > vacuum_multixact_freeze_min_age)
+			default_multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
 		default_multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
 	}
 
@@ -2684,8 +2715,9 @@ relation_needs_vacanalyze(Oid relid,
 		: autovacuum_freeze_max_age;
 
-	multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0)
-		? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age)
-		: autovacuum_multixact_freeze_max_age;
+	/* call once: Min() would expand the function call twice */
+	multixact_freeze_max_age = autovacuum_multixact_freeze_max_age_adjusted();
+	if (relopts && relopts->multixact_freeze_max_age >= 0)
+		multixact_freeze_max_age = Min(relopts->multixact_freeze_max_age, multixact_freeze_max_age);
 
 	av_enabled = (relopts ? relopts->enabled : true);
 
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 640b198..c4afbc2 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
 						  MultiXactOffset minMultiOffset);
 extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
 extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti);
+extern MultiXactOffset ReadMultiXactMemberCount(void);
 
 extern void multixact_twophase_recover(TransactionId xid, uint16 info,
 						   void *recdata, uint32 len);
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index 6eaaf4c..71931ef 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -53,6 +53,8 @@ extern void AutoVacWorkerFailed(void);
 /* autovacuum cost-delay balancer */
 extern void AutoVacuumUpdateDelay(void);
 
+extern int	autovacuum_multixact_freeze_max_age_adjusted(void);
+
 #ifdef EXEC_BACKEND
 extern void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn();
 extern void AutoVacWorkerMain(int argc, char *argv[]) pg_attribute_noreturn();