diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 5532dc6..478e447 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2597,6 +2597,97 @@ find_multixact_start(MultiXactId multi)
 }
 
 /*
+ * Returns an instantaneous snapshot of the current number of active
+ * multixacts and the number of members in the members SLRU area.
+ */
+static void
+ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
+{
+	MultiXactOffset nextOffset;
+	MultiXactOffset oldestOffset;
+	MultiXactId oldestMultiXactId;
+	MultiXactId nextMultiXactId;
+
+	LWLockAcquire(MultiXactGenLock, LW_SHARED);
+	nextOffset = MultiXactState->nextOffset;
+	oldestMultiXactId = MultiXactState->oldestMultiXactId;
+	nextMultiXactId = MultiXactState->nextMXact;
+	LWLockRelease(MultiXactGenLock);
+	/*
+	 * XXX: Could we store oldestMultiXactMemberOffset in shmem and
+	 * pg_controldata, alongside oldestMultiXactId?
+	 */
+	oldestOffset = find_multixact_start(oldestMultiXactId);
+	*members = nextOffset - oldestOffset;
+	*multixacts = nextMultiXactId - oldestMultiXactId;
+}
+
+/*
+ * Computes a multixact age that we can use to trigger earlier wraparound
+ * vacuums than usual, if special action is required to avoid impending
+ * exhaustion of the address space of multixact members (caused by large
+ * multixacts).
+ *
+ * If less than a "safe member count" is active, then we return -1 to indicate
+ * that no special action needs to be taken.  This should always be the case
+ * for users who don't make use of large multixacts.
+ *
+ * If more than the "dangerous member count" is active, then we return a max
+ * freeze age of zero to trigger aggressive wraparound vacuuming.
+ *
+ * In between the safe and dangerous levels, we return the current number of
+ * active multixids scaled down linearly for higher usage fractions, so that
+ * vacuuming becomes more aggressive as the member SLRU grows, in the hope
+ * that different tables will be vacuumed at different times due to their
+ * varying relminmxid values.
+ *
+ * Based on the assumption that there is no reasonable way for an end user to
+ * configure the thresholds for this, we define the safe member count to be
+ * half of the member address space, and the dangerous level to be
+ * three-quarters.  The same thresholds are used for every caller; this
+ * function takes no arguments, so it cannot tell manual VACUUM apart from
+ * autovacuum.  XXX: giving manual VACUUM lower thresholds (a head start over
+ * autovacuum) would need a parameter or a second entry point.
+ */
+int
+MultiXactCheckMemberUsage(void)
+{
+	MultiXactOffset members;
+	uint32 multixacts;
+	double fraction;
+	const MultiXactOffset safe_member_count = MaxMultiXactOffset / 2;
+	const MultiXactOffset dangerous_member_count = MaxMultiXactOffset -
+		(MaxMultiXactOffset / 4);
+
+	ReadMultiXactCounts(&multixacts, &members);
+
+	if (members <= safe_member_count)
+	{
+		/* There is no danger of member space wrap currently. */
+		return -1;
+	}
+
+	if (members >= dangerous_member_count)
+	{
+		/* We need a wraparound vacuum for all tables now. */
+		return 0;
+	}
+
+	/*
+	 * Choose a cutoff age which is a fraction of the approximate current
+	 * number of active multixacts.  If we are using an amount of member
+	 * address space near safe_member_count, we use a number close to the
+	 * number of active multixacts, so that only tables with the oldest
+	 * relminmxid values become candidates for wraparound vacuums.  As we get
+	 * closer to dangerous_member_count, we use a number closer to zero, so
+	 * that more tables become candidates for wraparound vacuums.
+	 */
+	fraction = (double) (members - safe_member_count) /
+		(double) (dangerous_member_count - safe_member_count);
+	return (int) (multixacts * (1.0 - fraction));
+}
+
+/*
  * SlruScanDirectory callback.
  *		This callback deletes segments that are outside the range determined by
  *		the given page numbers.
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index ba3fbbd..2dbdcd8 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -112,6 +112,20 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
 		   !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
 	Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
 
+	if (isTopLevel && vacstmt->multixact_freeze_table_age == -1)
+	{
+		int safe_multixact_age = MultiXactCheckMemberUsage();
+		if (safe_multixact_age >= 0)
+		{
+			/*
+			 * Override the multixact freeze settings if we are running out of
+			 * member address space.
+			 */
+			vacstmt->multixact_freeze_table_age = safe_multixact_age;
+			vacstmt->multixact_freeze_min_age = safe_multixact_age / 2;
+		}
+	}
+
 	stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 
 	/*
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index f5b4704..937fa6c 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -302,11 +302,13 @@ static void do_autovacuum(void);
 static void FreeWorkerInfo(int code, Datum arg);
 
 static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
-					  TupleDesc pg_class_desc);
+					  TupleDesc pg_class_desc,
+					  int safe_multixact_age);
 static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
 						  AutoVacOpts2 *relopts2,
 						  Form_pg_class classForm,
 						  PgStat_StatTabEntry *tabentry,
+						  int safe_multixact_age,
 						  bool *dovacuum, bool *doanalyze, bool *wraparound);
 
 static void autovacuum_do_vac_analyze(autovac_table *tab,
@@ -1107,6 +1109,7 @@ do_start_worker(void)
 	Oid			retval = InvalidOid;
 	MemoryContext tmpcxt,
 				oldcxt;
+	int			safe_multixact_age;
 
 	/* return quickly when there are no free workers */
 	LWLockAcquire(AutovacuumLock, LW_SHARED);
@@ -1148,7 +1151,12 @@ do_start_worker(void)
 
 	/* Also determine the oldest datminmxid we will consider. */
 	recentMulti = ReadNextMultiXactId();
-	multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
+	safe_multixact_age = MultiXactCheckMemberUsage();
+	if (safe_multixact_age >= 0)
+		multiForceLimit = recentMulti - Min(autovacuum_multixact_freeze_max_age,
+											safe_multixact_age);
+	else
+		multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
 	if (multiForceLimit < FirstMultiXactId)
 		multiForceLimit -= FirstMultiXactId;
 
@@ -1937,6 +1945,7 @@ do_autovacuum(void)
 	BufferAccessStrategy bstrategy;
 	ScanKeyData key;
 	TupleDesc	pg_class_desc;
+	int			safe_multixact_age;
 
 	/*
 	 * StartTransactionCommand and CommitTransactionCommand will automatically
@@ -2032,6 +2041,13 @@ do_autovacuum(void)
 	relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL);
 
 	/*
+	 * Check if member space usage is in danger of being exhausted, so we can
+	 * pass the recommended cutoff age to relation_needs_vacanalyze and
+	 * table_recheck_autovac.
+	 */
+	safe_multixact_age = MultiXactCheckMemberUsage();
+
+	/*
 	 * On the first pass, we collect main tables to vacuum, and also the main
 	 * table relid to TOAST relid mapping.
 	 */
@@ -2059,6 +2075,7 @@ do_autovacuum(void)
 
 		/* Check if it needs vacuum or analyze */
 		relation_needs_vacanalyze(relid, relopts, relopts2, classForm, tabentry,
+								  safe_multixact_age,
 								  &dovacuum, &doanalyze, &wraparound);
 
 		/*
@@ -2188,6 +2205,7 @@ do_autovacuum(void)
 											 shared, dbentry);
 
 		relation_needs_vacanalyze(relid, relopts, relopts2, classForm, tabentry,
+								  safe_multixact_age,
 								  &dovacuum, &doanalyze, &wraparound);
 
 		/* ignore analyze for toast tables */
@@ -2278,7 +2296,8 @@ do_autovacuum(void)
 		 * the race condition is not closed but it is very small.
 		 */
 		MemoryContextSwitchTo(AutovacMemCxt);
-		tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc);
+		tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc,
+									safe_multixact_age);
 		if (tab == NULL)
 		{
 			/* someone else vacuumed the table, or it went away */
@@ -2495,7 +2514,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared,
  */
 static autovac_table *
 table_recheck_autovac(Oid relid, HTAB *table_toast_map,
-					  TupleDesc pg_class_desc)
+					  TupleDesc pg_class_desc,
+					  int safe_multixact_age)
 {
 	Form_pg_class classForm;
 	HeapTuple	classTup;
@@ -2542,6 +2562,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 										 shared, dbentry);
 
 	relation_needs_vacanalyze(relid, avopts, avopts2, classForm, tabentry,
+							  safe_multixact_age,
 							  &dovacuum, &doanalyze, &wraparound);
 
 	/* ignore ANALYZE for toast tables */
@@ -2598,6 +2619,18 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 			? avopts2->multixact_freeze_table_age
 			: default_multixact_freeze_table_age;
 
+		/*
+		 * Override the multixact freeze settings if we are running out of
+		 * member address space.
+		 */
+		if (safe_multixact_age >= 0)
+		{
+			multixact_freeze_table_age = Min(safe_multixact_age,
+											 multixact_freeze_table_age);
+			multixact_freeze_min_age = Min(safe_multixact_age / 2,
+										   multixact_freeze_min_age);
+		}
+
 		tab = palloc(sizeof(autovac_table));
 		tab->at_relid = relid;
 		tab->at_dovacuum = dovacuum;
@@ -2670,6 +2703,7 @@ relation_needs_vacanalyze(Oid relid,
 						  AutoVacOpts2 *relopts2,
 						  Form_pg_class classForm,
 						  PgStat_StatTabEntry *tabentry,
+						  int safe_multixact_age,
  /* output params below */
 						  bool *dovacuum,
 						  bool *doanalyze,
@@ -2733,6 +2767,10 @@ relation_needs_vacanalyze(Oid relid,
 		? Min(relopts2->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age)
 		: autovacuum_multixact_freeze_max_age;
 
+	/* Special settings if we are running out of member address space. */
+	if (safe_multixact_age >= 0)
+		multixact_freeze_max_age = Min(multixact_freeze_max_age, safe_multixact_age);
+
 	av_enabled = (relopts ? relopts->enabled : true);
 
 	/* Force vacuum if table is at risk of wraparound */
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index e69a3d2..d4513d7 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
 						  MultiXactOffset minMultiOffset);
 extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
 extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti);
+extern int	MultiXactCheckMemberUsage(void);
 
 extern void multixact_twophase_recover(TransactionId xid, uint16 info,
 						   void *recdata, uint32 len);