diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 928f9fe..1d7d942 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -2578,6 +2578,32 @@ find_multixact_start(MultiXactId multi) } /* + * Returns an instantaneous snapshot of the current number of active + * multixacts and the number of members in the members SLRU area. + */ +void +ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) +{ + MultiXactOffset nextOffset; + MultiXactOffset oldestOffset; + MultiXactId oldestMultiXactId; + MultiXactId nextMultiXactId; + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + oldestMultiXactId = MultiXactState->oldestMultiXactId; + nextMultiXactId = MultiXactState->nextMXact; + LWLockRelease(MultiXactGenLock); + /* + * XXX: Could we store oldestMultiXactMemberOffset in shmem and + * pg_controldata, alongside oldestMultiXactId? + */ + oldestOffset = find_multixact_start(oldestMultiXactId); + *members = nextOffset - oldestOffset; + *multixacts = nextMultiXactId - oldestMultiXactId; +} + +/* * SlruScanDirectory callback. * This callback deletes segments that are outside the range determined by * the given page numbers. diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7ead161..417ef64 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -105,10 +105,25 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel) } else { + int max_multixact_age_to_avoid_member_wrap = + compute_max_multixact_age_to_avoid_member_wrap(true); + if (max_multixact_age_to_avoid_member_wrap >= 0) + { + /* + * Override the multixact freeze settings if we are running out of + * member address space. + */ + params.multixact_freeze_table_age = max_multixact_age_to_avoid_member_wrap; + params.multixact_freeze_min_age = 0; + } + else + { + /* Use the default values. 
*/ + params.multixact_freeze_min_age = -1; + params.multixact_freeze_table_age = -1; + } params.freeze_min_age = -1; params.freeze_table_age = -1; - params.multixact_freeze_min_age = -1; - params.multixact_freeze_table_age = -1; } /* user-invoked vacuum is never "for wraparound" */ diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index be4cd1d..96506d4 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -297,10 +297,12 @@ static void do_autovacuum(void); static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc); + TupleDesc pg_class_desc, + int max_multixact_age_to_avoid_member_wrap); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int multixact_freeze_age_to_avoid_member_wrap, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1052,6 +1054,82 @@ db_comparator(const void *a, const void *b) } /* + * Computes a multixact age that we can use to trigger earlier wraparound + * vacuums than usual, if special action is required to avoid impending + * exhaustion of the address space of multixact members (caused by large + * multixacts). + * + * If less than a "safe member count" is active, then we return -1 to indicate + * that no special action needs to be taken. This should always be the case + * for users who don't make use of large multixacts. + * + * If more than the "dangerous member count" is active, then we return a max + * freeze age of zero to trigger aggressive wraparound vacuuming. 
+ * + * In between the safe and dangerous levels, we return the current number of + * active multixids scaled down linearly for higher usage fractions, so that + * vacuuming becomes more aggressive as the member SLRU grows, in the hope + * that tables are vacuumed at different times due to their varying + * relminmxid values. The age never exceeds autovacuum_multixact_freeze_max_age. + * + * Based on the assumption that there is no reasonable way for an end user to + * configure the thresholds for this, we define the safe member count to be + * half of the member address space, and the dangerous level to be + * three-quarters. These numbers are lowered for manual invocations of + * vacuum, to give scheduled vacuum commands a head start, so they have a + * chance to tackle member space usage problems before autovacuum needs to be + * invoked. + */ +int +compute_max_multixact_age_to_avoid_member_wrap(bool manual) +{ + MultiXactOffset members; + uint32 multixacts; + double fraction; + MultiXactOffset safe_member_count = MaxMultiXactOffset / 2; + MultiXactOffset dangerous_member_count = MaxMultiXactOffset - + (MaxMultiXactOffset / 4); + + /* Give manual vacuum commands a head start before autovacuum jobs. */ + if (manual) + { + safe_member_count /= 2; + dangerous_member_count /= 2; + } + + ReadMultiXactCounts(&multixacts, &members); + + if (members <= safe_member_count) + { + /* + * There is no danger of member wrap. Return the special value that + * means there is no max multixact age needed to avoid member wrap, + * at present. + */ + return -1; + } + + if (members >= dangerous_member_count) + { + /* We need a wraparound vacuum for all tables now. */ + return 0; + } + + /* + * Choose a cutoff age which is a fraction of the approximate current + * number of active multixacts: close to that number near + * safe_member_count, so that only tables with the oldest relminmxid + * values become candidates for wraparound vacuums, 
and closer to zero + * near dangerous_member_count, so that more tables qualify. Clamp to + * autovacuum_multixact_freeze_max_age so we never vacuum less eagerly. + */ + fraction = (double) (members - safe_member_count) / + (double) (dangerous_member_count - safe_member_count); + return (int) Min(multixacts * (1.0 - fraction), + (double) autovacuum_multixact_freeze_max_age); +} + +/* * do_start_worker * * Bare-bones procedure for starting an autovacuum worker from the launcher. @@ -1077,6 +1155,7 @@ do_start_worker(void) Oid retval = InvalidOid; MemoryContext tmpcxt, oldcxt; + int max_multixact_age_to_avoid_member_wrap; /* return quickly when there are no free workers */ LWLockAcquire(AutovacuumLock, LW_SHARED); @@ -1118,7 +1197,12 @@ do_start_worker(void) /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; + max_multixact_age_to_avoid_member_wrap = + compute_max_multixact_age_to_avoid_member_wrap(false); + if (max_multixact_age_to_avoid_member_wrap >= 0) + multiForceLimit = recentMulti - max_multixact_age_to_avoid_member_wrap; + else + multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; if (multiForceLimit < FirstMultiXactId) multiForceLimit -= FirstMultiXactId; @@ -1881,6 +1965,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; + int max_multixact_age_to_avoid_member_wrap; /* * StartTransactionCommand and CommitTransactionCommand will automatically @@ -1975,6 +2060,14 @@ do_autovacuum(void) relScan = heap_beginscan_catalog(classRel, 0, NULL); /* + * Find the multixact age so that relation_needs_vacanalyze and + * table_recheck_autovac can detect cases where the multixact member space + * is in danger of being exhausted. + */ + max_multixact_age_to_avoid_member_wrap = + compute_max_multixact_age_to_avoid_member_wrap(false); + + /* * On the first pass, we collect main tables to vacuum, and also the main * table relid to TOAST relid mapping. 
*/ @@ -2001,6 +2094,7 @@ do_autovacuum(void) /* Check if it needs vacuum or analyze */ relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + max_multixact_age_to_avoid_member_wrap, &dovacuum, &doanalyze, &wraparound); /* @@ -2129,6 +2223,7 @@ do_autovacuum(void) shared, dbentry); relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + max_multixact_age_to_avoid_member_wrap, &dovacuum, &doanalyze, &wraparound); /* ignore analyze for toast tables */ @@ -2235,7 +2330,8 @@ do_autovacuum(void) * the race condition is not closed but it is very small. */ MemoryContextSwitchTo(AutovacMemCxt); - tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc); + tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, + max_multixact_age_to_avoid_member_wrap); if (tab == NULL) { /* someone else vacuumed the table, or it went away */ @@ -2442,7 +2538,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, */ static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc) + TupleDesc pg_class_desc, + int max_multixact_age_to_avoid_member_wrap) { Form_pg_class classForm; HeapTuple classTup; @@ -2488,6 +2585,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, shared, dbentry); relation_needs_vacanalyze(relid, avopts, classForm, tabentry, + max_multixact_age_to_avoid_member_wrap, &dovacuum, &doanalyze, &wraparound); /* ignore ANALYZE for toast tables */ @@ -2550,6 +2648,16 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, ? avopts->multixact_freeze_table_age : default_multixact_freeze_table_age; + /* + * Override the multixact freeze settings if we are running out of + * member address space. 
+ */ + if (max_multixact_age_to_avoid_member_wrap >= 0) + { + multixact_freeze_table_age = max_multixact_age_to_avoid_member_wrap; + multixact_freeze_min_age = 0; + } + tab = palloc(sizeof(autovac_table)); tab->at_relid = relid; tab->at_vacoptions = VACOPT_SKIPTOAST | @@ -2624,6 +2732,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int max_multixact_age_to_avoid_member_wrap, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -2687,6 +2796,10 @@ relation_needs_vacanalyze(Oid relid, ? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age) : autovacuum_multixact_freeze_max_age; + /* Special settings if we are running out of member address space. */ + if (max_multixact_age_to_avoid_member_wrap >= 0) + multixact_freeze_max_age = max_multixact_age_to_avoid_member_wrap; + av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 640b198..0500435 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti); +extern void ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len); diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index 6eaaf4c..6a71b5b 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -53,6 +53,8 @@ extern void AutoVacWorkerFailed(void); /* autovacuum cost-delay balancer */ extern void AutoVacuumUpdateDelay(void); +extern int 
compute_max_multixact_age_to_avoid_member_wrap(bool manual); + #ifdef EXEC_BACKEND extern void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn(); extern void AutoVacWorkerMain(int argc, char *argv[]) pg_attribute_noreturn();