diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 8764e00..ebb939a 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -628,6 +628,11 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. Like transaction IDs, multixact IDs are implemented as a 32-bit counter and corresponding storage, all of which requires careful aging management, storage cleanup, and wraparound handling. + In addition to the multixact IDs, the storage area that holds the + transaction IDs included in each multixact must be managed. The number + of members in a multixact is variable and the storage used for members is + also addressed with a 32-bit index, so care must be taken to avoid running + out of addressable member storage space. @@ -639,7 +644,11 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. pg_class.relminmxid stores the oldest possible multixact ID still appearing in any tuple of that table. If this value is older than - <xref linkend="guc-vacuum-multixact-freeze-table-age">, a whole-table + <xref linkend="guc-vacuum-multixact-freeze-table-age">, or if the amount + of used member storage space exceeds the amount that would be used by + that number of multixacts with an average of three members each and this + table has one of the older relminmxid values in the system, or if used + member storage exceeds 75% of addressable storage space, a whole-table scan is forced. mxid_age() can be used on pg_class.relminmxid to find its age. @@ -655,8 +664,14 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. As a safety device, a whole-table vacuum scan will occur for any table whose multixact-age is greater than - <xref linkend="guc-autovacuum-multixact-freeze-max-age">. - This will occur even if autovacuum is nominally disabled. + <xref linkend="guc-autovacuum-multixact-freeze-max-age">. 
Whole-table + vacuum scans will also occur progressively for all tables, starting with + those that have the oldest multixact-age, if the amount of used member + storage space exceeds the amount that would be used + by <xref linkend="guc-autovacuum-multixact-freeze-max-age"> multixacts + with an average of three members each, or exceeds 75% of addressable + storage space. These will occur even if autovacuum is nominally + disabled. diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 928f9fe..09040e3 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -2578,6 +2578,115 @@ find_multixact_start(MultiXactId multi) } /* + * Returns an instantaneous snapshot of the current number of active + * multixacts and the number of members in the members SLRU area. + */ +static void +ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) +{ + MultiXactOffset nextOffset; + MultiXactOffset oldestOffset; + MultiXactId oldestMultiXactId; + MultiXactId nextMultiXactId; + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + oldestMultiXactId = MultiXactState->oldestMultiXactId; + nextMultiXactId = MultiXactState->nextMXact; + LWLockRelease(MultiXactGenLock); + /* + * XXX: Could we store oldestMultiXactMemberOffset in shmem and + * pg_controldata, alongside oldestMultiXactId? + */ + oldestOffset = find_multixact_start(oldestMultiXactId); + *members = nextOffset - oldestOffset; + *multixacts = nextMultiXactId - oldestMultiXactId; +} + +/* + * Computes a multixact age that we can use to trigger earlier wraparound + * vacuums than usual, if special action is required to avoid impending + * exhaustion of the address space of multixact members (caused by large + * multixacts). + * + * If less than a "safe member count" is active, then we return -1 to indicate + * that no special action needs to be taken. This should always be the case + * for users who don't make use of large multixacts. 
+ * + * If more than the "dangerous member count" is active, then we return a max + * freeze age of zero to trigger aggressive wraparound vacuuming. + * + * In between the safe and dangerous levels, we return the current number of + * active multixids scaled down linearly for higher usage fractions, so that + * vacuuming becomes more aggressive as the member SLRU grows, in the hope + * that different tables will be vacuumed at different times due to their + * varying relminmxid values. + * + * The safe member count threshold is based on the freeze_max_age passed in + * (either the autovacuum_multixact_freeze_max_age or + * vacuum_multixact_freeze_table_age GUC) and the scaling factor + * AVG_MULTIXACT_SIZE_THRESHOLD. If the average size of active multixids is + * below AVG_MULTIXACT_SIZE_THRESHOLD, then this function should always return + * -1 because autovacuum should trigger vacuums for multixact ID wraparound + * before the member space exceeds safe_member_count. If the average size of + * active multixacts is above this number, then member space usage should + * exceed safe_member_count before the usual multixact ID wraparound + * prevention, so this function will begin to return values that change the + * behavior of vacuum and autovacuum to trigger vacuums sooner. + * + * The dangerous member count threshold is arbitrarily set at 75% of member + * addressing space. + */ +#define AVG_MULTIXACT_SIZE_THRESHOLD 3 +#define DANGEROUS_MEMBER_COUNT (MaxMultiXactOffset - (MaxMultiXactOffset / 4)) +int +MultiXactCheckMemberUsage(int freeze_max_age) +{ + /* Avoid overflow if the passed in GUC is set very high. */ + const MultiXactOffset safe_member_count = + (freeze_max_age >= INT_MAX / AVG_MULTIXACT_SIZE_THRESHOLD + ? 
DANGEROUS_MEMBER_COUNT + : freeze_max_age * AVG_MULTIXACT_SIZE_THRESHOLD); + + MultiXactOffset members; + uint32 multixacts; + double fraction; + + ReadMultiXactCounts(&multixacts, &members); + + if (members >= DANGEROUS_MEMBER_COUNT) + { + /* We need a wraparound vacuum for all tables now. */ + return 0; + } + + if (members <= safe_member_count) + { + /* There is no danger of member space wrap currently. */ + return -1; + } + + /* + * At safe_member_count, we want to select the smallest possible fraction + * of tables to vacuum, and at DANGEROUS_MEMBER_COUNT, we want to select + * the largest fraction (all of them). The autovacuum code is based on + * selecting tables to vacuum by the age of their pg_class.relminmxid, and + * we know the age of the oldest multixact in the system, so that's the + * value we want to use when members is near safe_member_count. It should + * hopefully select one table to work on. By gradually scaling that + * number down to zero as members moves towards DANGEROUS_MEMBER_COUNT, we + * select a greater fraction of tables to vacuum. How evenly this + * approach spreads vacuuming work depends on the distribution of tables' + * relminmxid values: the worst case is that they're all the same (like + * after a restored pg_dump), then we'll select them for vacuuming at the + * same time. + */ + fraction = (double) (members - safe_member_count) / + (double) (DANGEROUS_MEMBER_COUNT - safe_member_count); + return (int) (multixacts * (1.0 - fraction)); +} + +/* * SlruScanDirectory callback. * This callback deletes segments that are outside the range determined by * the given page numbers. 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7ead161..a2199e8 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -105,10 +105,25 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel) } else { + int safe_multixact_age = MultiXactCheckMemberUsage(vacuum_multixact_freeze_table_age); + + if (safe_multixact_age >= 0) + { + /* + * Override the multixact freeze settings if we are running out of + * member address space. + */ + params.multixact_freeze_table_age = safe_multixact_age; + params.multixact_freeze_min_age = safe_multixact_age / 2; + } + else + { + /* Use the default values. */ + params.multixact_freeze_min_age = -1; + params.multixact_freeze_table_age = -1; + } params.freeze_min_age = -1; params.freeze_table_age = -1; - params.multixact_freeze_min_age = -1; - params.multixact_freeze_table_age = -1; } /* user-invoked vacuum is never "for wraparound" */ diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index be4cd1d..47b58fd 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -297,10 +297,12 @@ static void do_autovacuum(void); static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc); + TupleDesc pg_class_desc, + int safe_multixact_age); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int safe_multixact_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1077,6 +1079,7 @@ do_start_worker(void) Oid retval = InvalidOid; MemoryContext tmpcxt, oldcxt; + int safe_multixact_age; /* return quickly when there are no free workers */ LWLockAcquire(AutovacuumLock, LW_SHARED); @@ -1118,7 +1121,12 @@ do_start_worker(void) /* Also determine the oldest datminmxid we will consider. 
*/ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; + safe_multixact_age = MultiXactCheckMemberUsage(autovacuum_multixact_freeze_max_age); + if (safe_multixact_age >= 0) + multiForceLimit = recentMulti - Min(autovacuum_multixact_freeze_max_age, + safe_multixact_age); + else + multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; if (multiForceLimit < FirstMultiXactId) multiForceLimit -= FirstMultiXactId; @@ -1881,6 +1889,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; + int safe_multixact_age; /* * StartTransactionCommand and CommitTransactionCommand will automatically @@ -1975,6 +1984,13 @@ do_autovacuum(void) relScan = heap_beginscan_catalog(classRel, 0, NULL); /* + * Check if member space usage is in danger of being exhausted, so we can + * pass the recommended cutoff age to relation_needs_vacanalyze and + * table_recheck_autovac. + */ + safe_multixact_age = MultiXactCheckMemberUsage(autovacuum_multixact_freeze_max_age); + + /* * On the first pass, we collect main tables to vacuum, and also the main * table relid to TOAST relid mapping. */ @@ -2001,6 +2017,7 @@ do_autovacuum(void) /* Check if it needs vacuum or analyze */ relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + safe_multixact_age, &dovacuum, &doanalyze, &wraparound); /* @@ -2129,6 +2146,7 @@ do_autovacuum(void) shared, dbentry); relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + safe_multixact_age, &dovacuum, &doanalyze, &wraparound); /* ignore analyze for toast tables */ @@ -2235,7 +2253,8 @@ do_autovacuum(void) * the race condition is not closed but it is very small. 
*/ MemoryContextSwitchTo(AutovacMemCxt); - tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc); + tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, + safe_multixact_age); if (tab == NULL) { /* someone else vacuumed the table, or it went away */ @@ -2442,7 +2461,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, */ static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc) + TupleDesc pg_class_desc, + int safe_multixact_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2488,6 +2508,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, shared, dbentry); relation_needs_vacanalyze(relid, avopts, classForm, tabentry, + safe_multixact_age, &dovacuum, &doanalyze, &wraparound); /* ignore ANALYZE for toast tables */ @@ -2550,6 +2571,18 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, ? avopts->multixact_freeze_table_age : default_multixact_freeze_table_age; + /* + * Override the multixact freeze settings if we are running out of + * member address space. + */ + if (safe_multixact_age >= 0) + { + multixact_freeze_table_age = Min(safe_multixact_age, + multixact_freeze_table_age); + multixact_freeze_min_age = Min(safe_multixact_age / 2, + multixact_freeze_min_age); + } + tab = palloc(sizeof(autovac_table)); tab->at_relid = relid; tab->at_vacoptions = VACOPT_SKIPTOAST | @@ -2606,8 +2639,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, * analyze. This is asymmetric to the VACUUM case. * * We also force vacuum if the table's relfrozenxid is more than freeze_max_age - * transactions back, and if its relminmxid is more than - * multixact_freeze_max_age multixacts back. + * transactions back, or if its relminmxid is more than + * multixact_freeze_max_age or safe_multixact_age multixacts back. * * A table whose autovacuum_enabled option is false is * automatically skipped (unless we have to vacuum it due to freeze_max_age). 
@@ -2624,6 +2657,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int safe_multixact_age, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -2687,6 +2721,10 @@ relation_needs_vacanalyze(Oid relid, ? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age) : autovacuum_multixact_freeze_max_age; + /* Special settings if we are running out of member address space. */ + if (safe_multixact_age >= 0) + multixact_freeze_max_age = Min(multixact_freeze_max_age, safe_multixact_age); + av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 640b198..55a9f6c 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti); +extern int MultiXactCheckMemberUsage(int freeze_max_age); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len);