diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index 8764e00..ebb939a 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -628,6 +628,11 @@ HINT: Stop the postmaster and vacuum that database in single-user mode.
Like transaction IDs, multixact IDs are implemented as a
32-bit counter and corresponding storage, all of which requires
careful aging management, storage cleanup, and wraparound handling.
+ In addition to the multixact IDs, the storage area that holds the
+ transaction IDs included in each multixact must be managed. The number
+ of members in a multixact is variable and the storage used for members is
+ also addressed with a 32-bit index, so care must be taken to avoid running
+ out of addressable member storage space.
@@ -639,7 +644,11 @@ HINT: Stop the postmaster and vacuum that database in single-user mode.
pg_class>.relminmxid> stores the oldest
possible multixact ID still appearing in any tuple of that table.
If this value is older than
- , a whole-table
+ <xref linkend="guc-vacuum-multixact-freeze-table-age">, or if the amount
+ of used member storage space exceeds the amount that would be used by
+ that number of multixacts with an average of three members each and this
+ table has one of the older relminmxid values in the system, or if used
+ member storage exceeds 75% of addressable storage space, a whole-table
scan is forced. mxid_age()> can be used on
pg_class>.relminmxid> to find its age.
@@ -655,8 +664,14 @@ HINT: Stop the postmaster and vacuum that database in single-user mode.
As a safety device, a whole-table vacuum scan will occur for any table
whose multixact-age is greater than
- .
- This will occur even if autovacuum is nominally disabled.
+ <xref linkend="guc-autovacuum-multixact-freeze-max-age">. Whole-table
+ vacuum scans will also occur progressively for all tables, starting with
+ those that have the oldest multixact-age, if the amount of used member
+ storage space exceeds the amount that would be used
+ by <xref linkend="guc-autovacuum-multixact-freeze-max-age"> multixacts
+ with an average of three members each, or exceeds 75% of addressable
+ storage space. These will occur even if autovacuum is nominally
+ disabled.
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 928f9fe..09040e3 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2578,6 +2578,115 @@ find_multixact_start(MultiXactId multi)
}
/*
+ * Returns a snapshot of the number of active multixacts (nextMXact minus
+ * oldestMultiXactId) and of members in use (nextOffset minus oldest offset).
+ */
+static void
+ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
+{
+ MultiXactOffset nextOffset;
+ MultiXactOffset oldestOffset;
+ MultiXactId oldestMultiXactId;
+ MultiXactId nextMultiXactId;
+
+ LWLockAcquire(MultiXactGenLock, LW_SHARED);
+ nextOffset = MultiXactState->nextOffset;
+ oldestMultiXactId = MultiXactState->oldestMultiXactId;
+ nextMultiXactId = MultiXactState->nextMXact;
+ LWLockRelease(MultiXactGenLock);
+ /*
+ * XXX: Could we store oldestMultiXactMemberOffset in shmem and
+ * pg_controldata, alongside oldestMultiXactId?
+ */
+ oldestOffset = find_multixact_start(oldestMultiXactId);
+ *members = nextOffset - oldestOffset;
+ *multixacts = nextMultiXactId - oldestMultiXactId;
+}
+
+/*
+ * Computes a multixact age that we can use to trigger earlier wraparound
+ * vacuums than usual, if special action is required to avoid impending
+ * exhaustion of the address space of multixact members (caused by large
+ * multixacts).
+ *
+ * If at most the "safe member count" is active, then we return -1 to indicate
+ * that no special action needs to be taken. This should always be the case
+ * for users who don't make use of large multixacts.
+ *
+ * If at least the "dangerous member count" is active, then we return a max
+ * freeze age of zero to trigger aggressive wraparound vacuuming.
+ *
+ * In between the safe and dangerous levels, we return the current number of
+ * active multixids scaled down linearly for higher usage fractions, so that
+ * vacuuming becomes more aggressive as the member SLRU grows, in the hope
+ * that different tables will be vacuumed at different times due to their
+ * varying relminmxid values.
+ *
+ * The safe member count threshold is based on the freeze_max_age passed in
+ * (either the autovacuum_multixact_freeze_max_age or
+ * vacuum_multixact_freeze_table_age GUC) and the scaling factor
+ * AVG_MULTIXACT_SIZE_THRESHOLD. If the average size of active multixids is
+ * below AVG_MULTIXACT_SIZE_THRESHOLD, then this function should always return
+ * -1 because autovacuum should trigger vacuums for multixact ID wraparound
+ * before the member space exceeds safe_member_count. If the average size of
+ * active multixacts is above this number, then member space usage should
+ * exceed safe_member_count before the usual multixact ID wraparound
+ * prevention, so this function will begin to return values that change the
+ * behavior of vacuum and autovacuum to trigger vacuums sooner.
+ *
+ * The dangerous member count threshold is arbitrarily set at 75% of member
+ * addressing space.
+ */
+#define AVG_MULTIXACT_SIZE_THRESHOLD 3
+#define DANGEROUS_MEMBER_COUNT (MaxMultiXactOffset - (MaxMultiXactOffset / 4))
+int
+MultiXactCheckMemberUsage(int freeze_max_age)
+{
+ /* Avoid overflow if the passed in GUC is set very high. */
+ const MultiXactOffset safe_member_count =
+ (freeze_max_age >= INT_MAX / AVG_MULTIXACT_SIZE_THRESHOLD
+ ? DANGEROUS_MEMBER_COUNT
+ : freeze_max_age * AVG_MULTIXACT_SIZE_THRESHOLD);
+
+ MultiXactOffset members;
+ uint32 multixacts;
+ double fraction;
+
+ ReadMultiXactCounts(&multixacts, &members);
+
+ if (members >= DANGEROUS_MEMBER_COUNT)
+ {
+ /* We need a wraparound vacuum for all tables now. */
+ return 0;
+ }
+
+ if (members <= safe_member_count)
+ {
+ /* There is no danger of member space wrap currently. */
+ return -1;
+ }
+
+ /*
+ * At safe_member_count, we want to select the smallest possible fraction
+ * of tables to vacuum, and at DANGEROUS_MEMBER_COUNT, we want to select
+ * the largest fraction (all of them). The autovacuum code is based on
+ * selecting tables to vacuum by the age of their pg_class.relminmxid, and
+ * we know the age of the oldest multixact in the system, so that's the
+ * value we want to use when members is near safe_member_count. It should
+ * hopefully select one table to work on. By gradually scaling that
+ * number down to zero as members moves towards DANGEROUS_MEMBER_COUNT, we
+ * select a greater fraction of tables to vacuum. How evenly this
+ * approach spreads vacuuming work depends on the distribution of tables'
+ * relminmxid values: the worst case is that they're all the same (like
+ * after a restored pg_dump), then we'll select them for vacuuming at the
+ * same time.
+ */
+ fraction = (double) (members - safe_member_count) /
+ (double) (DANGEROUS_MEMBER_COUNT - safe_member_count);
+ return (int) (multixacts * (1.0 - fraction));
+}
+
+/*
* SlruScanDirectory callback.
* This callback deletes segments that are outside the range determined by
* the given page numbers.
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7ead161..a2199e8 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -105,10 +105,25 @@ ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
}
else
{
+ int safe_multixact_age = MultiXactCheckMemberUsage(vacuum_multixact_freeze_table_age);
+
+ if (safe_multixact_age >= 0)
+ {
+ /*
+ * Override the multixact freeze settings if we are running out of
+ * member address space.
+ */
+ params.multixact_freeze_table_age = safe_multixact_age;
+ params.multixact_freeze_min_age = safe_multixact_age / 2;
+ }
+ else
+ {
+ /* Use the default values. */
+ params.multixact_freeze_min_age = -1;
+ params.multixact_freeze_table_age = -1;
+ }
params.freeze_min_age = -1;
params.freeze_table_age = -1;
- params.multixact_freeze_min_age = -1;
- params.multixact_freeze_table_age = -1;
}
/* user-invoked vacuum is never "for wraparound" */
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index be4cd1d..47b58fd 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -297,10 +297,12 @@ static void do_autovacuum(void);
static void FreeWorkerInfo(int code, Datum arg);
static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
- TupleDesc pg_class_desc);
+ TupleDesc pg_class_desc,
+ int safe_multixact_age);
static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
+ int safe_multixact_age,
bool *dovacuum, bool *doanalyze, bool *wraparound);
static void autovacuum_do_vac_analyze(autovac_table *tab,
@@ -1077,6 +1079,7 @@ do_start_worker(void)
Oid retval = InvalidOid;
MemoryContext tmpcxt,
oldcxt;
+ int safe_multixact_age;
/* return quickly when there are no free workers */
LWLockAcquire(AutovacuumLock, LW_SHARED);
@@ -1118,7 +1121,12 @@ do_start_worker(void)
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
- multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
+ safe_multixact_age = MultiXactCheckMemberUsage(autovacuum_multixact_freeze_max_age);
+ if (safe_multixact_age >= 0)	/* member space is tight: tighten the limit */
+ multiForceLimit = recentMulti - Min(autovacuum_multixact_freeze_max_age,
+ safe_multixact_age);
+ else
+ multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
@@ -1881,6 +1889,7 @@ do_autovacuum(void)
BufferAccessStrategy bstrategy;
ScanKeyData key;
TupleDesc pg_class_desc;
+ int safe_multixact_age;
/*
* StartTransactionCommand and CommitTransactionCommand will automatically
@@ -1975,6 +1984,13 @@ do_autovacuum(void)
relScan = heap_beginscan_catalog(classRel, 0, NULL);
/*
+ * Check if member space usage is in danger of being exhausted, so we can
+ * pass the recommended cutoff age to relation_needs_vacanalyze and
+ * table_recheck_autovac.
+ */
+ safe_multixact_age = MultiXactCheckMemberUsage(autovacuum_multixact_freeze_max_age);
+
+ /*
* On the first pass, we collect main tables to vacuum, and also the main
* table relid to TOAST relid mapping.
*/
@@ -2001,6 +2017,7 @@ do_autovacuum(void)
/* Check if it needs vacuum or analyze */
relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
+ safe_multixact_age,
&dovacuum, &doanalyze, &wraparound);
/*
@@ -2129,6 +2146,7 @@ do_autovacuum(void)
shared, dbentry);
relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
+ safe_multixact_age,
&dovacuum, &doanalyze, &wraparound);
/* ignore analyze for toast tables */
@@ -2235,7 +2253,8 @@ do_autovacuum(void)
* the race condition is not closed but it is very small.
*/
MemoryContextSwitchTo(AutovacMemCxt);
- tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc);
+ tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc,
+ safe_multixact_age);
if (tab == NULL)
{
/* someone else vacuumed the table, or it went away */
@@ -2442,7 +2461,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared,
*/
static autovac_table *
table_recheck_autovac(Oid relid, HTAB *table_toast_map,
- TupleDesc pg_class_desc)
+ TupleDesc pg_class_desc,
+ int safe_multixact_age)
{
Form_pg_class classForm;
HeapTuple classTup;
@@ -2488,6 +2508,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
shared, dbentry);
relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
+ safe_multixact_age,
&dovacuum, &doanalyze, &wraparound);
/* ignore ANALYZE for toast tables */
@@ -2550,6 +2571,18 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
? avopts->multixact_freeze_table_age
: default_multixact_freeze_table_age;
+ /*
+ * Override the multixact freeze settings if we are running out of
+ * member address space.
+ */
+ if (safe_multixact_age >= 0)
+ {
+ multixact_freeze_table_age = Min(safe_multixact_age,
+ multixact_freeze_table_age);
+ multixact_freeze_min_age = Min(safe_multixact_age / 2,
+ multixact_freeze_min_age);
+ }
+
tab = palloc(sizeof(autovac_table));
tab->at_relid = relid;
tab->at_vacoptions = VACOPT_SKIPTOAST |
@@ -2606,8 +2639,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
* analyze. This is asymmetric to the VACUUM case.
*
* We also force vacuum if the table's relfrozenxid is more than freeze_max_age
- * transactions back, and if its relminmxid is more than
- * multixact_freeze_max_age multixacts back.
+ * transactions back, or if its relminmxid is more than
+ * multixact_freeze_max_age or safe_multixact_age multixacts back.
*
* A table whose autovacuum_enabled option is false is
* automatically skipped (unless we have to vacuum it due to freeze_max_age).
@@ -2624,6 +2657,7 @@ relation_needs_vacanalyze(Oid relid,
AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
+ int safe_multixact_age,
/* output params below */
bool *dovacuum,
bool *doanalyze,
@@ -2687,6 +2721,10 @@ relation_needs_vacanalyze(Oid relid,
? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age)
: autovacuum_multixact_freeze_max_age;
+ /* Special settings if we are running out of member address space. */
+ if (safe_multixact_age >= 0)
+ multixact_freeze_max_age = Min(multixact_freeze_max_age, safe_multixact_age);
+
av_enabled = (relopts ? relopts->enabled : true);
/* Force vacuum if table is at risk of wraparound */
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 640b198..55a9f6c 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti);
+extern int MultiXactCheckMemberUsage(int freeze_max_age);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);