From 0cc869d0f5e7ef33f0ccdcd2ccbd8f89d5711590 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 27 Jun 2019 14:37:56 +0300 Subject: [PATCH 2/2] Use full 64-bit XID for checking if a deleted GiST page is old enough. Otherwise, after a deleted page gets even older, it becomes unrecyclable again. B-tree has the same problem, and has had since time immemorial, but let's at least fix this in GiST, where this is new. --- src/backend/access/gist/gistutil.c | 26 +++++++++++++++-- src/backend/access/gist/gistvacuum.c | 4 +-- src/backend/access/gist/gistxlog.c | 29 +++++++++++++++---- src/backend/access/rmgrdesc/gistdesc.c | 11 +++++--- src/backend/access/transam/varsup.c | 39 ++++++++++++++++++++++++++ src/include/access/gist.h | 35 +++++++++++++++++++++-- src/include/access/gist_private.h | 4 +-- src/include/access/gistxlog.h | 6 ++-- src/include/access/transam.h | 1 + 9 files changed, 133 insertions(+), 22 deletions(-) diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 49df05653b3..3a09197a242 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -882,9 +882,29 @@ gistNewBuffer(Relation r) bool gistPageRecyclable(Page page) { - return PageIsNew(page) || - (GistPageIsDeleted(page) && - TransactionIdPrecedes(GistPageGetDeleteXid(page), RecentGlobalXmin)); + if (PageIsNew(page)) + return true; + if (GistPageIsDeleted(page)) + { + /* + * The page was deleted, but when? If it was just deleted, a scan + * might have seen the downlink to it, and will read the page later. + * As long as that can happen, we must keep the deleted page around as + * a tombstone. + * + * Compare the deletion XID with RecentGlobalXmin. If deleteXid < + * RecentGlobalXmin, then no scan that's still in progress could have + * seen its downlink, and we can recycle it. + */ + FullTransactionId deletexid_full = GistPageGetDeleteXid(page); + FullTransactionId recentxmin_full; + + recentxmin_full = FullTransactionIdFromRecentXid(RecentGlobalXmin); + + if (FullTransactionIdPrecedes(deletexid_full, recentxmin_full)) + return true; + } + return false; } bytea * diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 4270226eee2..ad1576da5b8 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -595,7 +595,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ItemId iid; IndexTuple idxtuple; XLogRecPtr recptr; - TransactionId txid; + FullTransactionId txid; /* * Check that the leaf is still empty and deletable. @@ -648,7 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, * currently in progress must have ended. (That's much more conservative * than needed, but let's keep it safe and simple.) */ - txid = ReadNewTransactionId(); + txid = ReadNextFullTransactionId(); START_CRIT_SECTION(); diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 503db34d863..9fc7282200d 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -396,8 +396,27 @@ gistRedoPageReuse(XLogReaderState *record) */ if (InHotStandby) { - ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, - xlrec->node); + FullTransactionId latestRemovedFullXid = xlrec->latestRemovedFullXid; + FullTransactionId nextFullXid = ReadNextFullTransactionId(); + uint64 diff; + + /* + * ResolveRecoveryConflictWithSnapshot operates on 32-bit + * TransactionIds, so truncate the logged FullTransactionId. If the + * logged value is very old, so that XID wrap-around already happened + * on it, there can't be any snapshots that still see it. + */ + nextFullXid = ReadNextFullTransactionId(); + diff = U64FromFullTransactionId(nextFullXid) - + U64FromFullTransactionId(latestRemovedFullXid); + if (diff < MaxTransactionId / 2) + { + TransactionId latestRemovedXid; + + latestRemovedXid = XidFromFullTransactionId(latestRemovedFullXid); + ResolveRecoveryConflictWithSnapshot(latestRemovedXid, + xlrec->node); + } } } @@ -554,7 +573,7 @@ gistXLogSplit(bool page_is_leaf, * downlink from the parent page. */ XLogRecPtr -gistXLogPageDelete(Buffer buffer, TransactionId xid, +gistXLogPageDelete(Buffer buffer, FullTransactionId xid, Buffer parentBuffer, OffsetNumber downlinkOffset) { gistxlogPageDelete xlrec; @@ -578,7 +597,7 @@ gistXLogPageDelete(Buffer buffer, TransactionId xid, * Write XLOG record about reuse of a deleted page. */ void -gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid) +gistXLogPageReuse(Relation rel, BlockNumber blkno, FullTransactionId latestRemovedXid) { gistxlogPageReuse xlrec_reuse; @@ -591,7 +610,7 @@ gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXi /* XLOG stuff */ xlrec_reuse.node = rel->rd_node; xlrec_reuse.block = blkno; - xlrec_reuse.latestRemovedXid = latestRemovedXid; + xlrec_reuse.latestRemovedFullXid = latestRemovedXid; XLogBeginInsert(); XLogRegisterData((char *) &xlrec_reuse, SizeOfGistxlogPageReuse); diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 767864b58e6..eccb6fd9428 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -26,10 +26,11 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec) static void out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) { - appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u", + appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block, - xlrec->latestRemovedXid); + EpochFromFullTransactionId(xlrec->latestRemovedFullXid), + XidFromFullTransactionId(xlrec->latestRemovedFullXid)); } static void @@ -50,8 +51,10 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec) static void out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec) { - appendStringInfo(buf, "deleteXid %u; downlink %u", - xlrec->deleteXid, xlrec->downlinkOffset); + appendStringInfo(buf, "deleteXid %u:%u; downlink %u", + EpochFromFullTransactionId(xlrec->deleteXid), + XidFromFullTransactionId(xlrec->deleteXid), + xlrec->downlinkOffset); } void diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 5b759ec7f3f..e305a3be028 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -300,6 +300,45 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) LWLockRelease(XidGenLock); } +/* + * Extend a 32-bit TransactionId into a 64-bit FullTransactionId. + * + * This assumes that the xid is "recent", not older than 2 billion XIDs + * from the next xid. + */ +FullTransactionId +FullTransactionIdFromRecentXid(TransactionId xid) +{ + FullTransactionId nextxid_full; + uint32 nextxid_epoch; + TransactionId nextxid_xid; + uint32 xid_epoch; + + if (TransactionIdIsNormal(xid)) + { + /* + * Compute the epoch of the target xid from the next XID's epoch. + * This assumes that the target XID is within the 2 billion XID + * horizon from the next XID. + */ + nextxid_full = ReadNextFullTransactionId(); + nextxid_epoch = EpochFromFullTransactionId(nextxid_full); + nextxid_xid = XidFromFullTransactionId(nextxid_full); + + if (xid > nextxid_xid) + xid_epoch = nextxid_epoch - 1; + else + xid_epoch = nextxid_epoch; + } + else + { + /* Use epoch 0 for special XIDs. */ + xid_epoch = 0; + } + + return FullTransactionIdFromEpochAndXid(xid_epoch, xid); +} + /* * Advance the cluster-wide value for the oldest valid clog entry. * diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 6902f4115b7..14fa9646b23 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -16,6 +16,7 @@ #ifndef GIST_H #define GIST_H +#include "access/transam.h" #include "access/xlog.h" #include "access/xlogdefs.h" #include "storage/block.h" @@ -158,9 +159,37 @@ typedef struct GISTENTRY #define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn)) #define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val)) -/* For deleted pages we store last xid which could see the page in scan */ -#define GistPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) -#define GistPageSetDeleteXid(page, val) ( ((PageHeader) (page))->pd_prune_xid = val) + +/* + * On a deleted page, we store this struct. A deleted page doesn't contain any + * tuples, so we don't use the normal page layout with line pointers. Instead, + * this struct is stored right after the standard page header. pd_lower points + * to the end of this struct. If we add fields to this struct in the future, we + * can distinguish the old and new formats by pd_lower. + */ +typedef struct GISTDeletedPageContents +{ + /* last xid which could see the page in a scan */ + FullTransactionId deleteXid; +} GISTDeletedPageContents; + +static inline FullTransactionId +GistPageGetDeleteXid(Page page) +{ + Assert(GistPageIsDeleted(page)); + Assert(((PageHeader) page)->pd_lower == MAXALIGN(SizeOfPageHeaderData) + sizeof(GISTDeletedPageContents)); + + return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid; +} + +static inline void +GistPageSetDeleteXid(Page page, FullTransactionId deletexid) +{ + Assert(PageIsEmpty(page)); + ((PageHeader) page)->pd_lower = MAXALIGN(SizeOfPageHeaderData) + sizeof(GISTDeletedPageContents); + + ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid = deletexid; +} /* * Vector of GISTENTRY structs; user-defined methods union and picksplit diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index f80694bf9a8..f84ea71ecba 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -426,11 +426,11 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, /* gistxlog.c */ extern XLogRecPtr gistXLogPageDelete(Buffer buffer, - TransactionId xid, Buffer parentBuffer, + FullTransactionId xid, Buffer parentBuffer, OffsetNumber downlinkOffset); extern void gistXLogPageReuse(Relation rel, BlockNumber blkno, - TransactionId latestRemovedXid); + FullTransactionId latestRemovedXid); extern XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index 969a5376b5e..e44922d915c 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -83,7 +83,7 @@ typedef struct gistxlogPageSplit */ typedef struct gistxlogPageDelete { - TransactionId deleteXid; /* last Xid which could see page in scan */ + FullTransactionId deleteXid; /* last Xid which could see page in scan */ OffsetNumber downlinkOffset; /* Offset of downlink referencing this * page */ } gistxlogPageDelete; @@ -98,10 +98,10 @@ typedef struct gistxlogPageReuse { RelFileNode node; BlockNumber block; - TransactionId latestRemovedXid; + FullTransactionId latestRemovedFullXid; } gistxlogPageReuse; -#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedXid) + sizeof(TransactionId)) +#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedFullXid) + sizeof(FullTransactionId)) extern void gist_redo(XLogReaderState *record); extern void gist_desc(StringInfo buf, XLogReaderState *record); diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 6cbb0c82c73..e5e5e4b3bf3 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -225,6 +225,7 @@ extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); extern FullTransactionId GetNewTransactionId(bool isSubXact); extern void AdvanceNextFullTransactionIdPastXid(TransactionId xid); extern FullTransactionId ReadNextFullTransactionId(void); +extern FullTransactionId FullTransactionIdFromRecentXid(TransactionId xid); extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid); extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); -- 2.20.1