From 3b6c4f6901f4b861e37e2aaba755dc66a5012607 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 11 Mar 2019 15:34:01 +0200 Subject: [PATCH 4/6] Move the page deletion logic to separate function. If a VACUUM does multiple index passes, I think we only want to do the empty page deletion after the final pass. That saves effort, since we only need to scan the internal pages once. But even if we wanted to do it on every pass, I think having a separate function makes it more readable. --- src/backend/access/gist/gistvacuum.c | 464 ++++++++++++++------------- 1 file changed, 240 insertions(+), 224 deletions(-) diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index eb90b2077d3..b95e755406e 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -23,25 +23,31 @@ #include "storage/indexfsm.h" #include "storage/lmgr.h" -/* Working state needed by gistbulkdelete */ typedef struct { + IndexBulkDeleteResult stats; + IndexVacuumInfo *info; - IndexBulkDeleteResult *stats; + BlockNumber numEmptyPages; + BlockSet internalPagesMap; + BlockSet emptyLeafPagesMap; +} GistBulkDeleteResult; + +/* Working state needed by gistbulkdelete */ +typedef struct +{ + GistBulkDeleteResult *stats; IndexBulkDeleteCallback callback; void *callback_state; GistNSN startNSN; BlockNumber totFreePages; /* true total # of free pages */ - BlockNumber emptyPages; - - BlockSet internalPagesMap; - BlockSet emptyLeafPagesMap; } GistVacState; -static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, +static void gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state); static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno); +static void gistvacuum_recycle_pages(GistBulkDeleteResult *stats); /* * VACUUM bulkdelete stage: remove index entries. 
@@ -50,13 +56,15 @@ IndexBulkDeleteResult * gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state) { + GistBulkDeleteResult *gist_stats = (GistBulkDeleteResult *) stats; + /* allocate stats if first time through, else re-use existing struct */ - if (stats == NULL) - stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + if (gist_stats == NULL) + gist_stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult)); - gistvacuumscan(info, stats, callback, callback_state); + gistvacuumscan(info, gist_stats, callback, callback_state); - return stats; + return (IndexBulkDeleteResult *) gist_stats; } /* @@ -65,6 +73,8 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteResult * gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) { + GistBulkDeleteResult *gist_stats = (GistBulkDeleteResult *) stats; + /* No-op in ANALYZE ONLY mode */ if (info->analyze_only) return stats; @@ -74,12 +84,15 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * stats from the latest gistbulkdelete call. If it wasn't called, we * still need to do a pass over the index, to obtain index statistics. 
*/ - if (stats == NULL) + if (gist_stats == NULL) { - stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); - gistvacuumscan(info, stats, NULL, NULL); + gist_stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult)); + gistvacuumscan(info, gist_stats, NULL, NULL); } + /* Recycle empty pages */ + gistvacuum_recycle_pages(gist_stats); + /* * It's quite possible for us to be fooled by concurrent page splits into * double-counting some index tuples, so disbelieve any total that exceeds @@ -88,11 +101,11 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) */ if (!info->estimated_count) { - if (stats->num_index_tuples > info->num_heap_tuples) - stats->num_index_tuples = info->num_heap_tuples; + if (gist_stats->stats.num_index_tuples > info->num_heap_tuples) + gist_stats->stats.num_index_tuples = info->num_heap_tuples; } - return stats; + return (IndexBulkDeleteResult *) gist_stats; } /* @@ -116,7 +129,7 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * whether any given heap tuple (identified by ItemPointer) is being deleted. */ static void -gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, +gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state) { Relation rel = info->index; @@ -129,12 +142,12 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * Reset counts that will be incremented during the scan; needed in case * of multiple scans during a single VACUUM command. 
*/ - stats->estimated_count = false; - stats->num_index_tuples = 0; - stats->pages_deleted = 0; + stats->stats.estimated_count = false; + stats->stats.num_index_tuples = 0; + stats->stats.pages_deleted = 0; /* Set up info to pass down to gistvacuumpage */ - vstate.info = info; + stats->info = info; vstate.stats = stats; vstate.callback = callback; vstate.callback_state = callback_state; @@ -143,9 +156,6 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, else vstate.startNSN = gistGetFakeLSN(rel); vstate.totFreePages = 0; - vstate.emptyPages = 0; - vstate.internalPagesMap = NULL; - vstate.emptyLeafPagesMap = NULL; /* * The outer loop iterates over all index pages, in physical order (we @@ -207,196 +217,8 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexFreeSpaceMapVacuum(rel); /* update statistics */ - stats->num_pages = num_pages; - stats->pages_free = vstate.totFreePages; - - /* rescan all inner pages to find those that have empty child pages */ - if (vstate.emptyPages > 0) - { - BlockNumber x; - - x = InvalidBlockNumber; - while (vstate.emptyPages > 0 && - (x = blockset_next(vstate.internalPagesMap, x)) != InvalidBlockNumber) - { - Buffer buffer; - Page page; - OffsetNumber off, - maxoff; - IndexTuple idxtuple; - ItemId iid; - OffsetNumber todelete[MaxOffsetNumber]; - Buffer buftodelete[MaxOffsetNumber]; - int ntodelete = 0; - - blkno = (BlockNumber) x; - - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - info->strategy); - - LockBuffer(buffer, GIST_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); - if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page)) - { - UnlockReleaseBuffer(buffer); - continue; - } - - maxoff = PageGetMaxOffsetNumber(page); - /* Check that leafs are still empty and decide what to delete */ - for (off = FirstOffsetNumber; off <= maxoff && ntodelete < maxoff-1; off = OffsetNumberNext(off)) - { - Buffer leafBuffer; - Page leafPage; - BlockNumber leafBlockNo; - 
- iid = PageGetItemId(page, off); - idxtuple = (IndexTuple) PageGetItem(page, iid); - /* if this page was not empty in previous scan - we do not consider it */ - leafBlockNo = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); - if (!blockset_get(leafBlockNo, vstate.emptyLeafPagesMap)) - continue; - - leafBuffer = ReadBufferExtended(rel, MAIN_FORKNUM, leafBlockNo, - RBM_NORMAL, info->strategy); - - buftodelete[ntodelete] = leafBuffer; - todelete[ntodelete++] = off; - - LockBuffer(leafBuffer, GIST_EXCLUSIVE); - gistcheckpage(rel, leafBuffer); - leafPage = (Page) BufferGetPage(leafBuffer); - if (!GistPageIsLeaf(leafPage)) - { - UnlockReleaseBuffer(leafBuffer); - continue; - } - - if (PageGetMaxOffsetNumber(leafPage) == InvalidOffsetNumber /* Nothing left to split */ - && !(GistFollowRight(leafPage) || GistPageGetNSN(page) < GistPageGetNSN(leafPage)) /* No follow-right */ - && ntodelete < maxoff-1) /* We must keep at least one leaf page per each */ - { - buftodelete[ntodelete] = leafBuffer; - todelete[ntodelete++] = off; - } - else - UnlockReleaseBuffer(leafBuffer); - } - - /* - * We will have to relock internal page in case of deletes: - * we cannot lock child while holding parent lock without risk - * of a deadlock - */ - LockBuffer(buffer, GIST_UNLOCK); - - if (ntodelete) - { - TransactionId txid; - int i; - - for (i = 0; i < ntodelete; i++) - { - Buffer leafBuffer = buftodelete[i]; - Page leafPage; - LockBuffer(leafBuffer, GIST_EXCLUSIVE); - gistcheckpage(rel, leafBuffer); - leafPage = (Page) BufferGetPage(leafBuffer); - if (!GistPageIsLeaf(leafPage) /* not a leaf anymore */ - || PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber /* Page is not empry */ - || (GistFollowRight(leafPage) || GistPageGetNSN(page) < GistPageGetNSN(leafPage)) /* No follow-right */ - ) - { - UnlockReleaseBuffer(leafBuffer); - buftodelete[i] = InvalidBuffer; - todelete[i] = InvalidOffsetNumber; - } - } - - LockBuffer(buffer, GIST_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); - - for 
(i = 0; i < ntodelete; i++) - { - Buffer leafBuffer = buftodelete[i]; - bool inconsistent = false; - if (todelete[i] == InvalidOffsetNumber) - continue; - - if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page) - || PageGetMaxOffsetNumber(page) < todelete[i]) - inconsistent = true; - - if (!inconsistent) - { - iid = PageGetItemId(page, todelete[i]); - idxtuple = (IndexTuple) PageGetItem(page, iid); - if (todelete[i] != ItemPointerGetBlockNumber(&(idxtuple->t_tid))) - inconsistent = true; - } - - if (inconsistent) - { - UnlockReleaseBuffer(leafBuffer); - buftodelete[i] = InvalidBuffer; - todelete[i] = InvalidOffsetNumber; - } - } - - /* - * Like in _bt_unlink_halfdead_page we need an upper bound on xid - * that could hold downlinks to this page. We use - * ReadNewTransactionId() to instead of GetCurrentTransactionId - * since we are in a VACUUM. - */ - txid = ReadNewTransactionId(); - - START_CRIT_SECTION(); - - /* Mark pages as deleted dropping references from internal pages */ - for (i = 0; i < ntodelete; i++) - { - Page leafPage; - XLogRecPtr recptr; - - if (todelete[i] == InvalidOffsetNumber) - continue; - - leafPage = (Page) BufferGetPage(buftodelete[i]); - - /* Remember xid of last transaction that could see this page */ - GistPageSetDeleteXid(leafPage,txid); - - GistPageSetDeleted(leafPage); - MarkBufferDirty(buftodelete[i]); - stats->pages_deleted++; - vstate.emptyPages--; - - MarkBufferDirty(buffer); - /* Offsets are changed as long as we delete tuples from internal page */ - PageIndexTupleDelete(page, todelete[i] - i); - - if (RelationNeedsWAL(rel)) - recptr = gistXLogSetDeleted(rel->rd_node, buftodelete[i], - txid, buffer, todelete[i] - i); - else - recptr = gistGetFakeLSN(rel); - PageSetLSN(page, recptr); - PageSetLSN(leafPage, recptr); - - UnlockReleaseBuffer(buftodelete[i]); - } - END_CRIT_SECTION(); - - LockBuffer(buffer, GIST_UNLOCK); - } - - ReleaseBuffer(buffer); - } - } - - blockset_free(vstate.emptyLeafPagesMap); - 
blockset_free(vstate.internalPagesMap); + stats->stats.num_pages = num_pages; + stats->stats.pages_free = vstate.totFreePages; } /* @@ -413,8 +235,8 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno) { - IndexVacuumInfo *info = vstate->info; - IndexBulkDeleteResult *stats = vstate->stats; + GistBulkDeleteResult *stats = vstate->stats; + IndexVacuumInfo *info = stats->info; IndexBulkDeleteCallback callback = vstate->callback; void *callback_state = vstate->callback_state; Relation rel = info->index; @@ -443,7 +265,7 @@ restart: /* Okay to recycle this page */ RecordFreeIndexPage(rel, blkno); vstate->totFreePages++; - stats->pages_deleted++; + stats->stats.pages_deleted++; } else if (GistPageIsLeaf(page)) { @@ -518,7 +340,7 @@ restart: END_CRIT_SECTION(); - stats->tuples_removed += ntodelete; + stats->stats.tuples_removed += ntodelete; /* must recompute maxoff */ maxoff = PageGetMaxOffsetNumber(page); } @@ -526,16 +348,14 @@ restart: nremain = maxoff - FirstOffsetNumber + 1; if (nremain == 0) { - vstate->emptyLeafPagesMap = blockset_set(vstate->emptyLeafPagesMap, blkno); - vstate->emptyPages++; + stats->emptyLeafPagesMap = blockset_set(stats->emptyLeafPagesMap, blkno); + stats->numEmptyPages++; } else - stats->num_index_tuples += nremain; + stats->stats.num_index_tuples += nremain; } else { - vstate->internalPagesMap = blockset_set(vstate->internalPagesMap, blkno); - /* * On an internal page, check for "invalid tuples", left behind by an * incomplete page split on PostgreSQL 9.0 or below. 
These are not @@ -560,6 +380,8 @@ restart: errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), errhint("Please REINDEX it."))); } + + stats->internalPagesMap = blockset_set(stats->internalPagesMap, blkno); } UnlockReleaseBuffer(buffer); @@ -577,3 +399,197 @@ restart: goto restart; } } + +static void +gistvacuum_recycle_pages(GistBulkDeleteResult *stats) +{ + IndexVacuumInfo *info = stats->info; + Relation rel = info->index; + BlockNumber x; + + /* quick exit if no empty pages */ + if (stats->numEmptyPages == 0) + return; + + /* rescan all inner pages to find those that have empty child pages */ + x = InvalidBlockNumber; + while (stats->numEmptyPages > 0 && + (x = blockset_next(stats->internalPagesMap, x)) != InvalidBlockNumber) + { + Buffer buffer; + Page page; + OffsetNumber off, + maxoff; + IndexTuple idxtuple; + ItemId iid; + OffsetNumber todelete[MaxOffsetNumber]; + Buffer buftodelete[MaxOffsetNumber]; + int ntodelete = 0; + BlockNumber blkno; + + blkno = (BlockNumber) x; + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, + info->strategy); + + LockBuffer(buffer, GIST_EXCLUSIVE); + page = (Page) BufferGetPage(buffer); + if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page)) + { + UnlockReleaseBuffer(buffer); + continue; + } + + maxoff = PageGetMaxOffsetNumber(page); + /* Check that leafs are still empty and decide what to delete */ + for (off = FirstOffsetNumber; off <= maxoff && ntodelete < maxoff-1; off = OffsetNumberNext(off)) + { + Buffer leafBuffer; + Page leafPage; + BlockNumber leafBlockNo; + + iid = PageGetItemId(page, off); + idxtuple = (IndexTuple) PageGetItem(page, iid); + /* if this page was not empty in previous scan - we do not consider it */ + leafBlockNo = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); + if (!blockset_get(leafBlockNo, stats->emptyLeafPagesMap)) + continue; + + leafBuffer = ReadBufferExtended(rel, MAIN_FORKNUM, 
leafBlockNo, + RBM_NORMAL, info->strategy); + + buftodelete[ntodelete] = leafBuffer; + todelete[ntodelete++] = off; + + LockBuffer(leafBuffer, GIST_EXCLUSIVE); + gistcheckpage(rel, leafBuffer); + leafPage = (Page) BufferGetPage(leafBuffer); + if (!GistPageIsLeaf(leafPage)) + { + UnlockReleaseBuffer(leafBuffer); + continue; + } + + if (PageGetMaxOffsetNumber(leafPage) == InvalidOffsetNumber /* Nothing left to split */ + && !(GistFollowRight(leafPage) || GistPageGetNSN(page) < GistPageGetNSN(leafPage)) /* No follow-right */ + && ntodelete < maxoff-1) /* We must keep at least one leaf page per each */ + { + buftodelete[ntodelete] = leafBuffer; + todelete[ntodelete++] = off; + } + else + UnlockReleaseBuffer(leafBuffer); + } + + /* + * We will have to relock internal page in case of deletes: + * we cannot lock child while holding parent lock without risk + * of a deadlock + */ + LockBuffer(buffer, GIST_UNLOCK); + + if (ntodelete) + { + TransactionId txid; + int i; + + for (i = 0; i < ntodelete; i++) + { + Buffer leafBuffer = buftodelete[i]; + Page leafPage; + LockBuffer(leafBuffer, GIST_EXCLUSIVE); + gistcheckpage(rel, leafBuffer); + leafPage = (Page) BufferGetPage(leafBuffer); + if (!GistPageIsLeaf(leafPage) /* not a leaf anymore */ + || PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber /* Page is not empty */ + || (GistFollowRight(leafPage) || GistPageGetNSN(page) < GistPageGetNSN(leafPage)) /* No follow-right */ + ) + { + UnlockReleaseBuffer(leafBuffer); + buftodelete[i] = InvalidBuffer; + todelete[i] = InvalidOffsetNumber; + } + } + + LockBuffer(buffer, GIST_EXCLUSIVE); + page = (Page) BufferGetPage(buffer); + + for (i = 0; i < ntodelete; i++) + { + Buffer leafBuffer = buftodelete[i]; + bool inconsistent = false; + + if (todelete[i] == InvalidOffsetNumber) + continue; + + if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page) + || PageGetMaxOffsetNumber(page) < todelete[i]) + inconsistent = true; + + if (!inconsistent) + { + iid = 
PageGetItemId(page, todelete[i]); + idxtuple = (IndexTuple) PageGetItem(page, iid); + if (todelete[i] != ItemPointerGetBlockNumber(&(idxtuple->t_tid))) + inconsistent = true; + } + + if (inconsistent) + { + UnlockReleaseBuffer(leafBuffer); + buftodelete[i] = InvalidBuffer; + todelete[i] = InvalidOffsetNumber; + } + } + + /* + * Like in _bt_unlink_halfdead_page we need an upper bound on xid + * that could hold downlinks to this page. We use + * ReadNewTransactionId() instead of GetCurrentTransactionId + * since we are in a VACUUM. + */ + txid = ReadNewTransactionId(); + + START_CRIT_SECTION(); + + /* Mark pages as deleted dropping references from internal pages */ + for (i = 0; i < ntodelete; i++) + { + Page leafPage; + XLogRecPtr recptr; + + if (todelete[i] == InvalidOffsetNumber) + continue; + + leafPage = (Page) BufferGetPage(buftodelete[i]); + + /* Remember xid of last transaction that could see this page */ + GistPageSetDeleteXid(leafPage,txid); + + GistPageSetDeleted(leafPage); + MarkBufferDirty(buftodelete[i]); + stats->stats.pages_deleted++; + stats->numEmptyPages--; + + MarkBufferDirty(buffer); + /* Offsets are changed as long as we delete tuples from internal page */ + PageIndexTupleDelete(page, todelete[i] - i); + + if (RelationNeedsWAL(rel)) + recptr = gistXLogPageDelete(buftodelete[i], + txid, buffer, todelete[i] - i); + else + recptr = gistGetFakeLSN(rel); + PageSetLSN(page, recptr); + PageSetLSN(leafPage, recptr); + + UnlockReleaseBuffer(buftodelete[i]); + } + END_CRIT_SECTION(); + + LockBuffer(buffer, GIST_UNLOCK); + } + + ReleaseBuffer(buffer); + } +} -- 2.20.1