diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index e34a2f9..6cce1f3 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -551,6 +551,13 @@
+ aminsertcleanup
+ regproc
+ pg_proc.oid
+ Post-INSERT cleanup function (optional)
+
+
+
amvacuumcleanup
regproc
pg_proc.oid
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 7493ca9..d7236f8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3525,6 +3525,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
This setting can be overridden for individual tables by entries in
pg_autovacuum>.
+
+ This parameter also affects vacuuming of a table with a GIN
+ index: it specifies the minimum number of inserted or updated
+ tuples needed to trigger a VACUUM> on that table.
+
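+
+ As a rough sketch (the table name is hypothetical), the count of freshly
+ inserted or updated tuples that is compared against this threshold can be
+ inspected through the n_fresh_tup column that this patch adds to
+ pg_stat_all_tables>:
+
+SELECT relname, n_fresh_tup FROM pg_stat_all_tables WHERE relname = 'documents';
+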
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml
index 1c5841a..adc77c4 100644
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -188,9 +188,45 @@
list of heap pointers (PL, posting list) if the list is small enough.
+
+ GIN fast update technique
+
+
+ Updating a GIN index tends to be slow because of the
+ intrinsic nature of inverted indexes: inserting or updating one heap row
+ can cause many inserts into the index (one for each key extracted
+ from the indexed value). As of
+ PostgreSQL 8.4, this problem is alleviated
+ by postponing most of the work until the next VACUUM>.
+ Newly inserted index entries are temporarily stored in an unsorted list of
+ pending entries. VACUUM> inserts all pending entries into the
+ main GIN index data structure,
+ using the same bulk insert techniques used during initial index creation.
+ This greatly improves GIN index update speed, even
+ counting the additional vacuum overhead.
+
+
+
+ The disadvantage of this approach is that searches must scan the list
+ of pending entries in addition to searching the regular index, and so
+ a large list of pending entries will slow searches significantly.
+ It's recommended to use properly-configured autovacuum with tables
+ having GIN indexes, to keep this overhead to
+ reasonable levels.
+
+
+
+ If consistently-fast search speed is more important than update speed,
+ use of pending entries can be disabled by turning off the
+ FASTUPDATE storage parameter for a
+ GIN index. See for details.
+
+
+
Partial match algorithm
-
+
GIN can support partial match> queries, in which the query
does not determine an exact match for one or more keys, but the possible
@@ -225,11 +261,18 @@
Create vs insert
- In most cases, insertion into a GIN index is slow
+ Insertion into a GIN index can be slow
due to the likelihood of many keys being inserted for each value.
So, for bulk insertions into a table it is advisable to drop the GIN
index and recreate it after finishing bulk insertion.
+
+
+ As of PostgreSQL 8.4, this advice is less
+ necessary since delayed indexing is used (see for details). But for very large updates
+ it may still be best to drop and recreate the index.
+
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 8b502e6..b75ccc9 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -265,7 +265,7 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] name
The WITH> clause can specify storage parameters>
for indexes. Each index method can have its own set of allowed storage
- parameters. The built-in index methods all accept a single parameter:
+ parameters. All built-in index methods accept this parameter:
@@ -292,6 +292,36 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] name
+
+ GIN indexes accept an additional parameter:
+
+
+
+
+
+ FASTUPDATE>
+
+
+ This setting controls usage of the fast update technique described in
+ . It is a Boolean parameter:
+ ON> enables fast update, OFF> disables it.
+ (Alternative spellings of ON> and OFF> are
+ allowed as described in .) The
+ default is ON>.
+
+
+
+
+ Turning FASTUPDATE> off via ALTER INDEX> prevents
+ future insertions from going into the list of pending index entries,
+ but does not in itself flush previous entries. You might want to do a
+ VACUUM> afterward to ensure the pending list is emptied.
+
+
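+
+ As a sketch (the index and table names are hypothetical), the sequence
+ might look like:
+
+ALTER INDEX gin_idx SET (fastupdate = off);
+VACUUM documents_table;
+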
+
+
+
+
@@ -500,6 +530,13 @@ CREATE UNIQUE INDEX title_idx ON films (title) WITH (fillfactor = 70);
+ To create a GIN> index with fast update turned off:
+
+CREATE INDEX gin_idx ON documents_table (locations) WITH (fastupdate = off);
+
+
+
+
To create an index on the column code> in the table
films> and have the index reside in the tablespace
indexspace>:
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index bee0667..952481c 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -63,6 +63,13 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [
blocks. This form is much slower and requires an exclusive lock on each
table while it is being processed.
+
+
+ For tables with GIN> indexes, VACUUM (in
+ any form) also completes any delayed index insertions, by moving pending
+ index entries to the appropriate places in the main GIN> index
+ structure. (See for more details.)
+
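+
+ For example (the table name is hypothetical), a plain
+
+VACUUM documents_table;
+
+ moves any pending entries of the table's GIN> indexes into the main
+ index structure.
+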
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 1b1310c..8560c07 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -3224,7 +3224,9 @@ SELECT plainto_tsquery('supernovae stars');
- GIN indexes are about ten times slower to update than GiST
+ GIN indexes are moderately slower to update than GiST indexes, but
+ about 10 times slower if fast update support was disabled
+ (see for details)
diff --git a/src/backend/access/gin/Makefile b/src/backend/access/gin/Makefile
index 93442ae..99ded7a 100644
--- a/src/backend/access/gin/Makefile
+++ b/src/backend/access/gin/Makefile
@@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
- ginbulk.o
+ ginbulk.o ginfast.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c
index 5219e55..63b5be5 100644
--- a/src/backend/access/gin/ginbulk.c
+++ b/src/backend/access/gin/ginbulk.c
@@ -197,6 +197,8 @@ ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber att
if (nentry <= 0)
return;
+ Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
+
i = nentry - 1;
for (; i > 0; i >>= 1)
nbit++;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index bf0651d..3c188f3 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -43,8 +43,14 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint
while (aptr - a < na && bptr - b < nb)
{
- if (compareItemPointers(aptr, bptr) > 0)
+ int cmp = compareItemPointers(aptr, bptr);
+ if (cmp > 0)
*dptr++ = *bptr++;
+ else if ( cmp == 0 )
+ {
+ *dptr++ = *bptr++;
+ aptr++;
+ }
else
*dptr++ = *aptr++;
}
@@ -630,11 +636,16 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
- elog(ERROR, "item pointer (%u,%d) already exists",
- ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
- ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
-
- ginInsertValue(&(gdi->btree), gdi->stack);
+ {
+ /*
+ * gdi->btree.items[ gdi->btree.curitem ] already exists in index
+ */
+ gdi->btree.curitem ++;
+ LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
+ freeGinBtreeStack(gdi->stack);
+ }
+ else
+ ginInsertValue(&(gdi->btree), gdi->stack);
gdi->stack = NULL;
}
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
new file mode 100644
index 0000000..3ca335d
--- /dev/null
+++ b/src/backend/access/gin/ginfast.c
@@ -0,0 +1,761 @@
+/*-------------------------------------------------------------------------
+ *
+ * ginfast.c
+ * Fast insert routines for the Postgres inverted index access method.
+ *	  Pending entries are stored in a linear list of pages, and vacuum
+ *	  will transfer them into the regular structure.
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gin.h"
+#include "access/tuptoaster.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "utils/memutils.h"
+
+
+static int32
+writeListPage(Relation index, Buffer buffer, IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
+{
+ Page page = BufferGetPage(buffer);
+ int i, freesize, size=0;
+ OffsetNumber l, off;
+
+ START_CRIT_SECTION();
+
+ GinInitBuffer(buffer, GIN_LIST);
+
+ off = FirstOffsetNumber;
+
+	for(i=0; i<ntuples; i++)
+	{
+		int		tupsize = IndexTupleSize(tuples[i]);
+
+		l = PageAddItem(page, (Item)tuples[i], tupsize, off, false, false);
+
+		if (l == InvalidOffsetNumber)
+			elog(ERROR, "failed to add item to index page in \"%s\"",
+				 RelationGetRelationName(index));
+
+		size += tupsize;
+		off++;
+	}
+
+	GinPageGetOpaque(page)->rightlink = rightlink;
+ /*
+ * tail page may contain only the whole row(s) or final
+ * part of row placed on previous pages
+ */
+ if ( rightlink == InvalidBlockNumber )
+ GinPageSetFullRow(page);
+
+ freesize = PageGetFreeSpace(page);
+
+ MarkBufferDirty(buffer);
+
+ if (!index->rd_istemp)
+ {
+ XLogRecData rdata[2];
+ ginxlogInsertListPage data;
+ XLogRecPtr recptr;
+ char *ptr;
+
+ rdata[0].buffer = buffer;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char*)&data;
+ rdata[0].len = sizeof(ginxlogInsertListPage);
+ rdata[0].next = rdata+1;
+
+ rdata[1].buffer = InvalidBuffer;
+ ptr = rdata[1].data = palloc( size );
+ rdata[1].len = size;
+ rdata[1].next = NULL;
+
+ for(i=0; i 0);
+
+ /*
+ * Split tuples for pages
+ */
+ for(i=0;ihead = BufferGetBlockNumber(curBuffer);
+ }
+
+			prevBuffer = curBuffer;
+ startTuple = i;
+ size = 0;
+ }
+
+ tupsize = IndexTupleSize(tuples[i]) + sizeof(ItemIdData);
+
+ if ( size + tupsize >= GinListPageSize )
+ {
+ i--;
+ curBuffer = InvalidBuffer;
+ }
+ else
+ {
+ size += tupsize;
+ }
+ }
+
+ /*
+ * Write last page
+ */
+ res->tail = BufferGetBlockNumber(curBuffer);
+ res->tailFreeSize = writeListPage(index, curBuffer, tuples+startTuple, ntuples-startTuple, InvalidBlockNumber);
+}
+
+#define GIN_PAGE_FREESIZE \
+ ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
+/*
+ * Inserts collected values during normal insertion. The function guarantees
+ * that all values of the heap row will be stored sequentially,
+ * preserving order.
+ */
+void
+ginHeapTupleFastInsert(Relation index, GinTupleCollector *collector)
+{
+ Buffer metabuffer;
+ Page metapage;
+ GinMetaPageData *metadata = NULL;
+ XLogRecData rdata[2];
+ Buffer buffer = InvalidBuffer;
+ Page page = NULL;
+ ginxlogUpdateMeta data;
+ bool separateList = false;
+
+ if ( collector->ntuples == 0 )
+ return;
+
+ data.node = index->rd_node;
+ data.ntuples = 0;
+ data.newRightlink = data.prevTail = InvalidBlockNumber;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogUpdateMeta);
+ rdata[0].next = NULL;
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ metapage = BufferGetPage(metabuffer);
+
+ if ( collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GIN_PAGE_FREESIZE )
+ {
+ /*
+ * Total size is greater than one page => make sublist
+ */
+ separateList = true;
+ }
+ else
+ {
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber ||
+ collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize )
+ {
+ /*
+ * Pending list is empty or total size is greater than freespace
+ * on tail page => make sublist
+ * We unlock metabuffer to keep high concurrency
+ */
+ separateList = true;
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ }
+ }
+
+ if ( separateList )
+ {
+ GinMetaPageData sublist;
+
+ /*
+ * We should make sublist separately and append it to the tail
+ */
+ memset( &sublist, 0, sizeof(GinMetaPageData) );
+
+ makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+
+ /*
+ * metapage was unlocked, see above
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ /*
+ * Sublist becomes main list
+ */
+ START_CRIT_SECTION();
+ memcpy(metadata, &sublist, sizeof(GinMetaPageData) );
+ memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) );
+ }
+ else
+ {
+ /*
+ * merge lists
+ */
+
+ data.prevTail = metadata->tail;
+ buffer = ReadBuffer(index, metadata->tail);
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
+
+ START_CRIT_SECTION();
+
+ GinPageGetOpaque(page)->rightlink = sublist.head;
+ metadata->tail = sublist.tail;
+ metadata->tailFreeSize = sublist.tailFreeSize;
+
+ memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+ data.newRightlink = sublist.head;
+
+ MarkBufferDirty(buffer);
+ }
+ }
+ else
+ {
+ /*
+ * Insert into tail page, metapage is already locked
+ */
+
+ OffsetNumber l, off;
+ int i, tupsize;
+ char *ptr;
+
+ buffer = ReadBuffer(index, metadata->tail);
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+
+ rdata[0].next = rdata + 1;
+
+ rdata[1].buffer = buffer;
+ rdata[1].buffer_std = true;
+ ptr = rdata[1].data = (char *) palloc( collector->sumsize );
+ rdata[1].len = collector->sumsize;
+ rdata[1].next = NULL;
+
+ data.ntuples = collector->ntuples;
+
+ START_CRIT_SECTION();
+
+		for(i=0; i<collector->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(collector->tuples[i]);
+ l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page in \"%s\"",
+ RelationGetRelationName(index));
+
+ memcpy(ptr, collector->tuples[i], tupsize);
+ ptr+=tupsize;
+
+ off++;
+ }
+
+ metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData);
+ memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+ MarkBufferDirty(buffer);
+ }
+
+ /*
+ * Make real write
+ */
+
+ MarkBufferDirty(metabuffer);
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ if ( buffer != InvalidBuffer )
+ {
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ }
+
+ if (buffer != InvalidBuffer)
+ UnlockReleaseBuffer(buffer);
+ UnlockReleaseBuffer(metabuffer);
+
+ END_CRIT_SECTION();
+}
+
+/*
+ * Collect values from one tuple to be indexed. All values for
+ * one tuple should be written at once, to guarantee a consistent state
+ */
+uint32
+ginHeapTupleFastCollect(Relation index, GinState *ginstate, GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item)
+{
+ Datum *entries;
+ int32 i,
+ nentries;
+
+ entries = extractEntriesSU(ginstate, attnum, value, &nentries);
+
+ if (nentries == 0)
+ /* nothing to insert */
+ return 0;
+
+ /*
+ * Allocate/reallocate memory for storing collected tuples
+ */
+ if ( collector->tuples == NULL )
+ {
+ collector->lentuples = nentries * index->rd_att->natts;
+ collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples);
+ }
+
+ while ( collector->ntuples + nentries > collector->lentuples )
+ {
+ collector->lentuples *= 2;
+ collector->tuples = (IndexTuple*)repalloc( collector->tuples,
+ sizeof(IndexTuple) * collector->lentuples);
+ }
+
+ /*
+ * Creates tuple's array
+ */
+ for (i = 0; i < nentries; i++)
+ {
+ int32 tupsize;
+
+ collector->tuples[collector->ntuples + i] = GinFormTuple(ginstate, attnum, entries[i], NULL, 0);
+ collector->tuples[collector->ntuples + i]->t_tid = *item;
+ tupsize = IndexTupleSize(collector->tuples[collector->ntuples + i]);
+
+ if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize)
+ elog(ERROR, "huge tuple");
+
+ collector->sumsize += tupsize;
+ }
+
+ collector->ntuples += nentries;
+
+ return nentries;
+}
+
+/*
+ * Deletes first pages in list before newHead page.
+ * If newHead == InvalidBlockNumber then function drops the whole list.
+ * returns true if concurrent completion process is running
+ */
+static bool
+shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
+ IndexBulkDeleteResult *stats)
+{
+#define NDELETE_AT_ONCE (16)
+ Buffer buffers[NDELETE_AT_ONCE];
+ ginxlogDeleteListPages data;
+ XLogRecData rdata[1];
+ Page metapage;
+ GinMetaPageData *metadata;
+ BlockNumber blknoToDelete;
+
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+ blknoToDelete = metadata->head;
+
+ data.node = index->rd_node;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogDeleteListPages);
+ rdata[0].next = NULL;
+
+ do
+ {
+ Page page;
+ int i;
+
+ data.ndeleted = 0;
+ while( data.ndeleted < NDELETE_AT_ONCE && blknoToDelete != newHead )
+ {
+ data.toDelete[ data.ndeleted ] = blknoToDelete;
+ buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete);
+ LockBufferForCleanup( buffers[ data.ndeleted ] );
+ page = BufferGetPage( buffers[ data.ndeleted ] );
+
+ data.ndeleted++;
+ stats->pages_deleted++;
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent deletion process is detected */
+				for(i=0; i<data.ndeleted; i++)
+					UnlockReleaseBuffer( buffers[i] );
+				return true;
+			}
+
+			blknoToDelete = GinPageGetOpaque(page)->rightlink;
+ }
+
+ START_CRIT_SECTION();
+
+ metadata->head = blknoToDelete;
+ if ( blknoToDelete == InvalidBlockNumber )
+ {
+ metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+ }
+ memcpy( &data.metadata, metadata, sizeof(GinMetaPageData));
+ MarkBufferDirty( metabuffer );
+
+		for(i=0; i<data.ndeleted; i++)
+		{
+			page = BufferGetPage( buffers[i] );
+			GinPageGetOpaque(page)->flags = GIN_DELETED;
+ MarkBufferDirty( buffers[ i ] );
+ }
+
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ for(i=0; invalues >= datums->maxvalues)
+ {
+ datums->maxvalues *= 2;
+ datums->values = (Datum*)repalloc( datums->values, sizeof(Datum)*datums->maxvalues);
+ }
+
+ datums->values[ datums->nvalues++ ] = datum;
+}
+
+/*
+ * Go through all tuples on page and collect values in memory
+ */
+
+static void
+processPendingPage(BuildAccumulator *accum, DatumArray *da, Page page, OffsetNumber startoff)
+{
+ ItemPointerData heapptr;
+ OffsetNumber i,maxoff;
+ OffsetNumber attrnum, curattnum;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ Assert( maxoff >= FirstOffsetNumber );
+ ItemPointerSetInvalid(&heapptr);
+ attrnum = 0;
+
+ for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+
+ curattnum = gintuple_get_attrnum(accum->ginstate, itup);
+
+ if ( !ItemPointerIsValid(&heapptr) )
+ {
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) && curattnum == attrnum) )
+ {
+ /*
+ * We can insert several datums per call, but only for one heap tuple
+ * and one column.
+ */
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+ da->nvalues = 0;
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ addDatum(da, gin_index_getattr(accum->ginstate, itup));
+ }
+
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+}
+
+/*
+ * Moves tuples from pending pages into the regular GIN structure.
+ * The function doesn't require special locking and can be called
+ * at any time, but only one instance should run at a time.
+ */
+
+Datum
+gininsertcleanup(PG_FUNCTION_ARGS)
+{
+ IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+ IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ Relation index = info->index;
+ GinState ginstate;
+ Buffer metabuffer, buffer;
+ Page metapage, page;
+ GinMetaPageData *metadata;
+ MemoryContext opCtx, oldCtx;
+ BuildAccumulator accum;
+ DatumArray datums;
+ BlockNumber blkno;
+
+ /* Set up all-zero stats if ginbulkdelete wasn't called */
+ if (stats == NULL)
+ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+ initGinState(&ginstate, index);
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ LockBuffer(metabuffer, GIN_SHARE);
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ UnlockReleaseBuffer(metabuffer);
+ PG_RETURN_POINTER(stats);
+ }
+
+ /*
+ * Init
+ */
+ datums.maxvalues=128;
+ datums.nvalues = 0;
+ datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+
+ ginInitBA(&accum);
+ accum.ginstate = &ginstate;
+
+ opCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "Gin refresh temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ oldCtx = MemoryContextSwitchTo(opCtx);
+
+ /*
+ * Read and lock head
+ */
+ blkno = metadata->head;
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ for(;;)
+ {
+ /*
+ * reset datum's collector and read page's datums into memory
+ */
+ datums.nvalues = 0;
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent completion is running */
+ UnlockReleaseBuffer( buffer );
+ break;
+ }
+
+ processPendingPage(&accum, &datums, page, FirstOffsetNumber);
+
+ vacuum_delay_point();
+
+ /*
+ * Is it time to flush memory to disk?
+ */
+ if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+ ( GinPageHasFullRow(page) && accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+ {
+ ItemPointerData *list;
+ uint32 nlist;
+ Datum entry;
+ OffsetNumber maxoff, attnum;
+
+ /*
+			 * Unlock the current page to increase concurrency.
+			 * Changes to the page will be detected later by comparing
+			 * maxoff after the memory flush completes.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ LockBuffer(buffer, GIN_UNLOCK);
+
+ /*
+			 * Moving collected data into the regular structure can take a
+			 * significant amount of time, so run it without locking the
+			 * pending list.
+ */
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ {
+ vacuum_delay_point();
+ ginEntryInsert(index, &ginstate, attnum, entry, list, nlist, FALSE);
+ }
+
+ /*
+ * Lock the whole list to remove pages
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ LockBuffer(buffer, GIN_SHARE);
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent completion is running */
+ UnlockReleaseBuffer(buffer);
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ /*
+			 * While we kept the page unlocked it might have changed, so
+			 * read the changes separately. The amount of data on one page is
+			 * rather small, so the extra memory used isn't very big, although
+			 * we must reinit the accumulator. We need to make this
+			 * check only once, because now both page and metapage are
+			 * locked. The insertion algorithm guarantees that inserted row(s)
+			 * will not continue onto the next page.
+ */
+ if ( PageGetMaxOffsetNumber(page) != maxoff )
+ {
+ ginInitBA(&accum);
+ datums.nvalues = 0;
+ processPendingPage(&accum, &datums, page, maxoff+1);
+
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ ginEntryInsert(index, &ginstate, attnum, entry, list, nlist, FALSE);
+ }
+
+ /*
+ * Remember next page - it will become a new head
+ */
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */
+
+ /*
+			 * remove the pages we have read from the pending list; at this
+			 * point all of their content is in the regular structure
+ */
+ if ( shiftList(index, metabuffer, blkno, stats) )
+ {
+ /* concurrent completion is running */
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ Assert( blkno == metadata->head );
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ /*
+			 * if we removed the whole list, just exit
+ */
+ if ( blkno == InvalidBlockNumber )
+ break;
+
+ /*
+ * reinit state
+ */
+ MemoryContextReset(opCtx);
+ ginInitBA(&accum);
+ }
+ else
+ {
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer);
+ }
+
+
+ /*
+ * Read next page in pending list
+ */
+ CHECK_FOR_INTERRUPTS();
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+ }
+
+ ReleaseBuffer(metabuffer);
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(opCtx);
+
+ PG_RETURN_POINTER(stats);
+}
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 23131e5..69c15fc 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -268,6 +268,15 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
Page page;
bool needUnlock = TRUE;
+ entry->buffer = InvalidBuffer;
+ entry->offset = InvalidOffsetNumber;
+ entry->list = NULL;
+ entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
+ entry->reduceResult = FALSE;
+ entry->predictNumberResult = 0;
+
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
@@ -285,14 +294,6 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
page = BufferGetPage(stackEntry->buffer);
entry->isFinished = TRUE;
- entry->buffer = InvalidBuffer;
- entry->offset = InvalidOffsetNumber;
- entry->list = NULL;
- entry->nlist = 0;
- entry->partialMatch = NULL;
- entry->partialMatchResult = NULL;
- entry->reduceResult = FALSE;
- entry->predictNumberResult = 0;
if ( entry->isPartialMatch )
{
@@ -350,9 +351,10 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
entry->buffer = scanBeginPostingTree(gdi);
/*
- * We keep buffer pinned because we need to prevent deletition
+ * We keep buffer pinned because we need to prevent deletion of
* page during scan. See GIN's vacuum implementation. RefCount
- * is increased to keep buffer pinned after freeGinBtreeStack() call.
+ * is increased to keep buffer pinned after freeGinBtreeStack()
+ * call.
*/
IncrBufferRefCount(entry->buffer);
@@ -429,6 +431,15 @@ startScan(IndexScanDesc scan)
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ /*
+	 * If isScanFastInsert is still true, the pending-insert list will be
+	 * scanned first; postpone starting the regular index scan until that
+	 * scan is finished (scanGetItem restarts it then).
+ */
+ if (so->isScanFastInsert)
+ {
+ return;
+ }
+
for (i = 0; i < so->nkeys; i++)
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
}
@@ -671,6 +682,336 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
return FALSE;
}
+typedef struct fastPosition {
+ Buffer fastBuffer;
+ OffsetNumber firstOffset;
+ OffsetNumber lastOffset;
+ ItemPointerData item;
+} fastPosition;
+
+
+/*
+ * Get ItemPointer of next heap row to be checked from fast insert storage.
+ * Returns false if there are no more.
+ *
+ * The fastBuffer is presumed pinned and share-locked on entry, and is
+ * pinned and share-locked on success exit. On failure exit it's released.
+ */
+static bool
+scanGetCandidate(IndexScanDesc scan, fastPosition *pos)
+{
+ OffsetNumber maxoff;
+ Page page;
+ IndexTuple itup;
+
+ ItemPointerSetInvalid( &pos->item );
+ for(;;)
+ {
+ page = BufferGetPage(pos->fastBuffer);
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ if ( pos->firstOffset > maxoff )
+ {
+ BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+ if ( blkno == InvalidBlockNumber )
+ {
+ UnlockReleaseBuffer(pos->fastBuffer);
+ pos->fastBuffer=InvalidBuffer;
+
+ return false;
+ }
+ else
+ {
+ /*
+ * Here we should prevent deletion of next page by
+ * insertcleanup process, which uses LockBufferForCleanup.
+				 * So, we pin the next page before unpinning the current one
+ */
+ Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
+
+ UnlockReleaseBuffer( pos->fastBuffer);
+ pos->fastBuffer=tmpbuf;
+ LockBuffer( pos->fastBuffer, GIN_SHARE );
+
+ pos->firstOffset = FirstOffsetNumber;
+ }
+ }
+ else
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
+ pos->item = itup->t_tid;
+ if ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+ {
+ /*
+ * find itempointer to the next row
+ */
+ for(pos->lastOffset = pos->firstOffset+1; pos->lastOffset<=maxoff; pos->lastOffset++)
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
+ if (!ItemPointerEquals(&pos->item, &itup->t_tid))
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * All itempointers are the same on this page
+ */
+ pos->lastOffset = maxoff + 1;
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+static bool
+matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off,
+ OffsetNumber maxoff, Datum value, OffsetNumber attrnum,
+ Datum *datum, bool *datumExtracted, StrategyNumber strategy)
+{
+ IndexTuple itup;
+ int res;
+
+ while( off < maxoff )
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+ if ( attrnum != gintuple_get_attrnum(ginstate, itup) )
+ return false;
+
+ if (datumExtracted[ off-1 ] == false)
+ {
+ datum[ off-1 ] = gin_index_getattr(ginstate, itup);
+ datumExtracted[ off-1 ] = true;
+ }
+
+ res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum],
+ value,
+ datum[ off-1 ],
+ UInt16GetDatum(strategy)));
+ if ( res == 0 )
+ return true;
+ else if (res>0)
+ return false;
+ }
+
+ return false;
+}
+/*
+ * Sets the entryRes array for each key by examining
+ * every entry for the indexed value (row) in the fast-insert storage.
+ * Returns true if at least one datum was matched by a key's entry.
+ *
+ * The fastBuffer is presumed pinned and share-locked on entry.
+ */
+static bool
+collectDatumForItem(IndexScanDesc scan, fastPosition *pos)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ OffsetNumber attrnum;
+ Page page;
+ IndexTuple itup;
+ int i, j;
+ bool hasMatch = false;
+
+ /*
+ * Resets entryRes
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+ memset( key->entryRes, FALSE, key->nentries );
+ }
+
+ for(;;)
+ {
+ Datum datum[ BLCKSZ/sizeof(IndexTupleData) ];
+ bool datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ];
+
+ Assert( pos->lastOffset > pos->firstOffset );
+ memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset ));
+
+ page = BufferGetPage(pos->fastBuffer);
+
+ for(i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+			for(j=0; j<key->nentries; j++)
+ {
+ OffsetNumber StopLow = pos->firstOffset,
+ StopHigh = pos->lastOffset,
+ StopMiddle;
+ GinScanEntry entry = key->scanEntry + j;
+
+ if ( key->entryRes[j] )
+ continue;
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
+ attrnum = gintuple_get_attrnum(&so->ginstate, itup);
+
+ if (key->attnum < attrnum)
+ StopHigh = StopMiddle;
+ else if (key->attnum > attrnum)
+ StopLow = StopMiddle + 1;
+ else
+ {
+ int res;
+
+ if (datumExtracted[ StopMiddle-1 ] == false)
+ {
+ datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup);
+ datumExtracted[ StopMiddle-1 ] = true;
+ }
+ res = compareEntries(&so->ginstate,
+ entry->attnum,
+ entry->entry,
+ datum[ StopMiddle-1 ]);
+
+ if ( res == 0 )
+ {
+ if ( entry->isPartialMatch )
+ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, page, StopMiddle,
+ pos->lastOffset, entry->entry, entry->attnum,
+ datum, datumExtracted, entry->strategy);
+ else
+ key->entryRes[j] = true;
+ break;
+ }
+ else if ( res < 0 )
+ StopHigh = StopMiddle;
+ else
+ StopLow = StopMiddle + 1;
+ }
+ }
+
+ if ( StopLow>=StopHigh && entry->isPartialMatch )
+ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, page, StopHigh,
+ pos->lastOffset, entry->entry, entry->attnum,
+ datum, datumExtracted, entry->strategy);
+
+ hasMatch |= key->entryRes[j];
+ }
+ }
+
+ pos->firstOffset = pos->lastOffset;
+
+ if ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+ {
+ /*
+			 * We have scanned all values from one tuple; go to the next one
+ */
+
+ return hasMatch;
+ }
+ else
+ {
+ ItemPointerData item = pos->item;
+
+ if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) )
+ elog(ERROR,"Could not process tuple"); /* XXX should not be here ! */
+ }
+ }
+
+ return hasMatch;
+}
+
+/*
+ * Collect all matched rows from pending list in bitmap
+ */
+static TIDBitmap*
+scanFastInsert(IndexScanDesc scan)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ MemoryContext oldCtx;
+ bool recheck, keyrecheck, match;
+ TIDBitmap *tbm = NULL;
+ int i;
+ fastPosition pos;
+ Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
+ BlockNumber blkno;
+
+ LockBuffer(metabuffer, GIN_SHARE);
+ blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
+
+ /*
+ * fetch head of list before unlocking metapage.
+ * head page must be pinned to prevent deletion by vacuum process
+ */
+ if ( blkno == InvalidBlockNumber )
+ {
+ /* No pending list, so proceed with normal scan */
+ UnlockReleaseBuffer( metabuffer );
+ return NULL;
+ }
+
+ pos.fastBuffer = ReadBuffer(scan->indexRelation, blkno);
+ LockBuffer(pos.fastBuffer, GIN_SHARE);
+ pos.firstOffset = FirstOffsetNumber;
+ UnlockReleaseBuffer( metabuffer );
+
+ /*
+ * loop for each heap row
+ */
+ while( scanGetCandidate(scan, &pos) )
+ {
+
+ /*
+ * Check entries in rows and setup entryRes array
+ */
+ if (!collectDatumForItem(scan, &pos))
+ continue;
+
+ /*
+		 * check the keys using the consistent function
+ */
+ oldCtx = MemoryContextSwitchTo(so->tempCtx);
+ recheck = false;
+ match = true;
+
+ for (i = 0; match && i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ keyrecheck = true;
+
+ if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ],
+ PointerGetDatum(key->entryRes),
+ UInt16GetDatum(key->strategy),
+ key->query,
+ PointerGetDatum(&keyrecheck))) == false )
+ {
+ match = false;
+ }
+
+ recheck |= keyrecheck;
+ }
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(so->tempCtx);
+
+ if ( match )
+ {
+ if ( tbm == NULL )
+ tbm = tbm_create( work_mem * 1024L );
+ tbm_add_tuples(tbm, &pos.item, 1, recheck);
+ }
+ }
+
+ if ( tbm && tbm_has_lossy(tbm) )
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("not enough memory to store result of pending list or VACUUME table" ),
+ errhint("Increase the \"work_mem\" parameter.")));
+
+ return tbm;
+}
+
/*
* Get heap item pointer from scan
* returns true if found
@@ -693,44 +1034,112 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck)
*/
*recheck = false;
- ItemPointerSetMin(item);
- for (i = 0; i < so->nkeys; i++)
+ /*
+	 * First of all, we should check the fast-insert list of pages
+ */
+ if ( so->isScanFastInsert )
{
- GinScanKey key = so->keys + i;
+ if ( so->scanFastTuples )
+ {
+ /*
+			 * Items from the pending list are already collected in memory
+ */
- if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
- key, &keyrecheck))
- return FALSE; /* finished one of keys */
- if (compareItemPointers(item, &key->curItem) < 0)
- *item = key->curItem;
- *recheck |= keyrecheck;
- }
+ if ( so->scanFastResult == NULL || so->scanFastOffset >= so->scanFastResult->ntuples )
+ {
+ so->scanFastResult = tbm_iterate( so->scanFastTuples );
- for (i = 1; i <= so->nkeys; i++)
- {
- GinScanKey key = so->keys + i - 1;
+ if ( so->scanFastResult == NULL )
+ {
+ /* scan of pending pages is finished */
+ so->isScanFastInsert = false;
+ startScan(scan);
+ return scanGetItem(scan, item, recheck);
+ }
+ Assert( so->scanFastResult->ntuples >= 0 );
+ so->scanFastOffset = 0;
+ }
+
+ ItemPointerSet(item,
+ so->scanFastResult->blockno,
+ so->scanFastResult->offsets[ so->scanFastOffset ]);
+		*recheck = true;	/* be conservative due to concurrent
+ removal from pending list */
- for (;;)
+ so->scanFastOffset ++;
+
+ return true;
+ }
+ else
{
- int cmp = compareItemPointers(item, &key->curItem);
+ /*
+ * Collect ItemPointers in memory
+ */
+ so->scanFastTuples = scanFastInsert(scan);
- if (cmp == 0)
- break;
- else if (cmp > 0)
+ if ( so->scanFastTuples == NULL )
{
- if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
- key, &keyrecheck))
- return FALSE; /* finished one of keys */
- *recheck |= keyrecheck;
+ /* nothing found */
+ so->isScanFastInsert = false;
+ startScan(scan);
}
else
- { /* returns to begin */
+ {
+ tbm_begin_iterate(so->scanFastTuples);
+ }
+
+ return scanGetItem(scan, item, recheck);
+ }
+ }
+
+ /*
+	 * Regular scan, filtering out ItemPointers already returned
+	 * from the pending list
+ */
+
+ do
+ {
+ ItemPointerSetMin(item);
+ *recheck = false;
+
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
+ key, &keyrecheck))
+ return FALSE; /* finished one of keys */
+ if (compareItemPointers(item, &key->curItem) < 0)
*item = key->curItem;
- i = 0;
+ *recheck |= keyrecheck;
+ }
+
+ for (i = 1; i <= so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i - 1;
+
+ for (;;)
+ {
+ int cmp = compareItemPointers(item, &key->curItem);
+
+ if (cmp == 0)
break;
+ else if (cmp > 0)
+ {
+ if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
+ key, &keyrecheck))
+ return FALSE; /* finished one of keys */
+ *recheck |= keyrecheck;
+ }
+ else
+ { /* returns to begin */
+ *item = key->curItem;
+ i = 0;
+ break;
+ }
}
}
- }
+ } while( so->scanFastTuples && tbm_check_tuple(so->scanFastTuples, item) );
return TRUE;
}
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 4be89bc..062ddba 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -138,7 +138,7 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
-static void
+void
ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem, bool isBuild)
{
@@ -273,7 +273,7 @@ ginbuild(PG_FUNCTION_ARGS)
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
- Buffer buffer;
+ Buffer RootBuffer, MetaBuffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
@@ -286,11 +286,17 @@ ginbuild(PG_FUNCTION_ARGS)
initGinState(&buildstate.ginstate, index);
+ /* initialize the meta page */
+ MetaBuffer = GinNewBuffer(index);
+
/* initialize the root page */
- buffer = GinNewBuffer(index);
+ RootBuffer = GinNewBuffer(index);
+
START_CRIT_SECTION();
- GinInitBuffer(buffer, GIN_LEAF);
- MarkBufferDirty(buffer);
+ GinInitMetabuffer(MetaBuffer);
+ MarkBufferDirty(MetaBuffer);
+ GinInitBuffer(RootBuffer, GIN_LEAF);
+ MarkBufferDirty(RootBuffer);
if (!index->rd_istemp)
{
@@ -303,16 +309,19 @@ ginbuild(PG_FUNCTION_ARGS)
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
- page = BufferGetPage(buffer);
-
-
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+
+ page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
+ page = BufferGetPage(MetaBuffer);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
}
- UnlockReleaseBuffer(buffer);
+ UnlockReleaseBuffer(MetaBuffer);
+ UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* build the index */
@@ -417,9 +426,26 @@ gininsert(PG_FUNCTION_ARGS)
initGinState(&ginstate, index);
-	for(i=0; i<ginstate.origTupdesc->natts;i++)
- if ( !isnull[i] )
- res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid);
+ if ( GinGetUseFastUpdate(index) )
+ {
+ GinTupleCollector collector;
+
+ memset(&collector, 0, sizeof(GinTupleCollector));
+		for(i=0; i<ginstate.origTupdesc->natts;i++)
+ if ( !isnull[i] )
+ res += ginHeapTupleFastCollect(index, &ginstate, &collector,
+ (OffsetNumber)(i+1), values[i], ht_ctid);
+
+ ginHeapTupleFastInsert(index, &collector);
+ }
+ else
+ {
+		for(i=0; i<ginstate.origTupdesc->natts;i++)
+ if ( !isnull[i] )
+ res += ginHeapTupleInsert(index, &ginstate,
+ (OffsetNumber)(i+1), values[i], ht_ctid);
+
+ }
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index bc51e94..0c0ce52 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -159,6 +159,9 @@ newScanKey(IndexScanDesc scan)
errmsg("GIN indexes do not support whole-index scans")));
so->isVoidRes = false;
+ so->isScanFastInsert = true;
+ so->scanFastTuples = NULL;
+ so->scanFastResult = NULL;
for (i = 0; i < scan->numberOfKeys; i++)
{
@@ -233,8 +236,11 @@ ginrescan(PG_FUNCTION_ARGS)
else
{
freeScanKeys(so->keys, so->nkeys);
+ if ( so->scanFastTuples )
+ tbm_free( so->scanFastTuples );
}
+ so->scanFastTuples = NULL;
so->keys = NULL;
if (scankey && scan->numberOfKeys > 0)
@@ -256,6 +262,8 @@ ginendscan(PG_FUNCTION_ARGS)
if (so != NULL)
{
freeScanKeys(so->keys, so->nkeys);
+ if ( so->scanFastTuples )
+ tbm_free( so->scanFastTuples );
MemoryContextDelete(so->tempCtx);
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 5e71c85..6633dce 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -21,6 +21,7 @@
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
+#include "utils/guc.h"
void
initGinState(GinState *state, Relation index)
@@ -57,7 +58,7 @@ initGinState(GinState *state, Relation index)
CurrentMemoryContext);
/*
- * Check opclass capability to do partial match.
+ * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
@@ -88,7 +89,7 @@ gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
bool isnull;
/*
- * First attribute is always int16, so we can safely use any
+ * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
@@ -213,6 +214,20 @@ GinInitBuffer(Buffer b, uint32 f)
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
}
+void
+GinInitMetabuffer(Buffer b)
+{
+ GinMetaPageData *metadata;
+ Page page = BufferGetPage(b);
+
+ GinInitPage(page, GIN_META, BufferGetPageSize(b));
+
+ metadata = GinPageGetMeta(page);
+
+ metadata->head = metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+}
+
int
compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b)
{
@@ -310,12 +325,10 @@ extractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum value, int32 *ne
return entries;
}
-Datum
-ginoptions(PG_FUNCTION_ARGS)
+static int
+parseFillfactor(char *value, bool validate)
{
- Datum reloptions = PG_GETARG_DATUM(0);
- bool validate = PG_GETARG_BOOL(1);
- bytea *result;
+ int fillfactor;
/*
* It's not clear that fillfactor is useful for GIN, but for the moment
@@ -324,10 +337,73 @@ ginoptions(PG_FUNCTION_ARGS)
#define GIN_MIN_FILLFACTOR 10
#define GIN_DEFAULT_FILLFACTOR 100
- result = default_reloptions(reloptions, validate,
- GIN_MIN_FILLFACTOR,
- GIN_DEFAULT_FILLFACTOR);
- if (result)
- PG_RETURN_BYTEA_P(result);
- PG_RETURN_NULL();
+ if (value == NULL)
+ return GIN_DEFAULT_FILLFACTOR;
+
+ if (!parse_int(value, &fillfactor, 0, NULL))
+ {
+ if (validate)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("fillfactor must be an integer: \"%s\"",
+ value)));
+ return GIN_DEFAULT_FILLFACTOR;
+ }
+
+ if (fillfactor < GIN_MIN_FILLFACTOR || fillfactor > 100)
+ {
+ if (validate)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("fillfactor=%d is out of range (should be between %d and 100)",
+ fillfactor, GIN_MIN_FILLFACTOR)));
+ return GIN_DEFAULT_FILLFACTOR;
+ }
+
+ return fillfactor;
+}
+
+static bool
+parseFastupdate(char *value, bool validate)
+{
+ bool result;
+
+ if (value == NULL)
+ return GIN_DEFAULT_USE_FASTUPDATE;
+
+ if (!parse_bool(value, &result))
+ {
+ if (validate)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("fastupdate=\"%s\" is not recognized",
+ value)));
+ return GIN_DEFAULT_USE_FASTUPDATE;
+ }
+
+ return result;
+}
+
+Datum
+ginoptions(PG_FUNCTION_ARGS)
+{
+ Datum reloptions = PG_GETARG_DATUM(0);
+ bool validate = PG_GETARG_BOOL(1);
+ static const char *const gin_keywords[2] = {"fillfactor", "fastupdate"};
+ char *values[2];
+ GinOptions *options;
+
+ parseRelOptions(reloptions, 2, gin_keywords, values, validate);
+
+ /* If no options, just return NULL */
+ if (values[0] == NULL && values[1] == NULL)
+ PG_RETURN_NULL();
+
+ options = (GinOptions *) palloc(sizeof(GinOptions));
+ SET_VARSIZE(options, sizeof(GinOptions));
+
+ options->std.fillfactor = parseFillfactor(values[0], validate);
+ options->useFastUpdate = parseFastupdate(values[1], validate);
+
+ PG_RETURN_BYTEA_P(options);
}
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index b180cd7..4146995 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -595,7 +595,14 @@ ginbulkdelete(PG_FUNCTION_ARGS)
/* first time through? */
if (stats == NULL)
- stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ {
+ stats = (IndexBulkDeleteResult *)DatumGetPointer(
+ DirectFunctionCall2(gininsertcleanup,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)
+ ));
+ }
+
/* we'll re-count the tuples each time */
stats->num_index_tuples = 0;
@@ -703,9 +710,18 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
- /* Set up all-zero stats if ginbulkdelete wasn't called */
+ /*
+	 * Set up all-zero stats and finalize fast insertion
+ * if ginbulkdelete wasn't called
+ */
if (stats == NULL)
- stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ {
+ stats = (IndexBulkDeleteResult *)DatumGetPointer(
+ DirectFunctionCall2(gininsertcleanup,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)
+ ));
+ }
/*
* XXX we always report the heap tuple count as the number of index
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index 0d40bfb..76db49c 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -71,20 +71,30 @@ static void
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
- Buffer buffer;
+ Buffer RootBuffer, MetaBuffer;
Page page;
- buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
- page = (Page) BufferGetPage(buffer);
+ MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
+ Assert(BufferIsValid(MetaBuffer));
+ GinInitMetabuffer(MetaBuffer);
+
+ page = (Page) BufferGetPage(MetaBuffer);
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
- GinInitBuffer(buffer, GIN_LEAF);
+ RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
+ Assert(BufferIsValid(RootBuffer));
+ page = (Page) BufferGetPage(RootBuffer);
+
+ GinInitBuffer(RootBuffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
+ MarkBufferDirty(MetaBuffer);
+ UnlockReleaseBuffer(MetaBuffer);
+ MarkBufferDirty(RootBuffer);
+ UnlockReleaseBuffer(RootBuffer);
}
static void
@@ -433,6 +443,161 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
}
}
+static void
+ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+{
+ ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record);
+ Buffer metabuffer;
+ Page metapage;
+
+ metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+ metapage = BufferGetPage(metabuffer);
+
+ if (!XLByteLE(lsn, PageGetLSN(metapage)))
+ {
+ memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+ PageSetLSN(metapage, lsn);
+ PageSetTLI(metapage, ThisTimeLineID);
+ MarkBufferDirty(metabuffer);
+ }
+
+ if ( data->ntuples > 0 )
+ {
+ /*
+ * insert into tail page
+ */
+ if (!(record->xl_info & XLR_BKP_BLOCK_1))
+ {
+ Buffer buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ OffsetNumber l, off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+ int i, tupsize;
+ IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+
+				for(i=0; i<data->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(tuples);
+
+ l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page");
+
+ tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+ }
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+ }
+ else if ( data->prevTail != InvalidBlockNumber )
+ {
+ /*
+ * New tail
+ */
+
+ Buffer buffer = XLogReadBuffer(data->node, data->prevTail, false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ GinPageGetOpaque(page)->rightlink = data->newRightlink;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+
+ UnlockReleaseBuffer(metabuffer);
+}
+
+static void
+ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+{
+ ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ OffsetNumber l, off = FirstOffsetNumber;
+ int i, tupsize;
+ IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
+
+ if (record->xl_info & XLR_BKP_BLOCK_1)
+ return;
+
+ buffer = XLogReadBuffer(data->node, data->blkno, true);
+ page = BufferGetPage(buffer);
+
+ GinInitBuffer(buffer, GIN_LIST);
+ GinPageGetOpaque(page)->rightlink = data->rightlink;
+ if ( data->rightlink == InvalidBlockNumber )
+ GinPageSetFullRow(page);
+
+	for(i=0; i<data->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(tuples);
+
+ l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber)
+ elog(ERROR, "failed to add item to index page");
+
+ tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+ }
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+
+ UnlockReleaseBuffer(buffer);
+}
+
+static void
+ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+{
+ ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record);
+ Buffer metabuffer;
+ Page metapage;
+ int i;
+
+ metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+ metapage = BufferGetPage(metabuffer);
+
+ if (!XLByteLE(lsn, PageGetLSN(metapage)))
+ {
+ memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+ PageSetLSN(metapage, lsn);
+ PageSetTLI(metapage, ThisTimeLineID);
+ MarkBufferDirty(metabuffer);
+ }
+
+	for(i=0; i<data->ndeleted; i++)
+ {
+ Buffer buffer = XLogReadBuffer(data->node,data->toDelete[i],false);
+ Page page = BufferGetPage(buffer);
+
+ if ( !XLByteLE(lsn, PageGetLSN(page)))
+ {
+ GinPageGetOpaque(page)->flags = GIN_DELETED;
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ }
+
+ UnlockReleaseBuffer(buffer);
+ }
+ UnlockReleaseBuffer(metabuffer);
+}
+
void
gin_redo(XLogRecPtr lsn, XLogRecord *record)
{
@@ -459,6 +624,15 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ ginRedoUpdateMetapage(lsn, record);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ ginRedoInsertListPage(lsn, record);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ ginRedoDeleteListPages(lsn, record);
+ break;
default:
elog(PANIC, "gin_redo: unknown op code %u", info);
}
@@ -514,6 +688,18 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "Delete page, ");
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ appendStringInfo(buf, "Update metapage, ");
+ desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ appendStringInfo(buf, "insert new list page, ");
+ desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ appendStringInfo(buf, "Delete list page (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted);
+ desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head);
+ break;
default:
elog(PANIC, "gin_desc: unknown op code %u", info);
}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 2fc6f05..4434ab4 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -193,6 +193,7 @@ CREATE VIEW pg_stat_all_tables AS
pg_stat_get_tuples_updated(C.oid) AS n_tup_upd,
pg_stat_get_tuples_deleted(C.oid) AS n_tup_del,
pg_stat_get_tuples_hot_updated(C.oid) AS n_tup_hot_upd,
+ pg_stat_get_fresh_inserted_tuples(C.oid) AS n_fresh_tup,
pg_stat_get_live_tuples(C.oid) AS n_live_tup,
pg_stat_get_dead_tuples(C.oid) AS n_dead_tup,
pg_stat_get_last_vacuum_time(C.oid) as last_vacuum,
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index ffc882f..0864a04 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -306,6 +306,47 @@ tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
}
/*
+ * tbm_check_tuple - Check presence of tuple's ID in a TIDBitmap
+ */
+bool
+tbm_check_tuple(TIDBitmap *tbm, const ItemPointer tid)
+{
+ BlockNumber blk = ItemPointerGetBlockNumber(tid);
+ OffsetNumber off = ItemPointerGetOffsetNumber(tid);
+ PagetableEntry *page;
+ int wordnum,
+ bitnum;
+
+ /* safety check to ensure we don't overrun bit array bounds */
+ if (off < 1 || off > MAX_TUPLES_PER_PAGE)
+ elog(ERROR, "tuple offset out of range: %u", off);
+
+ if (tbm_page_is_lossy(tbm, blk))
+ return true; /* whole page is already marked */
+
+ page = tbm_get_pageentry(tbm, blk);
+ if (page->ischunk)
+ {
+ wordnum = bitnum = 0;
+ }
+ else
+ {
+ wordnum = WORDNUM(off - 1);
+ bitnum = BITNUM(off - 1);
+ }
+
+ return ( page->words[wordnum] & ((bitmapword) 1 << bitnum) ) ? true : false;
+}
+
+/*
+ * tbm_has_lossy - returns true if there is at least one lossy page
+ */
+bool
+tbm_has_lossy(TIDBitmap *tbm)
+{
+ return (tbm->nchunks>0);
+}
+
+/*
* tbm_union - set union
*
* a is modified in-place, b is not changed
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 2c68779..324ae44 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2477,6 +2477,58 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map)
}
/*
+ * relation_has_pending_indexes
+ *
+ * Returns true if relation has indexes with delayed insertion.
+ * Currently, only GIN has that possibility
+ */
+
+static bool
+relation_has_pending_indexes(Oid relid, Form_pg_class classForm)
+{
+ Relation rel;
+ List *indexoidlist;
+ ListCell *indexoidscan;
+ bool has = false;
+
+	/* only an ordinary cataloged heap can contain such indexes */
+ if ( classForm->relkind != RELKIND_RELATION )
+ return false;
+
+	/* no indexes at all */
+ if ( classForm->relhasindex == false )
+ return false;
+
+ rel = RelationIdGetRelation(relid);
+
+ indexoidlist = RelationGetIndexList(rel);
+
+ foreach(indexoidscan, indexoidlist)
+ {
+ Oid indexoid = lfirst_oid(indexoidscan);
+ Relation irel = RelationIdGetRelation(indexoid);
+
+ /*
+ * Currently, only GIN
+ */
+ if ( irel->rd_rel->relam == GIN_AM_OID )
+ {
+ RelationClose(irel);
+ has = true;
+ break;
+ }
+
+ RelationClose(irel);
+ }
+
+ list_free(indexoidlist);
+
+ RelationClose(rel);
+
+ return has;
+}
+
+/*
* relation_needs_vacanalyze
*
* Check whether a relation needs to be vacuumed or analyzed; return each into
@@ -2533,7 +2585,8 @@ relation_needs_vacanalyze(Oid relid,
/* number of vacuum (resp. analyze) tuples at this time */
float4 vactuples,
- anltuples;
+ anltuples,
+ instuples;
/* freeze parameters */
int freeze_max_age;
@@ -2598,6 +2651,7 @@ relation_needs_vacanalyze(Oid relid,
vactuples = tabentry->n_dead_tuples;
anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
tabentry->last_anl_tuples;
+ instuples = tabentry->n_inserted_tuples;
vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
@@ -2611,8 +2665,13 @@ relation_needs_vacanalyze(Oid relid,
NameStr(classForm->relname),
vactuples, vacthresh, anltuples, anlthresh);
- /* Determine if this table needs vacuum or analyze. */
- *dovacuum = force_vacuum || (vactuples > vacthresh);
+ /*
+ * Determine if this table needs vacuum or analyze.
+	 * Use vac_base_thresh as the threshold for instuples, because
+	 * search time over GIN's pending pages grows linearly with their number.
+ */
+ *dovacuum = force_vacuum || (vactuples > vacthresh) ||
+ (relation_has_pending_indexes(relid, classForm) && instuples > vac_base_thresh);
*doanalyze = (anltuples > anlthresh);
}
else
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 5ae0ec1..24573ff 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3537,6 +3537,9 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated;
tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted;
tabentry->tuples_hot_updated = tabmsg[i].t_counts.t_tuples_hot_updated;
+ tabentry->n_inserted_tuples = tabmsg[i].t_counts.t_tuples_inserted +
+ tabmsg[i].t_counts.t_tuples_updated -
+ tabmsg[i].t_counts.t_tuples_hot_updated;
tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples;
tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples;
tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched;
@@ -3560,6 +3563,9 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
tabentry->tuples_hot_updated += tabmsg[i].t_counts.t_tuples_hot_updated;
+ tabentry->n_inserted_tuples += tabmsg[i].t_counts.t_tuples_inserted +
+ tabmsg[i].t_counts.t_tuples_updated -
+ tabmsg[i].t_counts.t_tuples_hot_updated;
tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples;
tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples;
tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
@@ -3570,6 +3576,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
/* Likewise for n_dead_tuples */
tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
+ /* Likewise for n_inserted_tuples */
+ tabentry->n_inserted_tuples = Max(tabentry->n_inserted_tuples, 0);
/*
* Add per-table stats to the per-database entry, too.
@@ -3770,6 +3778,7 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
tabentry->n_live_tuples = msg->m_tuples;
/* Resetting dead_tuples to 0 is an approximation ... */
tabentry->n_dead_tuples = 0;
+ tabentry->n_inserted_tuples = 0;
if (msg->m_analyze)
{
if (msg->m_scanned_all)
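
The counter arithmetic above counts heap tuples that generate new index entries: plain inserts plus non-HOT updates (HOT updates reuse the existing index entries, so they are subtracted). A tiny standalone illustration with made-up numbers:

#include <stdio.h>

int
main(void)
{
	/* counters from one PgStat_MsgTabstat message (made-up numbers) */
	long		tuples_inserted = 100;
	long		tuples_updated = 50;
	long		tuples_hot_updated = 30;	/* HOT updates add no index entries */

	/* same arithmetic as pgstat_recv_tabstat() above */
	long		n_inserted_tuples = tuples_inserted + tuples_updated -
									tuples_hot_updated;

	printf("pending-list work since last vacuum grows by %ld tuples\n",
		   n_inserted_tuples);
	return 0;
}
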
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 77c2baa..381de6f 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -31,6 +31,7 @@ extern Datum pg_stat_get_tuples_updated(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_tuples_deleted(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_tuples_hot_updated(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_live_tuples(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_fresh_inserted_tuples(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_dead_tuples(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_blocks_fetched(PG_FUNCTION_ARGS);
extern Datum pg_stat_get_blocks_hit(PG_FUNCTION_ARGS);
@@ -209,6 +210,20 @@ pg_stat_get_live_tuples(PG_FUNCTION_ARGS)
PG_RETURN_INT64(result);
}
+Datum
+pg_stat_get_fresh_inserted_tuples(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->n_inserted_tuples);
+
+ PG_RETURN_INT64(result);
+}
Datum
pg_stat_get_dead_tuples(PG_FUNCTION_ARGS)
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 0fd2cbd..f514358 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -21,6 +21,7 @@
#include "storage/buf.h"
#include "storage/off.h"
#include "storage/relfilenode.h"
+#include "utils/rel.h"
/*
@@ -52,11 +53,34 @@ typedef struct GinPageOpaqueData
typedef GinPageOpaqueData *GinPageOpaque;
-#define GIN_ROOT_BLKNO (0)
+#define GIN_METAPAGE_BLKNO (0)
+#define GIN_ROOT_BLKNO (1)
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
+#define GIN_META (1 << 3)
+#define GIN_LIST (1 << 4)
+#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
+
+typedef struct GinMetaPageData
+{
+ /*
+ * Pointers to head and tail of list of GIN_LIST pages. These store
+ * fast-inserted entries that haven't yet been moved into the regular
+ * GIN structure.
+ */
+ BlockNumber head;
+ BlockNumber tail;
+
+ /*
+ * Free space in bytes in the list's tail page.
+ */
+ uint32 tailFreeSize;
+} GinMetaPageData;
+
+#define GinPageGetMeta(p) \
+ ((GinMetaPageData *) PageGetContents(p))
/*
* Works on page
@@ -68,6 +92,8 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+#define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
@@ -135,6 +161,20 @@ typedef struct
- GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
+/*
+ * storage type for GIN's options. Must be upward compatible with
+ * StdRdOptions, since we might call RelationGetFillFactor().
+ */
+typedef struct GinOptions
+{
+ StdRdOptions std; /* standard options */
+ bool useFastUpdate; /* use fast updates? */
+} GinOptions;
+
+#define GIN_DEFAULT_USE_FASTUPDATE true
+#define GinGetUseFastUpdate(relation) \
+ ((relation)->rd_options ? \
+ ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK
#define GIN_SHARE BUFFER_LOCK_SHARE
@@ -234,12 +274,49 @@ typedef struct ginxlogDeletePage
BlockNumber rightLink;
} ginxlogDeletePage;
+
+#define XLOG_GIN_UPDATE_META_PAGE 0x60
+
+typedef struct ginxlogUpdateMeta
+{
+ RelFileNode node;
+ GinMetaPageData metadata;
+ BlockNumber prevTail;
+ BlockNumber newRightlink;
+ int32 ntuples; /* if ntuples > 0, metadata.tail was updated with
+ that many tuples; otherwise a new sublist was inserted */
+ /* follows array of inserted tuples */
+} ginxlogUpdateMeta;
+
+#define XLOG_GIN_INSERT_LISTPAGE 0x70
+
+typedef struct ginxlogInsertListPage
+{
+ RelFileNode node;
+ BlockNumber blkno;
+ BlockNumber rightlink;
+ int32 ntuples;
+ /* follows array of inserted tuples */
+} ginxlogInsertListPage;
+
+#define XLOG_GIN_DELETE_LISTPAGE 0x80
+
+#define NDELETE_AT_ONCE (16)
+typedef struct ginxlogDeleteListPages
+{
+ RelFileNode node;
+ GinMetaPageData metadata;
+ int32 ndeleted;
+ BlockNumber toDelete[ NDELETE_AT_ONCE ];
+} ginxlogDeleteListPages;
+
/* ginutil.c */
extern Datum ginoptions(PG_FUNCTION_ARGS);
extern void initGinState(GinState *state, Relation index);
extern Buffer GinNewBuffer(Relation index);
extern void GinInitBuffer(Buffer b, uint32 f);
extern void GinInitPage(Page page, uint32 f, Size pageSize);
+extern void GinInitMetabuffer(Buffer b);
extern int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
OffsetNumber attnum_b, Datum b);
@@ -252,6 +329,8 @@ extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
/* gininsert.c */
extern Datum ginbuild(PG_FUNCTION_ARGS);
extern Datum gininsert(PG_FUNCTION_ARGS);
+extern void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem, bool isBuild);
/* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
@@ -425,6 +504,10 @@ typedef struct GinScanOpaqueData
uint32 nkeys;
bool isVoidRes; /* true if ginstate.extractQueryFn guarantees
* that nothing will be found */
+ bool isScanFastInsert; /* true while scanning fast-update (pending) pages */
+ TIDBitmap *scanFastTuples;
+ TBMIterateResult *scanFastResult;
+ OffsetNumber scanFastOffset;
} GinScanOpaqueData;
typedef GinScanOpaqueData *GinScanOpaque;
@@ -488,4 +571,23 @@ extern void ginInsertRecordBA(BuildAccumulator *accum,
OffsetNumber attnum, Datum *entries, int32 nentry);
extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
+/* ginfast.c */
+
+typedef struct GinTupleCollector
+{
+ IndexTuple *tuples;
+ uint32 ntuples;
+ uint32 lentuples;
+ uint32 sumsize;
+} GinTupleCollector;
+
+extern void ginHeapTupleFastInsert(Relation index, GinTupleCollector *collector);
+extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+ GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item);
+
+#define GinListPageSize \
+ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+extern Datum gininsertcleanup(PG_FUNCTION_ARGS);
+
#endif
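
The tailFreeSize field of GinMetaPageData above is what decides whether a newly collected tuple still fits on the current tail page of the pending list. A rough standalone sketch of that check follows; the layout numbers (8192-byte blocks, 24-byte page header, 8-byte GinPageOpaqueData, 4-byte line pointer) are typical values assumed for the demo, not taken from the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BLCKSZ					8192	/* assumed default block size */
#define SIZE_OF_PAGE_HEADER		24		/* typical SizeOfPageHeaderData */
#define SIZE_OF_GIN_OPAQUE		8		/* assumed sizeof(GinPageOpaqueData) */
#define SIZE_OF_LINE_POINTER	4		/* sizeof(ItemIdData) */
#define MAXALIGN(x)				(((x) + 7) & ~(uintptr_t) 7)

/* mirrors the GinListPageSize macro added to gin.h */
#define GIN_LIST_PAGE_SIZE \
	(BLCKSZ - SIZE_OF_PAGE_HEADER - MAXALIGN(SIZE_OF_GIN_OPAQUE))

int
main(void)
{
	uint32_t	tailFreeSize = GIN_LIST_PAGE_SIZE;	/* freshly added tail page */
	uint32_t	tupsize = MAXALIGN(40);				/* an incoming index tuple */

	/* does the tuple plus its line pointer still fit on the tail page? */
	bool		fits = tupsize + SIZE_OF_LINE_POINTER <= tailFreeSize;

	printf("list page size = %lu bytes, 40-byte tuple fits: %s\n",
		   (unsigned long) GIN_LIST_PAGE_SIZE, fits ? "yes" : "no");
	return 0;
}
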
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index d405d82..9165f08 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2928,6 +2928,8 @@ DATA(insert OID = 1933 ( pg_stat_get_tuples_deleted PGNSP PGUID 12 1 0 0 f f f
DESCR("statistics: number of tuples deleted");
DATA(insert OID = 1972 ( pg_stat_get_tuples_hot_updated PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_tuples_hot_updated _null_ _null_ _null_ ));
DESCR("statistics: number of tuples hot updated");
+DATA(insert OID = 2316 ( pg_stat_get_fresh_inserted_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_fresh_inserted_tuples _null_ _null_ _null_ ));
+DESCR("statistics: number of inserted tuples since last vacuum");
DATA(insert OID = 2878 ( pg_stat_get_live_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_live_tuples _null_ _null_ _null_ ));
DESCR("statistics: number of live tuples");
DATA(insert OID = 2879 ( pg_stat_get_dead_tuples PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_dead_tuples _null_ _null_ _null_ ));
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index 56d6a0d..c8dbeea 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -49,6 +49,8 @@ extern void tbm_free(TIDBitmap *tbm);
extern void tbm_add_tuples(TIDBitmap *tbm,
const ItemPointer tids, int ntids,
bool recheck);
+extern bool tbm_check_tuple(TIDBitmap *tbm, const ItemPointer tid);
+extern bool tbm_has_lossy(TIDBitmap *tbm);
extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 4a1e274..79754dc 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -476,6 +476,8 @@ typedef struct PgStat_StatTabEntry
PgStat_Counter tuples_deleted;
PgStat_Counter tuples_hot_updated;
+ PgStat_Counter n_inserted_tuples; /* tuples inserted or non-HOT updated
+ * since last vacuum */
PgStat_Counter n_live_tuples;
PgStat_Counter n_dead_tuples;
PgStat_Counter last_anl_tuples;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 977b17c..c037696 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1291,14 +1291,14 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
pg_shadow | SELECT pg_authid.rolname AS usename, pg_authid.oid AS usesysid, pg_authid.rolcreatedb AS usecreatedb, pg_authid.rolsuper AS usesuper, pg_authid.rolcatupdate AS usecatupd, pg_authid.rolpassword AS passwd, (pg_authid.rolvaliduntil)::abstime AS valuntil, pg_authid.rolconfig AS useconfig FROM pg_authid WHERE pg_authid.rolcanlogin;
pg_stat_activity | SELECT s.datid, d.datname, s.procpid, s.usesysid, u.rolname AS usename, s.current_query, s.waiting, s.xact_start, s.query_start, s.backend_start, s.client_addr, s.client_port FROM pg_database d, pg_stat_get_activity(NULL::integer) s(datid, procpid, usesysid, current_query, waiting, xact_start, query_start, backend_start, client_addr, client_port), pg_authid u WHERE ((s.datid = d.oid) AND (s.usesysid = u.oid));
pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
- pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
+ pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_fresh_inserted_tuples(c.oid) AS n_fresh_tup, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc;
pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted FROM pg_database d;
pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));
- pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text));
+ pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_fresh_tup, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text));
pg_stat_user_functions | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_function_calls(p.oid) AS calls, (pg_stat_get_function_time(p.oid) / 1000) AS total_time, (pg_stat_get_function_self_time(p.oid) / 1000) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_function_calls(p.oid) IS NOT NULL));
pg_stat_user_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_indexes.schemaname !~ '^pg_toast'::text));
- pg_stat_user_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
+ pg_stat_user_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_fresh_tup, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
pg_statio_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
pg_statio_all_sequences | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS blks_read, pg_stat_get_blocks_hit(c.oid) AS blks_hit FROM (pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = 'S'::"char");
pg_statio_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, (pg_stat_get_blocks_fetched(c.oid) - pg_stat_get_blocks_hit(c.oid)) AS heap_blks_read, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, (sum((pg_stat_get_blocks_fetched(i.indexrelid) - pg_stat_get_blocks_hit(i.indexrelid))))::bigint AS idx_blks_read, (sum(pg_stat_get_blocks_hit(i.indexrelid)))::bigint AS idx_blks_hit, (pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, (pg_stat_get_blocks_fetched(x.oid) - pg_stat_get_blocks_hit(x.oid)) AS tidx_blks_read, pg_stat_get_blocks_hit(x.oid) AS tidx_blks_hit FROM ((((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_class x ON ((t.reltoastidxid = x.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname, t.oid, x.oid;