diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index d77e999..951f59b 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -7376,7 +7376,7 @@ pg_locks - currently held locks + locks currently held or awaited @@ -8015,16 +8015,16 @@ The view pg_locks provides access to - information about the locks held by open transactions within the + information about the locks held by active processes within the database server. See for more discussion of locking. pg_locks contains one row per active lockable - object, requested lock mode, and relevant transaction. Thus, the same + object, requested lock mode, and relevant process. Thus, the same lockable object might - appear many times, if multiple transactions are holding or waiting + appear many times, if multiple processes are holding or waiting for locks on it. However, an object that currently has no locks on it will not appear at all. @@ -8200,31 +8200,31 @@ granted is true in a row representing a lock - held by the indicated transaction. False indicates that this transaction is - currently waiting to acquire this lock, which implies that some other - transaction is holding a conflicting lock mode on the same lockable object. - The waiting transaction will sleep until the other lock is released (or a - deadlock situation is detected). A single transaction can be waiting to - acquire at most one lock at a time. + held by the indicated process. False indicates that this process is + currently waiting to acquire this lock, which implies that at least one + other process is holding or waiting for a conflicting lock mode on the same + lockable object. The waiting process will sleep until the other lock is + released (or a deadlock situation is detected). A single process can be + waiting to acquire at most one lock at a time. - Every transaction holds an exclusive lock on its virtual transaction ID for - its entire duration. If a permanent ID is assigned to the transaction - (which normally happens only if the transaction changes the state of the - database), it also holds an exclusive lock on its permanent transaction ID - until it ends. When one transaction finds it necessary to wait specifically - for another transaction, it does so by attempting to acquire share lock on - the other transaction ID (either virtual or permanent ID depending on the - situation). That will succeed only when the other transaction - terminates and releases its locks. + Throughout running a transaction, a server process holds an exclusive lock + on the transaction's virtual transaction ID. If a permanent ID is assigned + to the transaction (which normally happens only if the transaction changes + the state of the database), it also holds an exclusive lock on the + transaction's permanent transaction ID until it ends. When a process finds + it necessary to wait specifically for another transaction to end, it does + so by attempting to acquire share lock on the other transaction's ID + (either virtual or permanent ID depending on the situation). That will + succeed only when the other transaction terminates and releases its locks. Although tuples are a lockable type of object, information about row-level locks is stored on disk, not in memory, and therefore row-level locks normally do not appear in this view. - If a transaction is waiting for a + If a process is waiting for a row-level lock, it will usually appear in the view as waiting for the permanent transaction ID of the current holder of that row lock. 
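To make the transaction-ID wait protocol above concrete, here is a minimal standalone C sketch (POSIX threads; illustrative only, not PostgreSQL code): the "transaction" holds an exclusive lock on its own ID for its entire duration, and a waiter's share-lock request is granted only once that exclusive lock is released.

/*
 * Standalone sketch of waiting for a transaction to end by locking its
 * transaction ID.  Compile with: cc -pthread xidwait.c
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t xid_lock = PTHREAD_RWLOCK_INITIALIZER;

static void *transaction(void *arg)
{
	pthread_rwlock_wrlock(&xid_lock);	/* exclusive lock on own XID */
	printf("txn: started, doing work\n");
	sleep(1);							/* simulated transaction body */
	printf("txn: committing\n");
	pthread_rwlock_unlock(&xid_lock);	/* released at transaction end */
	return NULL;
}

static void *waiter(void *arg)
{
	printf("waiter: blocking on the other transaction's ID\n");
	pthread_rwlock_rdlock(&xid_lock);	/* share lock: granted only at txn end */
	printf("waiter: other transaction has ended\n");
	pthread_rwlock_unlock(&xid_lock);
	return NULL;
}

int main(void)
{
	pthread_t	t1,
				t2;

	pthread_create(&t1, NULL, transaction, NULL);
	usleep(100 * 1000);					/* let the txn grab its lock first */
	pthread_create(&t2, NULL, waiter, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}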
@@ -8260,7 +8260,7 @@ pid column of the pg_stat_activity view to get more - information on the session holding or waiting to hold each lock, + information on the session holding or awaiting each lock, for example SELECT * FROM pg_locks pl LEFT JOIN pg_stat_activity psa @@ -8281,6 +8281,20 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx + While it is possible to obtain information about which processes block + which other processes by joining pg_locks against + itself, this is very difficult to get right in detail. Such a query would + have to encode knowledge about which lock modes conflict with which + others. Worse, the pg_locks view does not expose + information about which processes are ahead of which others in lock wait + queues, nor information about which processes are parallel workers running + on behalf of which other client sessions. It is better to use + the pg_blocking_pids() function + (see ) to identify which + process(es) a waiting process is blocked behind. + + + The pg_locks view displays data from both the regular lock manager and the predicate lock manager, which are separate systems; in addition, the regular lock manager subdivides its diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index b001ce5..c0b94bc 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14997,6 +14997,12 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); + pg_blocking_pids(int) + int[] + Process ID(s) that are blocking specified server process ID + + + pg_conf_load_time() timestamp with time zone configuration load time @@ -15184,6 +15190,29 @@ SET search_path TO schema , schema, .. + pg_blocking_pids + + + + pg_blocking_pids returns an array of the process IDs + of the sessions that are blocking the server process with the specified + process ID, or an empty array if there is no such server process or it is + not blocked. One server process blocks another if it either holds a lock + that conflicts with the blocked process's lock request (hard block), or is + waiting for a lock that would conflict with the blocked process's lock + request and is ahead of it in the wait queue (soft block). When using + parallel queries the result always lists client-visible process IDs (that + is, pg_backend_pid results) even if the actual lock is held + or awaited by a child worker process. As a result of that, there may be + duplicated PIDs in the result. Also note that when a prepared transaction + holds a conflicting lock, it will be represented by a zero process ID in + the result of this function. + Frequent calls to this function could have some impact on database + performance, because it needs exclusive access to the lock manager's + shared state for a short time. 
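The hard/soft distinction above can be sketched in a few lines of standalone C (illustrative only; the mode names, conflict table, and queue layout are simplified stand-ins for the real lock manager structures): a process hard-blocks another if it holds a conflicting mode, and soft-blocks it if it is waiting for a conflicting mode while ahead of it in the wait queue.

#include <stdio.h>

enum mode
{
	MODE_SHARE, MODE_EXCLUSIVE, NUM_MODES
};

/* conflict table: conflicts[a] has bit b set if mode a conflicts with b */
static const unsigned conflicts[NUM_MODES] = {
	[MODE_SHARE] = 1u << MODE_EXCLUSIVE,
	[MODE_EXCLUSIVE] = (1u << MODE_SHARE) | (1u << MODE_EXCLUSIVE),
};

struct waiter
{
	int			pid;
	enum mode	mode;
};

int main(void)
{
	/* pid 100 holds EXCLUSIVE; pids 101 (EXCLUSIVE) and 102 (SHARE) wait */
	struct waiter holder = {100, MODE_EXCLUSIVE};
	struct waiter queue[] = {{101, MODE_EXCLUSIVE}, {102, MODE_SHARE}};
	int			blocked_pid = 102;
	enum mode	wanted = MODE_SHARE;	/* the mode pid 102 requested */

	/* hard block: a holder with a conflicting mode */
	if (conflicts[wanted] & (1u << holder.mode))
		printf("%d hard-blocks %d\n", holder.pid, blocked_pid);

	/* soft block: a conflicting waiter ahead of us in the queue */
	for (int i = 0; queue[i].pid != blocked_pid; i++)
		if (conflicts[wanted] & (1u << queue[i].mode))
			printf("%d soft-blocks %d\n", queue[i].pid, blocked_pid);

	return 0;
}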
+ + + pg_conf_load_time diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index c740952..22ebba5 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -634,7 +634,7 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx); XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX); + recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, 0); page = BufferGetPage(meta); PageSetLSN(page, recptr); diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index d0ca485..98fcc2c 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -199,7 +199,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) newtup, newsz); - recptr = XLogInsert(RM_BRIN_ID, info); + recptr = XLogInsert(RM_BRIN_ID, info, 0); PageSetLSN(oldpage, recptr); } @@ -294,7 +294,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, /* old page */ XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD); - recptr = XLogInsert(RM_BRIN_ID, info); + recptr = XLogInsert(RM_BRIN_ID, info, 0); PageSetLSN(oldpage, recptr); PageSetLSN(newpage, recptr); @@ -444,7 +444,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, XLogRegisterBuffer(1, revmapbuf, 0); - recptr = XLogInsert(RM_BRIN_ID, info); + recptr = XLogInsert(RM_BRIN_ID, info, 0); PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index b2c273d..d30345f 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -487,7 +487,7 @@ revmap_physical_extend(BrinRevmap *revmap) XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND); + recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, 0); PageSetLSN(metapage, recptr); PageSetLSN(page, recptr); } diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 06ba9cb..db2f959 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -415,7 +415,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, sizeof(BlockIdData) * 2); } - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, 0); PageSetLSN(page, recptr); if (childbuf != InvalidBuffer) PageSetLSN(childpage, recptr); @@ -576,7 +576,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, XLogRegisterData((char *) &data, sizeof(ginxlogSplit)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, 0); PageSetLSN(BufferGetPage(stack->buffer), recptr); PageSetLSN(BufferGetPage(rbuffer), recptr); if (stack->parent == NULL) diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index a55bb4a..5799c0a 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -829,7 +829,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs) { XLogRecPtr recptr; - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, 0); PageSetLSN(page, recptr); } @@ -1748,7 +1748,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, 
rootsize); XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, 0); PageSetLSN(page, recptr); } diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 2ddf568..7211dee 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -124,7 +124,7 @@ writeListPage(Relation index, Buffer buffer, XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); XLogRegisterBufData(0, workspace, size); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, 0); PageSetLSN(page, recptr); } @@ -406,7 +406,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT); XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, 0); PageSetLSN(metapage, recptr); if (buffer != InvalidBuffer) @@ -601,7 +601,7 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, XLogRegisterData((char *) &data, sizeof(ginxlogDeleteListPages)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, 0); PageSetLSN(metapage, recptr); for (i = 0; i < data.ndeleted; i++) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index cd21e0e..b8268da 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -350,7 +350,7 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT); XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, 0); page = BufferGetPage(RootBuffer); PageSetLSN(page, recptr); diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 9450267..c9f61af 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -658,7 +658,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats) XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta)); XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, 0); PageSetLSN(metapage, recptr); } diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 6a4b98a..35f8f17 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -104,7 +104,7 @@ xlogVacuumPage(Relation index, Buffer buffer) XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, 0); PageSetLSN(page, recptr); } @@ -265,7 +265,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, 0); PageSetLSN(page, recptr); PageSetLSN(parentPage, recptr); PageSetLSN(BufferGetPage(lBuffer), recptr); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 4e43a69..6b41bdc 100644 --- 
a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -184,7 +184,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, 0); PageSetLSN(page, recptr); } else diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index b48e97c..d066677 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -370,7 +370,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf, i++; } - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, 0); return recptr; } @@ -416,7 +416,7 @@ gistXLogUpdate(RelFileNode node, Buffer buffer, if (BufferIsValid(leftchildbuf)) XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD); - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, 0); return recptr; } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f443742..24666fe 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2510,7 +2510,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP_ID, info); + recptr = XLogInsert(RM_HEAP_ID, info, 0); PageSetLSN(page, recptr); } @@ -2849,7 +2849,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP2_ID, info); + recptr = XLogInsert(RM_HEAP2_ID, info, 0); PageSetLSN(page, recptr); } @@ -3311,7 +3311,7 @@ l1: /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, 0); PageSetLSN(page, recptr); } @@ -5064,7 +5064,7 @@ failed: /* we don't decode row locks atm, so no need to log the origin */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, 0); PageSetLSN(page, recptr); } @@ -5706,7 +5706,7 @@ l4: XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, 0); PageSetLSN(page, recptr); } @@ -5846,7 +5846,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple) XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM, 0); PageSetLSN(page, recptr); } @@ -5980,7 +5980,7 @@ heap_abort_speculative(Relation relation, HeapTuple tuple) /* No replica identity & replication origin logged */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, 0); PageSetLSN(page, recptr); } @@ -6084,7 +6084,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple) /* inplace updates aren't decoded atm, don't log the origin */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, 0); PageSetLSN(page, recptr); } @@ -7094,7 +7094,7 @@ log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid) 
XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, 0); return recptr; } @@ -7153,7 +7153,7 @@ log_heap_clean(Relation reln, Buffer buffer, XLogRegisterBufData(0, (char *) nowunused, nunused * sizeof(OffsetNumber)); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN, 0); return recptr; } @@ -7189,7 +7189,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, XLogRegisterBufData(0, (char *) tuples, ntuples * sizeof(xl_heap_freeze_tuple)); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, 0); return recptr; } @@ -7225,7 +7225,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, flags |= REGBUF_NO_IMAGE; XLogRegisterBuffer(1, heap_buffer, flags); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE, 0); return recptr; } @@ -7445,7 +7445,7 @@ log_heap_update(Relation reln, Buffer oldbuf, /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP_ID, info); + recptr = XLogInsert(RM_HEAP_ID, info, 0); return recptr; } @@ -7520,7 +7520,7 @@ log_heap_new_cid(Relation relation, HeapTuple tup) /* will be looked at irrespective of origin */ - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID, 0); return recptr; } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index f9ce986..df94448 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -931,7 +931,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state) XLogRegisterData(waldata_start, len); /* write xlog record */ - XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE); + XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, 0); pfree(waldata_start); } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index e3c55eb..a2b10d7 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -906,7 +906,7 @@ _bt_insertonpg(Relation rel, else XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup)); - recptr = XLogInsert(RM_BTREE_ID, xlinfo); + recptr = XLogInsert(RM_BTREE_ID, xlinfo, 0); if (BufferIsValid(metabuf)) { @@ -1325,7 +1325,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, else xlinfo = newitemonleft ? 
XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R; - recptr = XLogInsert(RM_BTREE_ID, xlinfo); + recptr = XLogInsert(RM_BTREE_ID, xlinfo, 0); PageSetLSN(origpage, recptr); PageSetLSN(rightpage, recptr); @@ -2045,7 +2045,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->pd_upper); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, 0); PageSetLSN(lpage, recptr); PageSetLSN(rootpage, recptr); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 67755d7..9453b0f 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -254,7 +254,7 @@ _bt_getroot(Relation rel, int access) XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, 0); PageSetLSN(rootpage, recptr); PageSetLSN(metapg, recptr); @@ -551,7 +551,7 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX XLogBeginInsert(); XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage); - XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE); + XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, 0); } /* @@ -837,7 +837,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, if (nitems > 0) XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber)); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, 0); PageSetLSN(page, recptr); } @@ -911,7 +911,7 @@ _bt_delitems_delete(Relation rel, Buffer buf, */ XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber)); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, 0); PageSetLSN(page, recptr); } @@ -1476,7 +1476,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, 0); page = BufferGetPage(topparent); PageSetLSN(page, recptr); @@ -1820,7 +1820,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) else xlinfo = XLOG_BTREE_UNLINK_PAGE; - recptr = XLogInsert(RM_BTREE_ID, xlinfo); + recptr = XLogInsert(RM_BTREE_ID, xlinfo, 0); if (BufferIsValid(metabuf)) { diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index f090ca5..4d41db7 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -304,7 +304,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple, if (xlrec.offnumParent != InvalidOffsetNumber) XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, 0); PageSetLSN(current->page, recptr); @@ -541,7 +541,7 @@ moveLeafs(Relation index, SpGistState *state, XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? 
REGBUF_WILL_INIT : 0)); XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, 0); PageSetLSN(current->page, recptr); PageSetLSN(npage, recptr); @@ -1386,7 +1386,7 @@ doPickSplit(Relation index, SpGistState *state, } /* Issue the WAL record */ - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, 0); /* Update page LSNs on all affected pages */ if (newLeafBuffer != InvalidBuffer) @@ -1540,7 +1540,7 @@ spgAddNodeAction(Relation index, SpGistState *state, XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, 0); PageSetLSN(current->page, recptr); } @@ -1664,7 +1664,7 @@ spgAddNodeAction(Relation index, SpGistState *state, XLogRegisterData((char *) &xlrec, sizeof(xlrec)); XLogRegisterData((char *) newInnerTuple, newInnerTuple->size); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, 0); /* we don't bother to check if any of these are redundant */ PageSetLSN(current->page, recptr); @@ -1834,7 +1834,7 @@ spgSplitNodeAction(Relation index, SpGistState *state, XLogRegisterBuffer(1, newBuffer, flags); } - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, 0); PageSetLSN(current->page, recptr); diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 44fd644..507ae74 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -113,7 +113,7 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, 0); PageSetLSN(BufferGetPage(metabuffer), recptr); PageSetLSN(BufferGetPage(rootbuffer), recptr); diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 15b867f..5036219 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -389,7 +389,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, 0); PageSetLSN(page, recptr); } @@ -470,7 +470,7 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer) XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, 0); PageSetLSN(page, recptr); } @@ -591,7 +591,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer) XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, 0); PageSetLSN(page, recptr); } diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 06aff18..15a5e53 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -701,7 +701,7 @@ WriteZeroPageXlogRec(int pageno) { 
XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); + (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, 0); } /* @@ -717,7 +717,7 @@ WriteTruncateXlogRec(int pageno) XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE); + recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, 0); XLogFlush(recptr); } diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 1713439..adc42fc 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -893,7 +893,7 @@ WriteZeroPageXlogRec(int pageno) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); + (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, 0); } /* @@ -904,7 +904,7 @@ WriteTruncateXlogRec(int pageno) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE); + (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE, 0); } /* @@ -926,7 +926,7 @@ WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, offsetof(xl_commit_ts_set, mainxid) + sizeof(TransactionId)); XLogRegisterData((char *) subxids, nsubxids * sizeof(TransactionId)); - XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_SETTS); + XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_SETTS, 0); } /* diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index a677af0..9a1ad41 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -814,7 +814,7 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members) XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate); XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember)); - (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID); + (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, 0); /* Now enter the information into the OFFSETs and MEMBERs logs */ RecordNewMultiXact(multi, offset, nmembers, members); @@ -3177,7 +3177,7 @@ WriteMZeroPageXlogRec(int pageno, uint8 info) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_MULTIXACT_ID, info); + (void) XLogInsert(RM_MULTIXACT_ID, info, 0); } /* @@ -3204,7 +3204,7 @@ WriteMTruncateXlogRec(Oid oldestMultiDB, XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), SizeOfMultiXactTruncate); - recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID); + recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID, 0); XLogFlush(recptr); } diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 8a22836..b1c5983 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1059,7 +1059,7 @@ EndPrepare(GlobalTransaction gxact) XLogBeginInsert(); for (record = records.head; record != NULL; record = record->next) XLogRegisterData(record->data, record->len); - gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE); + gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, 0); XLogFlush(gxact->prepare_end_lsn); /* If we crash now, we have prepared: WAL replay will fix things */ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b0d5440..57a1c17 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -630,7 +630,7 @@ 
AssignTransactionId(TransactionState s) XLogRegisterData((char *) unreportedXids, nUnreportedXids * sizeof(TransactionId)); - (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT); + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, 0); nUnreportedXids = 0; /* mark top, not current xact as having been logged */ @@ -5207,7 +5207,7 @@ XactLogCommitRecord(TimestampTz commit_time, /* we allow filtering by xacts */ XLogIncludeOrigin(); - return XLogInsert(RM_XACT_ID, info); + return XLogInsert(RM_XACT_ID, info, 0); } /* @@ -5294,7 +5294,7 @@ XactLogAbortRecord(TimestampTz abort_time, if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase)); - return XLogInsert(RM_XACT_ID, info); + return XLogInsert(RM_XACT_ID, info, 0); } /* diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 94b79ac..05e49d7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -436,11 +436,30 @@ typedef struct XLogwrtResult * the WAL record is just copied to the page and the lock is released. But * to avoid the deadlock-scenario explained above, the indicator is always * updated before sleeping while holding an insertion lock. + * + * The progressAt values track the insertion progress, and are used to + * determine whether there has been any WAL insertion activity since the + * previous checkpoint; that in turn determines whether a checkpoint can + * be skipped and whether standby activity should be logged. The progress + * position is updated for all record types; for the time being only + * standby snapshot logging is excluded, so that such records can be + * skipped on otherwise-idle systems. Tracking WAL activity directly in + * each WALInsertLock has the advantage of not requiring an exclusive lock + * on all the WAL insertion locks, hence reducing the impact of the + * activity lookup. It also helps avoid 8-byte torn reads on some + * platforms, by taking advantage of the fact that each insert lock and + * its progress field are located on the same cache line. + * XXX: There is still room for improvement here; in particular, WAL + * operations on unlogged relations (INIT_FORKNUM) should not update the + * progress LSN, since those relations are reset during crash recovery, so + * forcing their buffers to be flushed (for example after a load touching + * only unlogged relations) is a waste of disk writes. */ typedef struct { LWLock lock; XLogRecPtr insertingAt; + XLogRecPtr progressAt; } WALInsertLock; /* @@ -878,6 +897,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * which pages need a full-page image, and retry. If fpw_lsn is invalid, the * record is always inserted. + * + * 'flags' gives finer-grained control over the record being inserted. As of + * now, it only controls whether the progress LSN positions are updated. + * * The first XLogRecData in the chain must be for the record header, and its * data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and * xl_crc fields in the header, the rest of the header must already be filled @@ -890,7 +912,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * WAL rule "write the log before the data".)
*/ XLogRecPtr -XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) +XLogInsertRecord(XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags) { XLogCtlInsert *Insert = &XLogCtl->Insert; pg_crc32c rdata_crc; @@ -989,6 +1013,25 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) inserted = true; } + /* + * Update the progress LSN positions. At least one WAL insertion lock is + * already held at this point, and it is simplest to do the update here, + * where the WAL record's data and type are at hand. The progress is set + * to the start position of the record being inserted, which makes + * checkpoint progress tracking easier, as the control file already saves + * the start LSN of the last checkpoint. If all the insertion locks are + * held exclusively, there is no need to update every progress field, so + * just update the first one. + */ + if ((flags & XLOG_INSERT_NO_PROGRESS) == 0) + { + if (holdingAllLocks) + WALInsertLocks[0].l.progressAt = StartPos; + else + WALInsertLocks[MyLockNo].l.progressAt = StartPos; + } + if (inserted) { /* @@ -4732,6 +4775,7 @@ XLOGShmemInit(void) { LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT); WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr; + WALInsertLocks[i].l.progressAt = InvalidXLogRecPtr; } /* @@ -7917,6 +7961,55 @@ GetFlushRecPtr(void) } /* + * GetProgressRecPtr -- Returns the newest WAL activity position, that is, + * the position of the last significant WAL activity (currently, any + * activity other than standby snapshot logging). The last activity + * position is found by scanning the WAL insertion locks, taking the + * lightweight lock associated with each one. + */ +XLogRecPtr +GetProgressRecPtr(void) +{ + XLogRecPtr res = InvalidXLogRecPtr; + int i; + + /* + * Look for the latest LSN position recorded by WAL insertion. An + * exclusive lock is taken because the locking logic for WAL insertion + * currently expects only that level of locking. Taking the lock is also + * necessary to prevent potential torn reads on some platforms. + */ + for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++) + { + XLogRecPtr progress_lsn; + + LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE); + progress_lsn = WALInsertLocks[i].l.progressAt; + LWLockRelease(&WALInsertLocks[i].l.lock); + + if (res < progress_lsn) + res = progress_lsn; + } + + return res; +} + +/* + * GetLastCheckpointRecPtr -- Returns the last checkpoint insert position. + */ +XLogRecPtr +GetLastCheckpointRecPtr(void) +{ + XLogRecPtr ckpt_lsn; + + LWLockAcquire(ControlFileLock, LW_SHARED); + ckpt_lsn = ControlFile->checkPoint; + LWLockRelease(ControlFileLock); + return ckpt_lsn; +} + +/* * Get the time of the last xlog segment switch */ pg_time_t @@ -8176,7 +8269,7 @@ CreateCheckPoint(int flags) uint32 freespace; XLogRecPtr PriorRedoPtr; XLogRecPtr curInsert; - XLogRecPtr prevPtr; + XLogRecPtr progress_lsn; VirtualTransactionId *vxids; int nvxids; @@ -8257,34 +8350,30 @@ CreateCheckPoint(int flags) checkPoint.oldestActiveXid = InvalidTransactionId; /* + * Get the WAL progress position before acquiring the insertion locks, to + * shorten the time they are held exclusively. + */ + progress_lsn = GetProgressRecPtr(); + + /* * We must block concurrent insertions while examining insert state to * determine the checkpoint REDO pointer.
*/ WALInsertLockAcquireExclusive(); curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); - prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos); /* - * If this isn't a shutdown or forced checkpoint, and we have not inserted - * any XLOG records since the start of the last checkpoint, skip the - * checkpoint. The idea here is to avoid inserting duplicate checkpoints - * when the system is idle. That wastes log space, and more importantly it - * exposes us to possible loss of both current and previous checkpoint - * records if the machine crashes just as we're writing the update. - * (Perhaps it'd make even more sense to checkpoint only when the previous - * checkpoint record is in a different xlog page?) - * - * If the previous checkpoint crossed a WAL segment, however, we create - * the checkpoint anyway, to have the latest checkpoint fully contained in - * the new segment. This is for a little bit of extra robustness: it's - * better if you don't need to keep two WAL segments around to recover the - * checkpoint. + * If this isn't a shutdown or forced checkpoint, and if there has been no + * WAL activity since the start of the last checkpoint, skip the + * checkpoint. The idea here is to avoid inserting duplicate checkpoints + * when the system is idle. That wastes log space, and more importantly it + * exposes us to possible loss of both current and previous checkpoint + * records if the machine crashes just as we're writing the update. */ if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_FORCE)) == 0) { - if (prevPtr == ControlFile->checkPointCopy.redo && - prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) + if (progress_lsn == ControlFile->checkPoint) { WALInsertLockRelease(); LWLockRelease(CheckpointLock); @@ -8463,7 +8552,7 @@ CreateCheckPoint(int flags) XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint)); recptr = XLogInsert(RM_XLOG_ID, shutdown ?
XLOG_CHECKPOINT_SHUTDOWN : - XLOG_CHECKPOINT_ONLINE); + XLOG_CHECKPOINT_ONLINE, 0); XLogFlush(recptr); @@ -8617,7 +8706,7 @@ CreateEndOfRecoveryRecord(void) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery)); - recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY); + recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, 0); XLogFlush(recptr); @@ -8988,7 +9077,7 @@ XLogPutNextOid(Oid nextOid) { XLogBeginInsert(); XLogRegisterData((char *) (&nextOid), sizeof(Oid)); - (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID); + (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, 0); /* * We need not flush the NEXTOID record immediately, because any of the @@ -9027,7 +9116,7 @@ RequestXLogSwitch(void) /* XLOG SWITCH has no data */ XLogBeginInsert(); - RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH); + RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, 0); return RecPtr; } @@ -9047,7 +9136,7 @@ XLogRestorePoint(const char *rpName) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point)); - RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT); + RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, 0); ereport(LOG, (errmsg("restore point \"%s\" created at %X/%X", @@ -9094,7 +9183,7 @@ XLogReportParameters(void) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE); + recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, 0); XLogFlush(recptr); } @@ -9156,7 +9245,7 @@ UpdateFullPageWrites(void) XLogBeginInsert(); XLogRegisterData((char *) (&fullPageWrites), sizeof(bool)); - XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE); + XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, 0); } if (!fullPageWrites) @@ -10469,7 +10558,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) */ XLogBeginInsert(); XLogRegisterData((char *) (&startpoint), sizeof(startpoint)); - stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END); + stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, 0); stoptli = ThisTimeLineID; /* diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index c37003a..bf800db 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -396,7 +396,9 @@ XLogIncludeOrigin(void) /* * Insert an XLOG record having the specified RMID and info bytes, with the * body of the record being the data and buffer references registered earlier - * with XLogRegister* calls. + * with XLogRegister* calls. 'flags' allow users to control more in-depth + * operations during WAL record insertion. As of now, this gives control on + * if the progress LSN positions are updated or not. * * Returns XLOG pointer to end of record (beginning of next record). * This can be used as LSN for data pages affected by the logged action. @@ -405,7 +407,7 @@ XLogIncludeOrigin(void) * WAL rule "write the log before the data".) 
*/ XLogRecPtr -XLogInsert(RmgrId rmid, uint8 info) +XLogInsert(RmgrId rmid, uint8 info, uint8 flags) { XLogRecPtr EndPos; @@ -450,7 +452,7 @@ XLogInsert(RmgrId rmid, uint8 info) rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, &fpw_lsn); - EndPos = XLogInsertRecord(rdt, fpw_lsn); + EndPos = XLogInsertRecord(rdt, fpw_lsn, flags); } while (EndPos == InvalidXLogRecPtr); XLogResetInsertion(); @@ -915,7 +917,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) BufferGetTag(buffer, &rnode, &forkno, &blkno); XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags); - recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT); + recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT, 0); } return recptr; @@ -946,7 +948,7 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, XLogBeginInsert(); XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags); - recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI); + recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, 0); /* * The page may be uninitialized. If so, we can't set the LSN because that diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index fe68c99..1064600 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -134,7 +134,7 @@ log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE); + XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE, 0); } /* @@ -273,7 +273,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) XLogRegisterData((char *) &xlrec, sizeof(xlrec)); lsn = XLogInsert(RM_SMGR_ID, - XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE); + XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE, 0); /* * Flush, because otherwise the truncation of the main relation might diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index c1c0223..feae710 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -630,7 +630,8 @@ createdb(const CreatedbStmt *stmt) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE, + 0); } } heap_endscan(scan); @@ -1238,7 +1239,8 @@ movedb(const char *dbname, const char *tblspcname) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE, + 0); } /* @@ -1338,7 +1340,8 @@ movedb(const char *dbname, const char *tblspcname) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE, + 0); } /* Now it's safe to release the database lock */ @@ -1879,7 +1882,8 @@ remove_dbtablespaces(Oid db_id) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE, + 0); } pfree(dstpath); diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index c98f981..766b542 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -388,7 +388,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple) XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) tuple->t_data, tuple->t_len); - recptr = 
XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } @@ -472,7 +472,7 @@ AlterSequence(AlterSeqStmt *stmt) XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } @@ -749,7 +749,7 @@ nextval_internal(Oid relid) XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } @@ -932,7 +932,7 @@ do_setval(Oid relid, int64 next, bool iscalled) XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 1ff5728..2fde0ac 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -368,7 +368,7 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) offsetof(xl_tblspc_create_rec, ts_path)); XLogRegisterData((char *) location, strlen(location) + 1); - (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE); + (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, 0); } /* @@ -523,7 +523,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec)); - (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP); + (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, 0); } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 4ff4caf..c7e56a0 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -78,12 +78,12 @@ int BgWriterDelay = 200; #define LOG_SNAPSHOT_INTERVAL_MS 15000 /* - * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid - * doing so too often or repeatedly if there has been no other write activity - * in the system. + * Progress LSN and timestamp at which we last logged a standby + * snapshot, to avoid doing so too often or repeatedly if there has been + * no other write activity in the system. */ static TimestampTz last_snapshot_ts; -static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr; +static XLogRecPtr last_progress_lsn = InvalidXLogRecPtr; /* * Flags set by interrupt handlers for later service in the main loop. @@ -301,7 +301,7 @@ BackgroundWriterMain(void) * check whether there has been any WAL inserted since the last time * we've logged a running xacts. * - * We do this logging in the bgwriter as its the only process that is + * We do this logging in the bgwriter as it is the only process that is * run regularly and returns to its mainloop all the time. E.g. * Checkpointer, when active, is barely ever in its mainloop and thus * makes it hard to log regularly. @@ -310,19 +310,23 @@ { TimestampTz timeout = 0; TimestampTz now = GetCurrentTimestamp(); + XLogRecPtr current_progress_lsn = GetProgressRecPtr(); timeout = TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS); /* - * only log if enough time has passed and some xlog record has - * been inserted. + * Only log if enough time has passed, some WAL activity has + * happened since the last checkpoint, and some WAL record has + * been inserted since we last logged a snapshot.
*/ if (now >= timeout && - last_snapshot_lsn != GetXLogInsertRecPtr()) + GetLastCheckpointRecPtr() < current_progress_lsn && + last_progress_lsn < current_progress_lsn) { - last_snapshot_lsn = LogStandbySnapshot(); + (void) LogStandbySnapshot(); last_snapshot_ts = now; + last_progress_lsn = current_progress_lsn; } } diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c index 0caf7a3..771d63e 100644 --- a/src/backend/replication/logical/origin.c +++ b/src/backend/replication/logical/origin.c @@ -362,7 +362,7 @@ replorigin_drop(RepOriginId roident) xlrec.node_id = roident; XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), sizeof(xlrec)); - XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP); + XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP, 0); } /* then reset the in-memory entry */ @@ -913,7 +913,7 @@ replorigin_advance(RepOriginId node, XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), sizeof(xlrec)); - XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET); + XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET, 0); } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7141eb8..68cf5cc 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2166,7 +2166,7 @@ InitBufferPoolAccess(void) MemSet(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(int32); - hash_ctl.entrysize = sizeof(PrivateRefCountArray); + hash_ctl.entrysize = sizeof(PrivateRefCountEntry); PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl, HASH_ELEM | HASH_BLOBS); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 91218d0..97e8962 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2313,6 +2313,29 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids) PGPROC * BackendPidGetProc(int pid) { + PGPROC *result; + + if (pid == 0) /* never match dummy PGPROCs */ + return NULL; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + + result = BackendPidGetProcWithLock(pid); + + LWLockRelease(ProcArrayLock); + + return result; +} + +/* + * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID + * + * Same as above, except caller must be holding ProcArrayLock. The found + * entry, if any, can be assumed to be valid as long as the lock remains held. + */ +PGPROC * +BackendPidGetProcWithLock(int pid) +{ PGPROC *result = NULL; ProcArrayStruct *arrayP = procArray; int index; @@ -2320,8 +2343,6 @@ BackendPidGetProc(int pid) if (pid == 0) /* never match dummy PGPROCs */ return NULL; - LWLockAcquire(ProcArrayLock, LW_SHARED); - for (index = 0; index < arrayP->numProcs; index++) { PGPROC *proc = &allProcs[arrayP->pgprocnos[index]]; @@ -2333,8 +2354,6 @@ BackendPidGetProc(int pid) } } - LWLockRelease(ProcArrayLock); - return result; } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 3f5df9a..d6e2bdb 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -922,7 +922,8 @@ LogStandbySnapshot(void) * The definitions of RunningTransactionsData and xl_xact_running_xacts * are similar. We keep them separate because xl_xact_running_xacts * is a contiguous chunk of memory and never exists fully until it is - * assembled in WAL. + * assembled in WAL. Progress of WAL activity is not updated when + * this record is logged. 
*/ static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) @@ -946,7 +947,8 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) XLogRegisterData((char *) CurrRunningXacts->xids, (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId)); - recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); + recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, + XLOG_INSERT_NO_PROGRESS); if (CurrRunningXacts->subxid_overflow) elog(trace_recovery(DEBUG2), @@ -994,7 +996,8 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); - (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); + (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, + XLOG_INSERT_NO_PROGRESS); } /* diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README index c399618..56b0a12 100644 --- a/src/backend/storage/lmgr/README +++ b/src/backend/storage/lmgr/README @@ -650,27 +650,16 @@ those cases so that they no longer use heavyweight locking in the first place (which is not a crazy idea, given that such lock acquisitions are not expected to deadlock and that heavyweight lock acquisition is fairly slow anyway). -Group locking adds four new members to each PGPROC: lockGroupLeaderIdentifier, -lockGroupLeader, lockGroupMembers, and lockGroupLink. The first is simply a -safety mechanism. A newly started parallel worker has to try to join the -leader's lock group, but it has no guarantee that the group leader is still -alive by the time it gets started. We try to ensure that the parallel leader -dies after all workers in normal cases, but also that the system could survive -relatively intact if that somehow fails to happen. This is one of the -precautions against such a scenario: the leader relays its PGPROC and also its -PID to the worker, and the worker fails to join the lock group unless the -given PGPROC still has the same PID. We assume that PIDs are not recycled -quickly enough for this interlock to fail. - -A PGPROC's lockGroupLeader is NULL for processes not involved in parallel -query. When a process wants to cooperate with parallel workers, it becomes a -lock group leader, which means setting this field to point to its own -PGPROC. When a parallel worker starts up, it points this field at the leader, -with the above-mentioned interlock. The lockGroupMembers field is only used in +Group locking adds three new members to each PGPROC: lockGroupLeader, +lockGroupMembers, and lockGroupLink. A PGPROC's lockGroupLeader is NULL for +processes not involved in parallel query. When a process wants to cooperate +with parallel workers, it becomes a lock group leader, which means setting +this field to point to its own PGPROC. When a parallel worker starts up, it +points this field at the leader. The lockGroupMembers field is only used in the leader; it is a list of the member PGPROCs of the lock group (the leader and all workers). The lockGroupLink field is the list link for this list. -All four of these fields are considered to be protected by a lock manager +All three of these fields are considered to be protected by a lock manager partition lock. The partition lock that protects these fields within a given lock group is chosen by taking the leader's pgprocno modulo the number of lock manager partitions. 
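As a sketch of that partition choice (mirroring what is presumably the LockHashPartitionLockByProc() computation; standalone, illustrative values only), every member of a lock group maps to the same partition because the mapping uses the leader's pgprocno, so a single partition lock protects the whole group:

#include <stdio.h>

#define NUM_LOCK_PARTITIONS 16

/* choose the partition protecting a lock group, from its leader */
static int group_partition(int leader_pgprocno)
{
	return leader_pgprocno % NUM_LOCK_PARTITIONS;
}

int main(void)
{
	int			leader = 37;				/* hypothetical leader pgprocno */
	int			members[] = {37, 52, 99};	/* leader plus two workers */

	for (int i = 0; i < 3; i++)
		printf("member %d -> partition %d\n",
			   members[i], group_partition(leader));
	return 0;
}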
This unusual arrangement has a major advantage: the @@ -679,6 +668,18 @@ change while the deadlock detector is running, because it knows that it holds all the lock manager locks. Also, holding this single lock allows safe manipulation of the lockGroupMembers list for the lock group. +We need an additional interlock when setting these fields, because a newly +started parallel worker has to try to join the leader's lock group, but it +has no guarantee that the group leader is still alive by the time it gets +started. We try to ensure that the parallel leader dies after all workers +in normal cases, but also that the system could survive relatively intact +if that somehow fails to happen. This is one of the precautions against +such a scenario: the leader relays its PGPROC and also its PID to the +worker, and the worker fails to join the lock group unless the given PGPROC +still has the same PID and is still a lock group leader. We assume that +PIDs are not recycled quickly enough for this interlock to fail. + + User Locks (Advisory Locks) --------------------------- diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index fef59a2..a458c68 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -21,7 +21,7 @@ * * Interface: * - * InitLocks(), GetLocksMethodTable(), + * InitLocks(), GetLocksMethodTable(), GetLockTagsMethodTable(), * LockAcquire(), LockRelease(), LockReleaseAll(), * LockCheckConflicts(), GrantLock() * @@ -41,6 +41,7 @@ #include "pg_trace.h" #include "pgstat.h" #include "storage/proc.h" +#include "storage/procarray.h" #include "storage/sinvaladt.h" #include "storage/spin.h" #include "storage/standby.h" @@ -356,6 +357,8 @@ static void CleanUpLock(LOCK *lock, PROCLOCK *proclock, static void LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, LOCKTAG *locktag, LOCKMODE lockmode, bool decrement_strong_lock_count); +static void GetSingleProcBlockerStatusData(PGPROC *blocked_proc, + BlockedProcsData *data); /* @@ -462,6 +465,18 @@ GetLocksMethodTable(const LOCK *lock) return LockMethods[lockmethodid]; } +/* + * Fetch the lock method table associated with a given locktag + */ +LockMethod +GetLockTagsMethodTable(const LOCKTAG *locktag) +{ + LOCKMETHODID lockmethodid = (LOCKMETHODID) locktag->locktag_lockmethodid; + + Assert(0 < lockmethodid && lockmethodid < lengthof(LockMethods)); + return LockMethods[lockmethodid]; +} + /* * Compute the hash code associated with a LOCKTAG. @@ -3406,7 +3421,10 @@ GetLockStatusData(void) * impractical (in particular, note MAX_SIMUL_LWLOCKS). It shouldn't * matter too much, because none of these locks can be involved in lock * conflicts anyway - anything that might must be present in the main lock - * table. + * table. (For the same reason, we don't sweat about making leaderPid + * completely valid. We cannot safely dereference another backend's + * lockGroupLeader field without holding all lock partition locks, and + * it's not worth that.) 
*/ for (i = 0; i < ProcGlobal->allProcCount; ++i) { @@ -3439,6 +3457,7 @@ GetLockStatusData(void) instance->backend = proc->backendId; instance->lxid = proc->lxid; instance->pid = proc->pid; + instance->leaderPid = proc->pid; instance->fastpath = true; el++; @@ -3466,6 +3485,7 @@ GetLockStatusData(void) instance->backend = proc->backendId; instance->lxid = proc->lxid; instance->pid = proc->pid; + instance->leaderPid = proc->pid; instance->fastpath = true; el++; @@ -3517,6 +3537,7 @@ GetLockStatusData(void) instance->backend = proc->backendId; instance->lxid = proc->lxid; instance->pid = proc->pid; + instance->leaderPid = proclock->groupLeader->pid; instance->fastpath = false; el++; @@ -3538,6 +3559,197 @@ GetLockStatusData(void) } /* + * GetBlockerStatusData - Return a summary of the lock manager's state + * concerning locks that are blocking the specified PID or any member of + * the PID's lock group, for use in a user-level reporting function. + * + * For each PID within the lock group that is awaiting some heavyweight lock, + * the return data includes an array of LockInstanceData objects, which are + * the same data structure used by GetLockStatusData; but unlike that function, + * this one reports only the PROCLOCKs associated with the lock that that PID + * is blocked on. (Hence, all the locktags should be the same for any one + * blocked PID.) In addition, we return an array of the PIDs of those backends + * that are ahead of the blocked PID in the lock's wait queue. These can be + * compared with the PIDs in the LockInstanceData objects to determine which + * waiters are ahead of or behind the blocked PID in the queue. + * + * If blocked_pid isn't a valid backend PID or nothing in its lock group is + * waiting on any heavyweight lock, return empty arrays. + * + * The design goal is to hold the LWLocks for as short a time as possible; + * thus, this function simply makes a copy of the necessary data and releases + * the locks, allowing the caller to contemplate and format the data for as + * long as it pleases. + */ +BlockedProcsData * +GetBlockerStatusData(int blocked_pid) +{ + BlockedProcsData *data; + PGPROC *proc; + int i; + + data = (BlockedProcsData *) palloc(sizeof(BlockedProcsData)); + + /* + * Guess how much space we'll need, and preallocate. Most of the time + * this will avoid needing to do repalloc while holding the LWLocks. (We + * assume, but check with an Assert, that MaxBackends is enough entries + * for the procs[] array; the other two could need enlargement, though.) + */ + data->nprocs = data->nlocks = data->npids = 0; + data->maxprocs = data->maxlocks = data->maxpids = MaxBackends; + data->procs = (BlockedProcData *) palloc(sizeof(BlockedProcData) * data->maxprocs); + data->locks = (LockInstanceData *) palloc(sizeof(LockInstanceData) * data->maxlocks); + data->waiter_pids = (int *) palloc(sizeof(int) * data->maxpids); + + /* + * In order to search the ProcArray for blocked_pid and assume that that + * entry won't immediately disappear under us, we must hold ProcArrayLock. + * In addition, to examine the lock grouping fields of any other backend, + * we must hold all the hash partition locks. (Only one of those locks is + * actually relevant for any one lock group, but we can't know which one + * ahead of time.) It's fairly annoying to hold all those locks + * throughout this, but it's no worse than GetLockStatusData(), and it + * does have the advantage that we're guaranteed to return a + * self-consistent instantaneous state. 
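+ * As in GetLockStatusData(), the partition LWLocks are acquired in
+ * partition-number order, so that concurrent acquirers of multiple
+ * partition locks cannot deadlock against one another.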
+ */ + LWLockAcquire(ProcArrayLock, LW_SHARED); + + proc = BackendPidGetProcWithLock(blocked_pid); + + /* Nothing to do if it's gone */ + if (proc != NULL) + { + /* + * Acquire lock on the entire shared lock data structure. See notes + * in GetLockStatusData(). + */ + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED); + + if (proc->lockGroupLeader == NULL) + { + /* Easy case, proc is not a lock group member */ + GetSingleProcBlockerStatusData(proc, data); + } + else + { + /* Examine all procs in proc's lock group */ + dlist_iter iter; + + dlist_foreach(iter, &proc->lockGroupLeader->lockGroupMembers) + { + PGPROC *memberProc; + + memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur); + GetSingleProcBlockerStatusData(memberProc, data); + } + } + + /* + * And release locks. See notes in GetLockStatusData(). + */ + for (i = NUM_LOCK_PARTITIONS; --i >= 0;) + LWLockRelease(LockHashPartitionLockByIndex(i)); + + Assert(data->nprocs <= data->maxprocs); + } + + LWLockRelease(ProcArrayLock); + + return data; +} + +/* Accumulate data about one possibly-blocked proc for GetBlockerStatusData */ +static void +GetSingleProcBlockerStatusData(PGPROC *blocked_proc, BlockedProcsData *data) +{ + LOCK *theLock = blocked_proc->waitLock; + BlockedProcData *bproc; + SHM_QUEUE *procLocks; + PROCLOCK *proclock; + PROC_QUEUE *waitQueue; + PGPROC *proc; + int queue_size; + int i; + + /* Nothing to do if this proc is not blocked */ + if (theLock == NULL) + return; + + /* Set up a procs[] element */ + bproc = &data->procs[data->nprocs++]; + bproc->pid = blocked_proc->pid; + bproc->first_lock = data->nlocks; + bproc->first_waiter = data->npids; + + /* + * We may ignore the proc's fast-path arrays, since nothing in those could + * be related to a contended lock. 
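+ * (Any fast-path lock that attracts a conflicting waiter is transferred
+ * to the main lock table before that waiter sleeps, so a contended lock
+ * can never exist solely in a fast-path array.)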
+ */ + + /* Collect all PROCLOCKs associated with theLock */ + procLocks = &(theLock->procLocks); + proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, + offsetof(PROCLOCK, lockLink)); + while (proclock) + { + PGPROC *proc = proclock->tag.myProc; + LOCK *lock = proclock->tag.myLock; + LockInstanceData *instance; + + if (data->nlocks >= data->maxlocks) + { + data->maxlocks += MaxBackends; + data->locks = (LockInstanceData *) + repalloc(data->locks, sizeof(LockInstanceData) * data->maxlocks); + } + + instance = &data->locks[data->nlocks]; + memcpy(&instance->locktag, &lock->tag, sizeof(LOCKTAG)); + instance->holdMask = proclock->holdMask; + if (proc->waitLock == lock) + instance->waitLockMode = proc->waitLockMode; + else + instance->waitLockMode = NoLock; + instance->backend = proc->backendId; + instance->lxid = proc->lxid; + instance->pid = proc->pid; + instance->leaderPid = proclock->groupLeader->pid; + instance->fastpath = false; + data->nlocks++; + + proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, + offsetof(PROCLOCK, lockLink)); + } + + /* Enlarge waiter_pids[] if it's too small to hold all wait queue PIDs */ + waitQueue = &(theLock->waitProcs); + queue_size = waitQueue->size; + + if (queue_size > data->maxpids - data->npids) + { + data->maxpids = Max(data->maxpids + MaxBackends, + data->npids + queue_size); + data->waiter_pids = (int *) repalloc(data->waiter_pids, + sizeof(int) * data->maxpids); + } + + /* Collect PIDs from the lock's wait queue, stopping at blocked_proc */ + proc = (PGPROC *) waitQueue->links.next; + for (i = 0; i < queue_size; i++) + { + if (proc == blocked_proc) + break; + data->waiter_pids[data->npids++] = proc->pid; + proc = (PGPROC *) proc->links.next; + } + + bproc->num_locks = data->nlocks - bproc->first_lock; + bproc->num_waiters = data->npids - bproc->first_waiter; +} + +/* * Returns a list of currently held AccessExclusiveLocks, for use by * LogStandbySnapshot(). The result is a palloc'd array, * with the number of elements returned into *nlocks. diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 114fba0..6453b88 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -401,7 +401,6 @@ InitProcess(void) pg_atomic_init_u32(&MyProc->procArrayGroupNext, INVALID_PGPROCNO); /* Check that group locking fields are in a proper initial state. */ - Assert(MyProc->lockGroupLeaderIdentifier == 0); Assert(MyProc->lockGroupLeader == NULL); Assert(dlist_is_empty(&MyProc->lockGroupMembers)); @@ -565,7 +564,6 @@ InitAuxiliaryProcess(void) SwitchToSharedLatch(); /* Check that group locking fields are in a proper initial state. 
*/ - Assert(MyProc->lockGroupLeaderIdentifier == 0); Assert(MyProc->lockGroupLeader == NULL); Assert(dlist_is_empty(&MyProc->lockGroupMembers)); @@ -822,7 +820,6 @@ ProcKill(int code, Datum arg) dlist_delete(&MyProc->lockGroupLink); if (dlist_is_empty(&leader->lockGroupMembers)) { - leader->lockGroupLeaderIdentifier = 0; leader->lockGroupLeader = NULL; if (leader != MyProc) { @@ -1771,7 +1768,6 @@ BecomeLockGroupLeader(void) leader_lwlock = LockHashPartitionLockByProc(MyProc); LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); MyProc->lockGroupLeader = MyProc; - MyProc->lockGroupLeaderIdentifier = MyProcPid; dlist_push_head(&MyProc->lockGroupMembers, &MyProc->lockGroupLink); LWLockRelease(leader_lwlock); } @@ -1795,6 +1791,9 @@ BecomeLockGroupMember(PGPROC *leader, int pid) /* Group leader can't become member of group */ Assert(MyProc != leader); + /* Can't already be a member of a group */ + Assert(MyProc->lockGroupLeader == NULL); + /* PID must be valid. */ Assert(pid != 0); @@ -1808,9 +1807,10 @@ BecomeLockGroupMember(PGPROC *leader, int pid) leader_lwlock = LockHashPartitionLockByProc(leader); LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); - /* Try to join the group */ - if (leader->lockGroupLeaderIdentifier == pid) + /* Is this the leader we're looking for? */ + if (leader->pid == pid && leader->lockGroupLeader == leader) { + /* OK, join the group */ ok = true; MyProc->lockGroupLeader = leader; dlist_push_tail(&leader->lockGroupMembers, &MyProc->lockGroupLink); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 73c78e9..6bcab81 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -18,6 +18,7 @@ #include "funcapi.h" #include "miscadmin.h" #include "storage/predicate_internals.h" +#include "utils/array.h" #include "utils/builtins.h" @@ -99,7 +100,7 @@ pg_lock_status(PG_FUNCTION_ARGS) oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ - /* this had better match pg_locks view in system_views.sql */ + /* this had better match function's declaration in pg_proc.h */ tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); @@ -395,6 +396,128 @@ pg_lock_status(PG_FUNCTION_ARGS) /* + * pg_blocking_pids - produce an array of the PIDs blocking given PID + * + * The reported PIDs are those that hold a lock conflicting with blocked_pid's + * current request (hard block), or are requesting such a lock and are ahead + * of blocked_pid in the lock's wait queue (soft block). + * + * In parallel-query cases, we report all PIDs blocking any member of the + * given PID's lock group, and the reported PIDs are those of the blocking + * PIDs' lock group leaders. This allows callers to compare the result to + * lists of clients' pg_backend_pid() results even during a parallel query. + * + * Parallel query makes it possible for there to be duplicate PIDs in the + * result (either because multiple waiters are blocked by same PID, or + * because multiple blockers have same group leader PID). We do not bother + * to eliminate such duplicates from the result. + * + * We need not consider predicate locks here, since those don't block anything. 
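+ * As a hypothetical example: if leader PID 8000 is waiting for a lock held
+ * both by PID 8001 and by 8001's parallel worker PID 8002, then
+ * pg_blocking_pids(8000) would return {8001,8001}; each blocker is
+ * reported under its group leader's PID, duplicates included.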
+ */ +Datum +pg_blocking_pids(PG_FUNCTION_ARGS) +{ + int blocked_pid = PG_GETARG_INT32(0); + Datum *arrayelems; + int narrayelems; + BlockedProcsData *lockData; /* state data from lmgr */ + int i, + j; + + /* Collect a snapshot of lock manager state */ + lockData = GetBlockerStatusData(blocked_pid); + + /* We can't need more output entries than there are reported PROCLOCKs */ + arrayelems = (Datum *) palloc(lockData->nlocks * sizeof(Datum)); + narrayelems = 0; + + /* For each blocked proc in the lock group ... */ + for (i = 0; i < lockData->nprocs; i++) + { + BlockedProcData *bproc = &lockData->procs[i]; + LockInstanceData *instances = &lockData->locks[bproc->first_lock]; + int *preceding_waiters = &lockData->waiter_pids[bproc->first_waiter]; + LockInstanceData *blocked_instance; + LockMethod lockMethodTable; + int conflictMask; + + /* + * Locate the blocked proc's own entry in the LockInstanceData array. + * There should be exactly one matching entry. + */ + blocked_instance = NULL; + for (j = 0; j < bproc->num_locks; j++) + { + LockInstanceData *instance = &(instances[j]); + + if (instance->pid == bproc->pid) + { + Assert(blocked_instance == NULL); + blocked_instance = instance; + } + } + Assert(blocked_instance != NULL); + + lockMethodTable = GetLockTagsMethodTable(&(blocked_instance->locktag)); + conflictMask = lockMethodTable->conflictTab[blocked_instance->waitLockMode]; + + /* Now scan the PROCLOCK data for conflicting procs */ + for (j = 0; j < bproc->num_locks; j++) + { + LockInstanceData *instance = &(instances[j]); + + /* A proc never blocks itself, so ignore that entry */ + if (instance == blocked_instance) + continue; + /* Members of same lock group never block each other, either */ + if (instance->leaderPid == blocked_instance->leaderPid) + continue; + + if (conflictMask & instance->holdMask) + { + /* hard block: blocked by lock already held by this entry */ + } + else if (instance->waitLockMode != NoLock && + (conflictMask & LOCKBIT_ON(instance->waitLockMode))) + { + /* conflict in lock requests; who's in front in wait queue? 
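+ * Only a waiter queued ahead of the blocked proc is a soft
+ * blocker; one queued behind it does not block it.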
*/ + bool ahead = false; + int k; + + for (k = 0; k < bproc->num_waiters; k++) + { + if (preceding_waiters[k] == instance->pid) + { + /* soft block: this entry is ahead of blocked proc */ + ahead = true; + break; + } + } + if (!ahead) + continue; /* not blocked by this entry */ + } + else + { + /* not blocked by this entry */ + continue; + } + + /* blocked by this entry, so emit a record */ + arrayelems[narrayelems++] = Int32GetDatum(instance->leaderPid); + } + } + + /* Assert we didn't overrun arrayelems[] */ + Assert(narrayelems <= lockData->nlocks); + + /* Construct array, using hardwired knowledge about int4 type */ + PG_RETURN_ARRAYTYPE_P(construct_array(arrayelems, narrayelems, + INT4OID, + sizeof(int32), true, 'i')); +} + + +/* * Functions for manipulating advisory locks * * We make use of the locktag fields as follows: diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 6a63b5e..146ad3b 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -767,7 +767,7 @@ write_relmap_file(bool shared, RelMapFile *newmap, XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate); XLogRegisterData((char *) newmap, sizeof(RelMapFile)); - lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE); + lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, 0); /* As always, WAL must hit the disk before the data update does */ XLogFlush(lsn); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ecd30ce..7844844 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -210,7 +210,9 @@ extern CheckpointStatsData CheckpointStats; struct XLogRecData; -extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn); +extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags); extern void XLogFlush(XLogRecPtr RecPtr); extern bool XLogBackgroundFlush(void); extern bool XLogNeedsFlush(XLogRecPtr RecPtr); @@ -261,6 +263,8 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); +extern XLogRecPtr GetProgressRecPtr(void); +extern XLogRecPtr GetLastCheckpointRecPtr(void); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void RemovePromoteSignalFiles(void); diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index cc0177e..271a26e 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -38,10 +38,14 @@ #define REGBUF_KEEP_DATA 0x10/* include data even if a full-page image is * taken */ +/* flags for XLogInsert */ +#define XLOG_INSERT_NO_PROGRESS 0x01 /* do not update progress LSN + * when inserting record */ + /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); extern void XLogIncludeOrigin(void); -extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); +extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, uint8 flags); extern void XLogEnsureRecordSpace(int nbuffers, int ndatas); extern void XLogRegisterData(char *data, int len); extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 8687abb..aff12d3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201602201 +#define CATALOG_VERSION_NO 
201602221 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 59c50d9..62b9125 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3012,6 +3012,8 @@ DATA(insert OID = 3329 ( pg_show_all_file_settings PGNSP PGUID 12 1 1000 0 0 f DESCR("show config file settings"); DATA(insert OID = 1371 ( pg_lock_status PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 0 0 2249 "" "{25,26,26,23,21,25,28,26,26,21,25,23,25,16,16}" "{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}" "{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath}" _null_ _null_ pg_lock_status _null_ _null_ _null_ )); DESCR("view system lock information"); +DATA(insert OID = 2561 ( pg_blocking_pids PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 1007 "23" _null_ _null_ _null_ _null_ _null_ pg_blocking_pids _null_ _null_ _null_ )); +DESCR("get array of PIDs of sessions blocking specified backend PID"); DATA(insert OID = 1065 ( pg_prepared_xact PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 0 0 2249 "" "{28,25,1184,26,26}" "{o,o,o,o,o}" "{transaction,gid,prepared,ownerid,dbid}" _null_ _null_ pg_prepared_xact _null_ _null_ _null_ )); DESCR("view two-phase transactions"); DATA(insert OID = 3819 ( pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ _null_ pg_get_multixact_members _null_ _null_ _null_ )); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 703eaf2..788d50a 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -346,7 +346,7 @@ typedef struct PROCLOCK PROCLOCKTAG tag; /* unique identifier of proclock object */ /* data */ - PGPROC *groupLeader; /* group leader, or NULL if no lock group */ + PGPROC *groupLeader; /* proc's lock group leader, or proc itself */ LOCKMASK holdMask; /* bitmask for lock types currently held */ LOCKMASK releaseMask; /* bitmask for lock types to be released */ SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */ @@ -423,21 +423,48 @@ typedef struct LOCALLOCK typedef struct LockInstanceData { - LOCKTAG locktag; /* locked object */ + LOCKTAG locktag; /* tag for locked object */ LOCKMASK holdMask; /* locks held by this PGPROC */ LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */ BackendId backend; /* backend ID of this PGPROC */ LocalTransactionId lxid; /* local transaction ID of this PGPROC */ int pid; /* pid of this PGPROC */ + int leaderPid; /* pid of group leader; = pid if no group */ bool fastpath; /* taken via fastpath? 
*/ } LockInstanceData; typedef struct LockData { int nelements; /* The length of the array */ - LockInstanceData *locks; + LockInstanceData *locks; /* Array of per-PROCLOCK information */ } LockData; +typedef struct BlockedProcData +{ + int pid; /* pid of a blocked PGPROC */ + /* Per-PROCLOCK information about PROCLOCKs of the lock the pid awaits */ + /* (these fields refer to indexes in BlockedProcsData.locks[]) */ + int first_lock; /* index of first relevant LockInstanceData */ + int num_locks; /* number of relevant LockInstanceDatas */ + /* PIDs of PGPROCs that are ahead of "pid" in the lock's wait queue */ + /* (these fields refer to indexes in BlockedProcsData.waiter_pids[]) */ + int first_waiter; /* index of first preceding waiter */ + int num_waiters; /* number of preceding waiters */ +} BlockedProcData; + +typedef struct BlockedProcsData +{ + BlockedProcData *procs; /* Array of per-blocked-proc information */ + LockInstanceData *locks; /* Array of per-PROCLOCK information */ + int *waiter_pids; /* Array of PIDs of other blocked PGPROCs */ + int nprocs; /* # of valid entries in procs[] array */ + int maxprocs; /* Allocated length of procs[] array */ + int nlocks; /* # of valid entries in locks[] array */ + int maxlocks; /* Allocated length of locks[] array */ + int npids; /* # of valid entries in waiter_pids[] array */ + int maxpids; /* Allocated length of waiter_pids[] array */ +} BlockedProcsData; + /* Result codes for LockAcquire() */ typedef enum @@ -489,6 +516,7 @@ typedef enum */ extern void InitLocks(void); extern LockMethod GetLocksMethodTable(const LOCK *lock); +extern LockMethod GetLockTagsMethodTable(const LOCKTAG *locktag); extern uint32 LockTagHashCode(const LOCKTAG *locktag); extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2); extern LockAcquireResult LockAcquire(const LOCKTAG *locktag, @@ -521,6 +549,7 @@ extern void GrantAwaitedLock(void); extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode); extern Size LockShmemSize(void); extern LockData *GetLockStatusData(void); +extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks); extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index a9405ce..dbcdd3f 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -166,7 +166,6 @@ struct PGPROC * Support for lock groups. Use LockHashPartitionLockByProc on the group * leader to get the LWLock protecting these fields. 
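+ * (Keying the lock choice to the leader's PGPROC rather than each member's
+ * own means every process in the group computes the same partition lock,
+ * so changes to a group's membership are serialized on one LWLock.)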
*/ - int lockGroupLeaderIdentifier; /* MyProcPid, if I'm a leader */ PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */ dlist_head lockGroupMembers; /* list of members, if I'm a leader */ dlist_node lockGroupLink; /* my member link, if I'm a member */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 1fbf4f3..dd37c0c 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -61,6 +61,7 @@ extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids); extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids); extern PGPROC *BackendPidGetProc(int pid); +extern PGPROC *BackendPidGetProcWithLock(int pid); extern int BackendXidGetPid(TransactionId xid); extern bool IsBackendPid(int pid); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 94c1881..7ec93c9 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -1157,6 +1157,7 @@ extern Datum row_security_active_name(PG_FUNCTION_ARGS); /* lockfuncs.c */ extern Datum pg_lock_status(PG_FUNCTION_ARGS); +extern Datum pg_blocking_pids(PG_FUNCTION_ARGS); extern Datum pg_advisory_lock_int8(PG_FUNCTION_ARGS); extern Datum pg_advisory_xact_lock_int8(PG_FUNCTION_ARGS); extern Datum pg_advisory_lock_shared_int8(PG_FUNCTION_ARGS); diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c index 0a9d25c..6461ae8 100644 --- a/src/test/isolation/isolationtester.c +++ b/src/test/isolation/isolationtester.c @@ -227,27 +227,12 @@ main(int argc, char **argv) */ initPQExpBuffer(&wait_query); appendPQExpBufferStr(&wait_query, - "SELECT 1 FROM pg_locks holder, pg_locks waiter " - "WHERE NOT waiter.granted AND waiter.pid = $1 " - "AND holder.granted " - "AND holder.pid <> $1 AND holder.pid IN ("); + "SELECT pg_catalog.pg_blocking_pids($1) && '{"); /* The spec syntax requires at least one session; assume that here. */ appendPQExpBufferStr(&wait_query, backend_pids[1]); for (i = 2; i < nconns; i++) - appendPQExpBuffer(&wait_query, ", %s", backend_pids[i]); - appendPQExpBufferStr(&wait_query, - ") " - - "AND holder.locktype IS NOT DISTINCT FROM waiter.locktype " - "AND holder.database IS NOT DISTINCT FROM waiter.database " - "AND holder.relation IS NOT DISTINCT FROM waiter.relation " - "AND holder.page IS NOT DISTINCT FROM waiter.page " - "AND holder.tuple IS NOT DISTINCT FROM waiter.tuple " - "AND holder.virtualxid IS NOT DISTINCT FROM waiter.virtualxid " - "AND holder.transactionid IS NOT DISTINCT FROM waiter.transactionid " - "AND holder.classid IS NOT DISTINCT FROM waiter.classid " - "AND holder.objid IS NOT DISTINCT FROM waiter.objid " - "AND holder.objsubid IS NOT DISTINCT FROM waiter.objsubid "); + appendPQExpBuffer(&wait_query, ",%s", backend_pids[i]); + appendPQExpBufferStr(&wait_query, "}'::integer[]"); res = PQprepare(conns[0], PREP_WAITING, wait_query.data, 0, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) @@ -745,21 +730,22 @@ try_complete_step(Step *step, int flags) /* If it's OK for the step to block, check whether it has. 
*/ if (flags & STEP_NONBLOCK) { - int ntuples; + bool waiting; res = PQexecPrepared(conns[0], PREP_WAITING, 1, &backend_pids[step->session + 1], NULL, NULL, 0); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + if (PQresultStatus(res) != PGRES_TUPLES_OK || + PQntuples(res) != 1) { fprintf(stderr, "lock wait query failed: %s", PQerrorMessage(conn)); exit_nicely(); } - ntuples = PQntuples(res); + waiting = ((PQgetvalue(res, 0, 0))[0] == 't'); PQclear(res); - if (ntuples >= 1) /* waiting to acquire a lock */ + if (waiting) /* waiting to acquire a lock */ { if (!(flags & STEP_RETRY)) printf("step %s: %s \n",