From caed7a11517799676a50570bad4b4d3bb412e42a Mon Sep 17 00:00:00 2001
From: Melanie Plageman
Date: Thu, 6 Oct 2022 12:23:25 -0400
Subject: [PATCH v32 1/3] Track IO operation statistics locally

Introduce "IOOp", an IO operation done by a backend, and "IOContext",
the source, target, or type of the IO done by a backend. For example,
the checkpointer may write a shared buffer out. This would be counted
as an IOOp "write" on an IOContext IOCONTEXT_SHARED by BackendType
"checkpointer".

Each IOOp (acquire, hit, read, write, extend, fsync) is counted per
IOContext (bulkread, bulkwrite, local, shared, or vacuum) through a
call to pgstat_count_io_op().

The primary concern of these statistics is IO operations on data blocks
during the course of normal database operations. IO operations done by,
for example, the archiver or syslogger are not counted in these
statistics. WAL IO, temporary file IO, and IO done directly through
smgr* functions (such as when building an index) are not yet counted
but would be useful future additions.

IOCONTEXT_LOCAL and IOCONTEXT_SHARED IOContexts concern operations on
local and shared buffers. The IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE,
and IOCONTEXT_VACUUM IOContexts concern IO operations on buffers as
part of a BufferAccessStrategy.

IOOP_ACQUIRE IOOps are counted in IOCONTEXT_SHARED and IOCONTEXT_LOCAL
IOContexts whenever a buffer is acquired through [Local]BufferAlloc().
IOOP_ACQUIRE IOOps are counted in the BufferAccessStrategy IOContexts
whenever a buffer already in the strategy ring is reused. IOOP_WRITE
IOOps are counted in the BufferAccessStrategy IOContexts whenever the
reused dirty buffer is written out.

Stats on IOOps in all IOContexts for a given backend are counted in
that backend's local memory. A subsequent commit will expose functions
for aggregating and viewing these stats.

Author: Melanie Plageman
Reviewed-by: Andres Freund
Reviewed-by: Justin Pryzby
Reviewed-by: Kyotaro Horiguchi
Reviewed-by: Maciek Sakrejda
Reviewed-by: Lukas Fittl
Discussion: /message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de
---
 src/backend/postmaster/checkpointer.c      |  13 ++
 src/backend/storage/buffer/bufmgr.c        |  59 +++++-
 src/backend/storage/buffer/freelist.c      |  52 ++++-
 src/backend/storage/buffer/localbuf.c      |   5 +
 src/backend/storage/sync/sync.c            |   2 +
 src/backend/utils/activity/Makefile        |   1 +
 src/backend/utils/activity/meson.build     |   1 +
 src/backend/utils/activity/pgstat_io_ops.c | 229 +++++++++++++++++++++
 src/include/pgstat.h                       |  61 ++++++
 src/include/storage/buf_internals.h        |   2 +-
 src/include/storage/bufmgr.h               |   7 +-
 src/tools/pgindent/typedefs.list           |   4 +
 12 files changed, 423 insertions(+), 13 deletions(-)
 create mode 100644 src/backend/utils/activity/pgstat_io_ops.c

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 5fc076fc14..4ea4e6a298 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1116,6 +1116,19 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type)
         if (!AmBackgroundWriterProcess())
             CheckpointerShmem->num_backend_fsync++;
         LWLockRelease(CheckpointerCommLock);
+
+        /*
+         * We have no way of knowing if the current IOContext is
+         * IOCONTEXT_SHARED or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
+         * point, so count the fsync as being in the IOCONTEXT_SHARED
+         * IOContext. This is probably okay, because the number of backend
+         * fsyncs doesn't say anything about the efficacy of the
+         * BufferAccessStrategy. And counting both fsyncs done in
+         * IOCONTEXT_SHARED and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under
+         * IOCONTEXT_SHARED is likely clearer when investigating the number of
+         * backend fsyncs.
+         */
+        pgstat_count_io_op(IOOP_FSYNC, IOCONTEXT_SHARED);
         return false;
     }
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6b95381481..1c14e305c1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -482,7 +482,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
                                BlockNumber blockNum,
                                BufferAccessStrategy strategy,
                                bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
                                        ForkNumber forkNum,
                                        BlockNumber nForkBlock,
@@ -823,6 +823,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
     BufferDesc *bufHdr;
     Block       bufBlock;
     bool        found;
+    IOContext   io_context;
     bool        isExtend;
     bool        isLocalBuf = SmgrIsTemp(smgr);
 
@@ -833,6 +834,13 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
     isExtend = (blockNum == P_NEW);
 
+    if (strategy)
+        io_context = IOContextForStrategy(strategy);
+    else if (isLocalBuf)
+        io_context = IOCONTEXT_LOCAL;
+    else
+        io_context = IOCONTEXT_SHARED;
+
     TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
                                        smgr->smgr_rlocator.locator.spcOid,
                                        smgr->smgr_rlocator.locator.dbOid,
@@ -886,6 +894,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
     /* if it was already in the buffer pool, we're done */
     if (found)
     {
+        pgstat_count_io_op(IOOP_HIT, io_context);
+
         if (!isExtend)
         {
             /* Just need to update stats before we exit */
@@ -986,10 +996,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
      */
     Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID));  /* spinlock not needed */
 
-    bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
+    if (isLocalBuf)
+        bufBlock = LocalBufHdrGetBlock(bufHdr);
+    else
+        bufBlock = BufHdrGetBlock(bufHdr);
 
     if (isExtend)
     {
+        pgstat_count_io_op(IOOP_EXTEND, io_context);
         /* new buffers are zero-filled */
         MemSet((char *) bufBlock, 0, BLCKSZ);
         /* don't set checksum for all-zero page */
@@ -1020,6 +1034,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
             smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
 
+            pgstat_count_io_op(IOOP_READ, io_context);
+
             if (track_io_timing)
             {
                 INSTR_TIME_SET_CURRENT(io_time);
@@ -1190,6 +1206,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
     /* Loop here in case we have to try another victim buffer */
     for (;;)
     {
+        bool        from_ring;
+
         /*
          * Ensure, while the spinlock's not yet held, that there's a free
          * refcount entry.
         */
@@ -1200,7 +1218,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
          * Select a victim buffer. The buffer is returned with its header
          * spinlock still held!
          */
-        buf = StrategyGetBuffer(strategy, &buf_state);
+        buf = StrategyGetBuffer(strategy, &buf_state, &from_ring);
 
         Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
 
@@ -1237,6 +1255,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
             if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
                                          LW_SHARED))
             {
+                IOContext   io_context;
+
                 /*
                  * If using a nondefault strategy, and writing the buffer
                  * would require a WAL flush, let the strategy decide whether
@@ -1263,13 +1283,28 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                     }
                 }
 
+                /*
+                 * When a strategy is in use, if the target dirty buffer is an
+                 * existing strategy buffer being reused, count this as a
+                 * strategy write for the purposes of IO Operations statistics
+                 * tracking.
+                 *
+                 * All dirty shared buffers upon first being added to the ring
+                 * will be counted as shared buffer writes.
+                 *
+                 * When a strategy is not in use, the write can only be a
+                 * "regular" write of a dirty shared buffer.
+                 */
+
+                io_context = from_ring ? IOContextForStrategy(strategy) : IOCONTEXT_SHARED;
+
                 /* OK, do the I/O */
                 TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
                                                           smgr->smgr_rlocator.locator.spcOid,
                                                           smgr->smgr_rlocator.locator.dbOid,
                                                           smgr->smgr_rlocator.locator.relNumber);
-                FlushBuffer(buf, NULL);
+                FlushBuffer(buf, NULL, io_context);
                 LWLockRelease(BufferDescriptorGetContentLock(buf));
 
                 ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -2570,7 +2605,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
     PinBuffer_Locked(bufHdr);
     LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 
-    FlushBuffer(bufHdr, NULL);
+    FlushBuffer(bufHdr, NULL, IOCONTEXT_SHARED);
 
     LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 
@@ -2820,7 +2855,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
  * as the second parameter. If not, pass NULL.
  */
 static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOContext io_context)
 {
     XLogRecPtr  recptr;
     ErrorContextCallback errcallback;
@@ -2900,6 +2935,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
      */
     bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
 
+    pgstat_count_io_op(IOOP_WRITE, io_context);
+
     if (track_io_timing)
         INSTR_TIME_SET_CURRENT(io_start);
 
@@ -3551,6 +3588,8 @@ FlushRelationBuffers(Relation rel)
                       localpage,
                       false);
 
+            pgstat_count_io_op(IOOP_WRITE, IOCONTEXT_LOCAL);
+
             buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
             pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
@@ -3586,7 +3625,7 @@ FlushRelationBuffers(Relation rel)
         {
             PinBuffer_Locked(bufHdr);
             LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-            FlushBuffer(bufHdr, RelationGetSmgr(rel));
+            FlushBuffer(bufHdr, RelationGetSmgr(rel), IOCONTEXT_SHARED);
             LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
             UnpinBuffer(bufHdr);
         }
@@ -3684,7 +3723,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
         {
             PinBuffer_Locked(bufHdr);
             LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-            FlushBuffer(bufHdr, srelent->srel);
+            FlushBuffer(bufHdr, srelent->srel, IOCONTEXT_SHARED);
             LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
             UnpinBuffer(bufHdr);
         }
@@ -3894,7 +3933,7 @@ FlushDatabaseBuffers(Oid dbid)
         {
             PinBuffer_Locked(bufHdr);
             LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-            FlushBuffer(bufHdr, NULL);
+            FlushBuffer(bufHdr, NULL, IOCONTEXT_SHARED);
             LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
             UnpinBuffer(bufHdr);
         }
@@ -3921,7 +3960,7 @@ FlushOneBuffer(Buffer buffer)
 
     Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
 
-    FlushBuffer(bufHdr, NULL);
+    FlushBuffer(bufHdr, NULL, IOCONTEXT_SHARED);
 }
 
 /*
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 990e081aae..5fd65c17d1 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -15,6 +15,7 @@
  */
 #include "postgres.h"
 
+#include "pgstat.h"
 #include "port/atomics.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
@@ -198,13 +199,15 @@ have_free_buffer(void)
  *  return the buffer with the buffer header spinlock still held.
  */
 BufferDesc *
-StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
+StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
 {
     BufferDesc *buf;
     int         bgwprocno;
     int         trycounter;
     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
 
+    *from_ring = false;
+
     /*
      * If given a strategy object, see whether it can select a buffer. We
      * assume strategy objects don't need buffer_strategy_lock.
@@ -213,7 +216,23 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
     {
         buf = GetBufferFromRing(strategy, buf_state);
         if (buf != NULL)
+        {
+            /*
+             * When a BufferAccessStrategy is in use, reused buffers from the
+             * strategy ring will be counted as IOCONTEXT_BULKREAD,
+             * IOCONTEXT_BULKWRITE, or IOCONTEXT_VACUUM acquisitions for the
+             * purposes of IO Operation statistics tracking.
+             *
+             * However, even when a strategy is in use, if a new buffer must
+             * be acquired from shared buffers and added to the ring, this is
+             * counted instead as an IOCONTEXT_SHARED acquisition. So, only
+             * reused buffers are counted as having been acquired in a
+             * BufferAccessStrategy IOContext.
+             */
+            *from_ring = true;
+            pgstat_count_io_op(IOOP_ACQUIRE, IOContextForStrategy(strategy));
             return buf;
+        }
     }
 
     /*
@@ -247,6 +266,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
      * the rate of buffer consumption. Note that buffers recycled by a
      * strategy object are intentionally not counted here.
      */
+    pgstat_count_io_op(IOOP_ACQUIRE, IOCONTEXT_SHARED);
     pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
 
     /*
@@ -670,6 +690,36 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
 }
 
+/*
+ * Utility function returning the IOContext of a given BufferAccessStrategy's
+ * strategy ring.
+ */
+IOContext
+IOContextForStrategy(BufferAccessStrategy strategy)
+{
+    Assert(strategy);
+
+    switch (strategy->btype)
+    {
+        case BAS_NORMAL:
+
+            /*
+             * Currently, GetAccessStrategy() returns NULL for
+             * BufferAccessStrategyType BAS_NORMAL, so this case is unlikely
+             * to be hit.
+             */
+            return IOCONTEXT_SHARED;
+        case BAS_BULKREAD:
+            return IOCONTEXT_BULKREAD;
+        case BAS_BULKWRITE:
+            return IOCONTEXT_BULKWRITE;
+        case BAS_VACUUM:
+            return IOCONTEXT_VACUUM;
+    }
+
+    elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
+}
+
 /*
  * StrategyRejectBuffer -- consider rejecting a dirty buffer
  *
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 30d67d1c40..c2548f2b0b 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -18,6 +18,7 @@
 #include "access/parallel.h"
 #include "catalog/catalog.h"
 #include "executor/instrument.h"
+#include "pgstat.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
 #include "utils/guc_hooks.h"
@@ -196,6 +197,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
             LocalRefCount[b]++;
             ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                         BufferDescriptorGetBuffer(bufHdr));
+
+            pgstat_count_io_op(IOOP_ACQUIRE, IOCONTEXT_LOCAL);
             break;
         }
     }
@@ -226,6 +229,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
                   localpage,
                   false);
 
+        pgstat_count_io_op(IOOP_WRITE, IOCONTEXT_LOCAL);
+
         /* Mark not-dirty now in case we error out below */
         buf_state &= ~BM_DIRTY;
         pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c
index 9d6a9e9109..5718b52fb5 100644
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -432,6 +432,8 @@ ProcessSyncRequests(void)
                     total_elapsed += elapsed;
                     processed++;
 
+                    pgstat_count_io_op(IOOP_FSYNC, IOCONTEXT_SHARED);
+
                     if (log_checkpoints)
                         elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
                              processed,
diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile
index a2e8507fd6..0098785089 100644
--- a/src/backend/utils/activity/Makefile
+++ b/src/backend/utils/activity/Makefile
@@ -22,6 +22,7 @@ OBJS = \
 	pgstat_checkpointer.o \
 	pgstat_database.o \
 	pgstat_function.o \
+	pgstat_io_ops.o \
 	pgstat_relation.o \
 	pgstat_replslot.o \
 	pgstat_shmem.o \
diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build
index 5b3b558a67..1038324c32 100644
--- a/src/backend/utils/activity/meson.build
+++ b/src/backend/utils/activity/meson.build
@@ -7,6 +7,7 @@ backend_sources += files(
   'pgstat_checkpointer.c',
   'pgstat_database.c',
   'pgstat_function.c',
+  'pgstat_io_ops.c',
   'pgstat_relation.c',
   'pgstat_replslot.c',
   'pgstat_shmem.c',
diff --git a/src/backend/utils/activity/pgstat_io_ops.c b/src/backend/utils/activity/pgstat_io_ops.c
new file mode 100644
index 0000000000..e1750b965f
--- /dev/null
+++ b/src/backend/utils/activity/pgstat_io_ops.c
@@ -0,0 +1,229 @@
+/* -------------------------------------------------------------------------
+ *
+ * pgstat_io_ops.c
+ *    Implementation of IO operation statistics.
+ *
+ * This file contains the implementation of IO operation statistics. It is kept
+ * separate from pgstat.c to enforce the line between the statistics access /
+ * storage implementation and the details about individual types of
+ * statistics.
+ *
+ * Copyright (c) 2021-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *    src/backend/utils/activity/pgstat_io_ops.c
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/pgstat_internal.h"
+
+static PgStat_IOContextOps pending_IOOpStats;
+
+void
+pgstat_count_io_op(IOOp io_op, IOContext io_context)
+{
+    PgStat_IOOpCounters *pending_counters;
+
+    Assert(io_context < IOCONTEXT_NUM_TYPES);
+    Assert(io_op < IOOP_NUM_TYPES);
+    Assert(pgstat_expect_io_op(MyBackendType, io_context, io_op));
+
+    pending_counters = &pending_IOOpStats.data[io_context];
+
+    switch (io_op)
+    {
+        case IOOP_ACQUIRE:
+            pending_counters->acquires++;
+            break;
+        case IOOP_EXTEND:
+            pending_counters->extends++;
+            break;
+        case IOOP_FSYNC:
+            pending_counters->fsyncs++;
+            break;
+        case IOOP_HIT:
+            pending_counters->hits++;
+            break;
+        case IOOP_READ:
+            pending_counters->reads++;
+            break;
+        case IOOP_WRITE:
+            pending_counters->writes++;
+            break;
+    }
+
+}
+
+const char *
+pgstat_io_context_desc(IOContext io_context)
+{
+    switch (io_context)
+    {
+        case IOCONTEXT_BULKREAD:
+            return "bulkread";
+        case IOCONTEXT_BULKWRITE:
+            return "bulkwrite";
+        case IOCONTEXT_LOCAL:
+            return "local";
+        case IOCONTEXT_SHARED:
+            return "shared";
+        case IOCONTEXT_VACUUM:
+            return "vacuum";
+    }
+
+    elog(ERROR, "unrecognized IOContext value: %d", io_context);
+}
+
+const char *
+pgstat_io_op_desc(IOOp io_op)
+{
+    switch (io_op)
+    {
+        case IOOP_ACQUIRE:
+            return "acquire";
+        case IOOP_EXTEND:
+            return "extend";
+        case IOOP_FSYNC:
+            return "fsync";
+        case IOOP_HIT:
+            return "hit";
+        case IOOP_READ:
+            return "read";
+        case IOOP_WRITE:
+            return "write";
+    }
+
+    elog(ERROR, "unrecognized IOOp value: %d", io_op);
+}
+
+/*
+ * IO Operation statistics are not collected for all BackendTypes.
+ *
+ * The following BackendTypes do not participate in the cumulative stats
+ * subsystem or do not do IO operations worth reporting statistics on:
+ * - Syslogger because it is not connected to shared memory
+ * - Archiver because most relevant archiving IO is delegated to a
+ *   specialized command or module
+ * - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now
+ *
+ * Function returns true if BackendType participates in the cumulative stats
+ * subsystem for IO Operations and false if it does not.
+ */
+bool
+pgstat_io_op_stats_collected(BackendType bktype)
+{
+    return bktype != B_INVALID && bktype != B_ARCHIVER && bktype != B_LOGGER &&
+        bktype != B_WAL_RECEIVER && bktype != B_WAL_WRITER;
+}
+
+/*
+ * Some BackendTypes do not perform IO operations in certain IOContexts. Check
+ * that the given BackendType is expected to do IO in the given IOContext.
+ */
+bool
+pgstat_bktype_io_context_valid(BackendType bktype, IOContext io_context)
+{
+    bool        no_local;
+
+    /*
+     * In core Postgres, only regular backends and WAL Sender processes
+     * executing queries should use local buffers. Parallel workers will not
+     * use local buffers (see InitLocalBuffers()); however, extensions
+     * leveraging background workers have no such limitation, so track IO
+     * Operations in IOCONTEXT_LOCAL for BackendType B_BG_WORKER.
+     */
+    no_local = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || bktype
+        == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || bktype ==
+        B_STANDALONE_BACKEND || bktype == B_STARTUP;
+
+    if (io_context == IOCONTEXT_LOCAL && no_local)
+        return false;
+
+    /*
+     * Some BackendTypes do not currently perform any IO operations in certain
+     * IOContexts, and, while it may not be inherently incorrect for them to
+     * do so, excluding those rows from the view makes the view easier to use.
+     */
+    if ((io_context == IOCONTEXT_BULKREAD || io_context == IOCONTEXT_BULKWRITE
+         || io_context == IOCONTEXT_VACUUM) && (bktype == B_CHECKPOINTER
+                                                || bktype == B_BG_WRITER))
+        return false;
+
+    if (io_context == IOCONTEXT_VACUUM && bktype == B_AUTOVAC_LAUNCHER)
+        return false;
+
+    if (io_context == IOCONTEXT_BULKWRITE && (bktype == B_AUTOVAC_WORKER ||
+                                              bktype == B_AUTOVAC_LAUNCHER))
+        return false;
+
+    return true;
+}
+
+/*
+ * Some BackendTypes will never do certain IOOps and some IOOps should not
+ * occur in certain IOContexts. Check that the given IOOp is valid for the
+ * given BackendType in the given IOContext. Note that there are currently no
+ * cases of an IOOp being invalid for a particular BackendType only within a
+ * certain IOContext.
+ */
+bool
+pgstat_io_op_valid(BackendType bktype, IOContext io_context, IOOp io_op)
+{
+    bool        strategy_io_context;
+
+    /*
+     * Some BackendTypes should never track IO Operation statistics.
+     */
+    Assert(pgstat_io_op_stats_collected(bktype));
+
+    /*
+     * Some BackendTypes will not do certain IOOps.
+     */
+    if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
+        (io_op == IOOP_READ || io_op == IOOP_ACQUIRE || io_op == IOOP_HIT))
+        return false;
+
+    if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || bktype ==
+         B_CHECKPOINTER) && io_op == IOOP_EXTEND)
+        return false;
+
+    /*
+     * Some IOOps are not valid in certain IOContexts
+     */
+    if (io_op == IOOP_EXTEND && io_context == IOCONTEXT_BULKREAD)
+        return false;
+
+    /*
+     * Temporary tables using local buffers are not logged and thus do not
+     * require fsync'ing.
+     *
+     * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
+     * counted in the IOCONTEXT_SHARED IOContext. See comment in
+     * ForwardSyncRequest() for more details.
+     */
+    strategy_io_context = io_context == IOCONTEXT_BULKREAD || io_context ==
+        IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
+
+    if ((io_context == IOCONTEXT_LOCAL || strategy_io_context) &&
+        io_op == IOOP_FSYNC)
+        return false;
+
+    return true;
+}
+
+bool
+pgstat_expect_io_op(BackendType bktype, IOContext io_context, IOOp io_op)
+{
+    if (!pgstat_io_op_stats_collected(bktype))
+        return false;
+
+    if (!pgstat_bktype_io_context_valid(bktype, io_context))
+        return false;
+
+    if (!(pgstat_io_op_valid(bktype, io_context, io_op)))
+        return false;
+
+    return true;
+}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index ad7334a0d2..155b0b2d48 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -14,6 +14,7 @@
 #include "datatype/timestamp.h"
 #include "portability/instr_time.h"
 #include "postmaster/pgarch.h"	/* for MAX_XFN_CHARS */
+#include "storage/buf.h"
 #include "utils/backend_progress.h" /* for backward compatibility */
 #include "utils/backend_status.h"	/* for backward compatibility */
 #include "utils/relcache.h"
@@ -276,6 +277,48 @@ typedef struct PgStat_CheckpointerStats
     PgStat_Counter buf_fsync_backend;
 } PgStat_CheckpointerStats;
 
+/*
+ * Types related to counting IO Operations for various IO Contexts
+ */
+
+typedef enum IOOp
+{
+    IOOP_ACQUIRE = 0,
+    IOOP_EXTEND,
+    IOOP_FSYNC,
+    IOOP_HIT,
+    IOOP_READ,
+    IOOP_WRITE,
+} IOOp;
+
+#define IOOP_NUM_TYPES (IOOP_WRITE + 1)
+
+typedef enum IOContext
+{
+    IOCONTEXT_BULKREAD = 0,
+    IOCONTEXT_BULKWRITE,
+    IOCONTEXT_LOCAL,
+    IOCONTEXT_SHARED,
+    IOCONTEXT_VACUUM,
+} IOContext;
+
+#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1)
+
+typedef struct PgStat_IOOpCounters
+{
+    PgStat_Counter acquires;
+    PgStat_Counter extends;
+    PgStat_Counter fsyncs;
+    PgStat_Counter hits;
+    PgStat_Counter reads;
+    PgStat_Counter writes;
+} PgStat_IOOpCounters;
+
+typedef struct PgStat_IOContextOps
+{
+    PgStat_IOOpCounters data[IOCONTEXT_NUM_TYPES];
+} PgStat_IOContextOps;
+
 typedef struct PgStat_StatDBEntry
 {
     PgStat_Counter n_xact_commit;
@@ -453,6 +496,24 @@ extern void pgstat_report_checkpointer(void);
 extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
 
 
+/*
+ * Functions in pgstat_io_ops.c
+ */
+
+extern void pgstat_count_io_op(IOOp io_op, IOContext io_context);
+extern const char *pgstat_io_context_desc(IOContext io_context);
+extern const char *pgstat_io_op_desc(IOOp io_op);
+
+/* Validation functions in pgstat_io_ops.c */
+extern bool pgstat_io_op_stats_collected(BackendType bktype);
+extern bool pgstat_bktype_io_context_valid(BackendType bktype, IOContext io_context);
+extern bool pgstat_io_op_valid(BackendType bktype, IOContext io_context, IOOp io_op);
+extern bool pgstat_expect_io_op(BackendType bktype, IOContext io_context, IOOp io_op);
+
+/* IO stats translation function in freelist.c */
+extern IOContext IOContextForStrategy(BufferAccessStrategy bas);
+
+
 /*
  * Functions in pgstat_database.c
  */
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 406db6be78..50d7e586e9 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -392,7 +392,7 @@ extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *
 
 /* freelist.c */
 extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
-                                     uint32 *buf_state);
+                                     uint32 *buf_state, bool *from_ring);
 extern void StrategyFreeBuffer(BufferDesc *buf);
 extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
                                  BufferDesc *buf);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 6f4dfa0960..d0eed71f63 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -23,7 +23,12 @@
 
 typedef void *Block;
 
-/* Possible arguments for GetAccessStrategy() */
+/*
+ * Possible arguments for GetAccessStrategy().
+ *
+ * If adding a new BufferAccessStrategyType, also add a new IOContext so
+ * statistics on IO operations using this strategy are tracked.
+ */
 typedef enum BufferAccessStrategyType
 {
     BAS_NORMAL,                 /* Normal random access */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 97c9bc1861..67218ec6f2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1106,7 +1106,9 @@ ID
 INFIX
 INT128
 INTERFACE_INFO
+IOContext
 IOFuncSelector
+IOOp
 IPCompareMethod
 ITEM
 IV
@@ -2026,6 +2028,8 @@ PgStat_FetchConsistency
 PgStat_FunctionCallUsage
 PgStat_FunctionCounts
 PgStat_HashKey
+PgStat_IOContextOps
+PgStat_IOOpCounters
 PgStat_Kind
 PgStat_KindInfo
 PgStat_LocalState
-- 
2.34.1
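
Illustrative usage sketch (not part of the patch): the call-site pattern the patch
establishes in ReadBuffer_common() is to pick the IOContext once, up front, and then
count every IOOp against it. The helper below and its name count_block_read() are
hypothetical; only IOContext, IOContextForStrategy(), pgstat_count_io_op(), and the
IOOP_*/IOCONTEXT_* values come from the patch itself.

#include "postgres.h"

#include "pgstat.h"             /* IOOp, IOContext, pgstat_count_io_op() */

/*
 * Hypothetical helper mirroring ReadBuffer_common(): choose the IOContext
 * from the caller's situation, then count the IOOp that actually happened.
 */
static void
count_block_read(BufferAccessStrategy strategy, bool isLocalBuf, bool found)
{
    IOContext   io_context;

    if (strategy)
        io_context = IOContextForStrategy(strategy);    /* bulkread, bulkwrite, or vacuum */
    else if (isLocalBuf)
        io_context = IOCONTEXT_LOCAL;   /* temporary-table (local buffer) IO */
    else
        io_context = IOCONTEXT_SHARED;  /* ordinary shared-buffer IO */

    if (found)
        pgstat_count_io_op(IOOP_HIT, io_context);   /* block was already in the buffer pool */
    else
        pgstat_count_io_op(IOOP_READ, io_context);  /* block had to be read in via smgrread() */
}

Centralizing the IOContext choice this way is what lets the same pgstat_count_io_op()
call sites distinguish strategy IO (ring reuse) from ordinary shared-buffer IO.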