From 4567da933b25a9e23fe1a72a6994d3a9b7bc1ea4 Mon Sep 17 00:00:00 2001 From: amit Date: Mon, 7 Mar 2016 14:38:34 +0900 Subject: [PATCH 2/3] WIP: Implement progress reporting for VACUUM command. This basically utilizes the pgstat_progress* API to report a handful of parameters to indicate its progress. lazy_vacuum_rel() and lazy_scan_heap() have been altered to report command start, command target table, and the following parameters: processing phase, number of heap blocks, number of index blocks (all indexes), current heap block number in the main scan loop (whenever it changes), index blocks vacuumed (once per finished index vacuum), and number of index vacuum passes (every time all indexes have been vacuumed). The following processing phases are identified and reported whenever one changes to another: 'scanning heap', 'vacuuming indexes', 'vacuuming heap', and 'cleanup'. TODO: find a way to report index pages vacuumed in a more granular manner than the current report per index vacuumed. A view named pg_stat_progress_vacuum has been added that shows these values. --- doc/src/sgml/monitoring.sgml | 106 ++++++++++++++++++++++++++++++++++ src/backend/catalog/system_views.sql | 24 ++++++++ src/backend/commands/vacuumlazy.c | 73 +++++++++++++++++++++++- src/test/regress/expected/rules.out | 24 ++++++++ 4 files changed, 226 insertions(+), 1 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 85459d0..544f959 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -507,6 +507,12 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser yet included in pg_stat_user_functions). + + pg_stat_progress_vacuumpg_stat_progress_vacuum + One row for each backend (including autovacuum worker processes) running + VACUUM, showing current progress in terms of heap pages it + has finished processing. + @@ -1822,6 +1828,106 @@ postgres 27093 0.0 0.0 30096 2752 ? 
Ss 11:34 0:00 postgres: ser controls exactly which functions are tracked. + + <structname>pg_stat_progress_vacuum</structname> View + + + + Column + Type + Description + + + + + + pid + integer + Process ID of backend + + + database + name + Name of the database this backend is connected to + + + tablename + name + Schema-qualified name of the table being vacuumed + + + processing_phase + text + Current processing phase of vacuum. + Possible values are: + + + + scanning heap + + + + + vacuuming indexes + + + + + vacuuming heap + + + + + cleanup + + + + + + + total_heap_blocks + integer + Total number of heap blocks in the table + + + current_heap_block + integer + Current heap block being processed + + + total_index_blocks + integer + Total number of index blocks to be processed + + + index_blocks_done + integer + Number of index blocks processed in current vacuum round + + + index_vacuum_count + integer + Number of times index vacuum round has been performed so far + + + percent_done + numeric + + Amount of work finished in percent in terms of table blocks processed + + + + +
+ + + The pg_stat_progress_vacuum view will contain + one row for each backend (including autovacuum worker processes), showing + parameters that can help determine the progress of VACUUM + command running in it. Note that the backends running + VACUUM FULL are not shown. + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index abf9a70..2ce6e00 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -971,3 +971,27 @@ RETURNS jsonb LANGUAGE INTERNAL STRICT IMMUTABLE AS 'jsonb_set'; + +CREATE VIEW pg_stat_progress_vacuum AS + SELECT + S.pid AS pid, + D.datname AS database, + quote_ident(N.nspname) || '.' || quote_ident(C.relname) AS tablename, + CASE S.param1 + WHEN 1 THEN 'scanning heap' + WHEN 2 THEN 'vacuuming indexes' + WHEN 3 THEN 'vacuuming heap' + WHEN 4 THEN 'cleanup' + ELSE 'unknown phase' + END AS processing_phase, + S.param2 AS total_heap_blocks, + S.param3 AS current_heap_block, + S.param4 AS total_index_blocks, + S.param5 AS index_blocks_done, + S.param6 AS index_vacuum_count, + CASE S.param2 + WHEN 0 THEN round(100.0, 2) + ELSE round((S.param3 + 1) * 100.0 / S.param2, 2) + END AS percent_done + FROM pg_database D, pg_class C, pg_namespace N, pg_stat_get_progress_info('VACUUM') AS S + WHERE S.datid = D.oid AND S.relid = C.oid AND C.relnamespace = N.oid; diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 61d2edd..0771b91 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -97,6 +97,29 @@ */ #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32) +/* + * Progress parameters of (lazy) vacuum reported to pgstat progress tracking + * facility + */ +#define PROG_PARAM_VAC_PHASE 0 +#define PROG_PARAM_VAC_HEAP_BLKS 1 +#define PROG_PARAM_VAC_CURR_HEAP_BLK 2 +#define PROG_PARAM_VAC_IDX_BLKS 3 +#define PROG_PARAM_VAC_IDX_BLKS_DONE 4 +#define PROG_PARAM_VAC_IDX_VAC_COUNT 5 + +/* + * Following distinct phases of lazy 
vacuum are identified. #1, #2 and #3 + * run in a cyclical manner due to possibly limited memory to work with, + * whereby #1 is periodically interrupted to run #2, followed by #3, and + * back to #1. Cycle repeats until all blocks of the relation have been + * covered by #1. + */ +#define LV_PHASE_SCAN_HEAP 1 +#define LV_PHASE_VACUUM_INDEX 2 +#define LV_PHASE_VACUUM_HEAP 3 +#define LV_PHASE_CLEANUP 4 + typedef struct LVRelStats { /* hasindex = true means two-pass strategy; false means one-pass */ @@ -437,7 +460,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool scan_all) { BlockNumber nblocks, - blkno; + blkno, + total_index_blks, + *current_index_blks; HeapTupleData tuple; char *relname; BlockNumber empty_pages, @@ -478,6 +503,24 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, lazy_space_alloc(vacrelstats, nblocks); frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); + /* We're about to begin heap scan. */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_SCAN_HEAP); + + /* total_heap_blks */ + pgstat_progress_update_param(PROG_PARAM_VAC_HEAP_BLKS, nblocks); + + /* total_index_blks */ + current_index_blks = (BlockNumber *) palloc(nindexes * sizeof(BlockNumber)); + total_index_blks = 0; + for (i = 0; i < nindexes; i++) + { + BlockNumber nblocks = RelationGetNumberOfBlocks(Irel[i]); + + current_index_blks[i] = nblocks; + total_index_blks += nblocks; + } + pgstat_progress_update_param(PROG_PARAM_VAC_IDX_BLKS, total_index_blks); + /* * We want to skip pages that don't require vacuuming according to the * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD @@ -585,6 +628,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, vacuum_delay_point(); + /* current_heap_blkno: 0..nblocks-1 */ + pgstat_progress_update_param(PROG_PARAM_VAC_CURR_HEAP_BLK, blkno); + /* * If we are close to overrunning the available space for dead-tuple * TIDs, pause and do a cycle 
of vacuuming before we tackle this page. @@ -608,11 +654,22 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, vacuum_log_cleanup_info(onerel, vacrelstats); /* Remove index entries */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_INDEX); for (i = 0; i < nindexes; i++) + { lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats); + + pgstat_progress_update_param(PROG_PARAM_VAC_IDX_BLKS_DONE, + current_index_blks[i]); + } + + pgstat_progress_update_param(PROG_PARAM_VAC_IDX_VAC_COUNT, + vacrelstats->num_index_scans+1); + /* Remove tuples from heap */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_HEAP); lazy_vacuum_heap(onerel, vacrelstats); /* @@ -622,6 +679,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, */ vacrelstats->num_dead_tuples = 0; vacrelstats->num_index_scans++; + + /* Going back to scanning the heap */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_SCAN_HEAP); } /* @@ -1151,16 +1211,27 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, vacuum_log_cleanup_info(onerel, vacrelstats); /* Remove index entries */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_INDEX); for (i = 0; i < nindexes; i++) + { lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats); + + pgstat_progress_update_param(PROG_PARAM_VAC_IDX_BLKS_DONE, + current_index_blks[i]); + } + pgstat_progress_update_param(PROG_PARAM_VAC_IDX_VAC_COUNT, + vacrelstats->num_index_scans + 1); + /* Remove tuples from heap */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_HEAP); lazy_vacuum_heap(onerel, vacrelstats); vacrelstats->num_index_scans++; } /* Do post-vacuum cleanup and statistics update for each index */ + pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_CLEANUP); for (i = 0; i < nindexes; i++) lazy_cleanup_index(Irel[i], indstats[i], vacrelstats); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 81bc5c9..6cd496b 
100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1746,6 +1746,30 @@ pg_stat_database_conflicts| SELECT d.oid AS datid, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d; +pg_stat_progress_vacuum| SELECT s.pid, + d.datname AS database, + ((quote_ident((n.nspname)::text) || '.'::text) || quote_ident((c.relname)::text)) AS tablename, + CASE s.param1 + WHEN 1 THEN 'scanning heap'::text + WHEN 2 THEN 'vacuuming indexes'::text + WHEN 3 THEN 'vacuuming heap'::text + WHEN 4 THEN 'cleanup'::text + ELSE 'unknown phase'::text + END AS processing_phase, + s.param2 AS total_heap_blocks, + s.param3 AS current_heap_block, + s.param4 AS total_index_blocks, + s.param5 AS index_blocks_done, + s.param6 AS index_vacuum_count, + CASE s.param2 + WHEN 0 THEN round(100.0, 2) + ELSE round(((((s.param3 + 1))::numeric * 100.0) / (s.param2)::numeric), 2) + END AS percent_done + FROM pg_database d, + pg_class c, + pg_namespace n, + pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10) + WHERE ((s.datid = d.oid) AND (s.relid = c.oid) AND (c.relnamespace = n.oid)); pg_stat_replication| SELECT s.pid, s.usesysid, u.rolname AS usename, -- 1.7.1