From 918554a966c4cf45da330ce9b8e2356cbc152fb7 Mon Sep 17 00:00:00 2001 From: rafaelthca Date: Thu, 6 Mar 2025 17:34:16 -0300 Subject: [PATCH v7] Proposal for progressive explains This proposal introduces a feature to print execution plans of active queries in an in-memory shared hash object so that other sessions can visualize via new view pg_stat_progress_explain. Plans are only printed if new GUC parameter progressive_explain is enabled. When progressive_explain is set to 'explain' the plan will be printed only once at the beginning of the query. If set to 'analyze' the QueryDesc will be adjusted adding instrumentation flags. In that case the plan will be printed on a fixed interval controlled by new GUC parameter progressive_explain_interval including all instrumentation stats computed so far (per node rows and execution time). New view: - pg_stat_progress_explain - pid: PID of the process running the query - last_print: timestamp when plan was last printed - query_plan: the actual plan (limited read privileges) New GUCs: - progressive_explain: if progressive plans are printed for local session. - type: enum - default: off - values: [off, explain, analyze] - context: user - progressive_explain_min_duration: min query duration until progressive explain starts. - type: int - default: 1s - min: 0 - context: user - progressive_explain_interval: interval between each explain print. - type: int - default: 1s - min: 10ms - context: user - progressive_explain_format: format used to print the plans. - type: enum - default: text - values: [TEXT, XML, JSON, or YAML] - context: user - progressive_explain_settings: controls whether information about modified configuration is added to the printed plan. - type: bool - default: off - context: user - progressive_explain_verbose: controls whether verbose details are added to the printed plan. - type: bool - default: off - context: user - progressive_explain_buffers: controls whether buffers details are added to the printed plan. - type: bool - default: off - context: user - progressive_explain_timing: controls whether per node timing details are added to the printed plan. - type: bool - default: off - context: user - progressive_explain_wal: controls whether WAL record generation details are added to the printed plan. - type: bool - default: off - context: user --- doc/src/sgml/config.sgml | 134 +++ doc/src/sgml/monitoring.sgml | 64 ++ doc/src/sgml/perform.sgml | 96 ++ src/backend/catalog/system_views.sql | 5 + src/backend/commands/explain.c | 822 +++++++++++++++++- src/backend/executor/execMain.c | 24 + src/backend/executor/execProcnode.c | 10 +- src/backend/executor/instrument.c | 20 +- src/backend/storage/ipc/ipci.c | 7 + src/backend/storage/lmgr/lwlock.c | 1 + .../utils/activity/wait_event_names.txt | 1 + src/backend/utils/init/postinit.c | 19 + src/backend/utils/misc/guc_tables.c | 125 +++ src/backend/utils/misc/postgresql.conf.sample | 13 + src/include/catalog/pg_proc.dat | 10 + src/include/commands/explain.h | 44 + src/include/executor/execdesc.h | 1 + src/include/executor/instrument.h | 1 + src/include/nodes/execnodes.h | 6 + src/include/storage/lwlock.h | 1 + src/include/storage/lwlocklist.h | 1 + src/include/utils/guc.h | 9 + src/include/utils/timeout.h | 2 + .../test_misc/t/008_progressive_explain.pl | 130 +++ src/test/regress/expected/rules.out | 4 + src/tools/pgindent/typedefs.list | 4 + 26 files changed, 1504 insertions(+), 50 deletions(-) create mode 100644 src/test/modules/test_misc/t/008_progressive_explain.pl diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d2fa5f7d1a9..6f0b0d06c7b 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8483,6 +8483,140 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + progressive_explain (enum) + + progressive_explain configuration parameter + + + + + Determines whether progressive explains are enabled and how + they are executed; see . + Possible values are off, explain + and analyze. The default is off. + + + + + + progressive_explain_min_duration (integer) + + progressive_explain_min_duration configuration parameter + + + + + Sets the threshold (in milliseconds) until progressive explain is + printed for the first time. The default is 1s. + + + + + + progressive_explain_interval (integer) + + progressive_explain_interval configuration parameter + + + + + Sets the interval (in milliseconds) between each instrumented + progressive explain. The default is 1s. + + + + + + progressive_explain_buffers (boolean) + + progressive_explain_buffers configuration parameter + + + + + Controls whether information about buffers is added to + progressive explains. Equivalent to the BUFFERS option of + EXPLAIN. The default is off. + + + + + + progressive_explain_timing (boolean) + + progressive_explain_timing configuration parameter + + + + + Controls whether information about per node timing is added + to progressive explains. Equivalent to the TIMING option of + EXPLAIN. The default is off. + + + + + + progressive_explain_wal (boolean) + + progressive_explain_wal configuration parameter + + + + + Controls whether information about WAL record generation is + added to progressive explains. Equivalent to the WAL option of + EXPLAIN. The default is off. + + + + + + progressive_explain_verbose (boolean) + + progressive_explain_verbose configuration parameter + + + + + Controls whether verbose details are added to progressive explains. + Equivalent to the VERBOSE option of EXPLAIN. The default is + off. + + + + + + progressive_explain_settings (boolean) + + progressive_explain_settings configuration parameter + + + + + Controls whether information about modified configuration is added to + progressive explains. Equivalent to the SETTINGS option of EXPLAIN. + The default is off. + + + + + + progressive_explain_format (enum) + + progressive_explain_format configuration parameter + + + + + Selects the EXPLAIN output format to be used with progressive + explains. Equivalent to the FORMAT option of EXPLAIN. The default + is text. + + + + diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 16646f560e8..8a9934d53e1 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -6793,6 +6793,70 @@ FROM pg_stat_get_backend_idset() AS backendid; + + EXPLAIN Progress Reporting + + + pg_stat_progress_explain + + + + Whenever a client backend or parallel worker is running a query with + enabled, the + pg_stat_progress_explain view will contain a + corresponding row with query plan details; see + . The table below describe the + information that will be reported. + + + + <structname>pg_stat_progress_explain</structname> View + + + + + Column Type + + + Description + + + + + + + + pid integer + + + Process ID of a client backend or parallel worker. + + + + + + last_print timestamp with time zone + + + Timestamp when plan was last printed. + + + + + + query_plan text + + + The progressive explain text. + + + + + +
+ +
+ diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index be4b49f62b5..50eaf5533a4 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1109,6 +1109,102 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 + + Progressive <command>EXPLAIN</command> + + + The query plan created by the planner for any given active query can + be visualized by any session via + view when is enabled in the + client backend or parallel worker executing query and after min duration + specified by has + passed. Settings , + and + control which additional + instrumentaton details are collected and included in the output while + , + and + + define how the plan is printed and which details are added there. + + + + When is set to explain + the plan will be printed once at the beginning of the query. + + + + +SET progressive_explain = 'explain'; +SET + +SELECT * FROM test t1 INNER JOIN test t2 ON t1.c1=t2.c1; + + + + +SELECT * FROM pg_stat_progress_explain; + pid | last_print | query_plan +------+-------------------------------+------------------------------------------------------------------------------ + 5307 | 2025-02-18 09:37:41.781459-03 | Hash Join (cost=327879.85..878413.95 rows=9999860 width=18) + + | | Hash Cond: (t1.c1 = t2.c1) + + | | -> Seq Scan on test t1 (cost=0.00..154053.60 rows=9999860 width=9) + + | | -> Hash (cost=154053.60..154053.60 rows=9999860 width=9) + + | | -> Seq Scan on test t2 (cost=0.00..154053.60 rows=9999860 width=9)+ + | | +(1 row) + + + + + Setting to analyze + will enable instrumentation and the detailed plan is printed on a fixed interval + controlled by , including + per node accumulated row count and other statistics. + + + + Progressive explains include additional information per node to help analyzing + execution progress: + + + + + current: the plan node currently being processed. + + + + + never executed: a plan node not processed yet. + + + + + + +SET progressive_explain = 'analyze'; +SET + +SELECT * FROM test t1 INNER JOIN test t2 ON t1.c1=t2.c1; + + + + +SELECT * FROM pg_stat_progress_explain; + pid | last_print | query_plan +------+-------------------------------+------------------------------------------------------------------------------------------------------------------------------------ + 5307 | 2025-02-18 09:36:03.580721-03 | Hash Join (cost=327879.85..878413.95 rows=9999860 width=18) (actual time=2010.504..2998.111 rows=38603 loops=1) + + | | Hash Cond: (t1.c1 = t2.c1) + + | | -> Seq Scan on test t1 (cost=0.00..154053.60 rows=9999860 width=9) (actual time=0.068..303.963 rows=4928320 loops=1) (current)+ + | | -> Hash (cost=154053.60..154053.60 rows=9999860 width=9) (actual time=2009.824..2009.824 rows=10000000 loops=1) + + | | -> Seq Scan on test t2 (cost=0.00..154053.60 rows=9999860 width=9) (actual time=0.035..640.890 rows=10000000 loops=1) + + | | +(1 row) + + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index a4d2cfdcaf5..dd746078ea7 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1334,6 +1334,11 @@ CREATE VIEW pg_stat_progress_copy AS FROM pg_stat_get_progress_info('COPY') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_explain AS + SELECT + * + FROM pg_stat_progress_explain(true); + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index d8a7232cedb..7fd6d664bb5 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -21,6 +21,7 @@ #include "commands/explain_format.h" #include "commands/prepare.h" #include "foreign/fdwapi.h" +#include "funcapi.h" #include "jit/jit.h" #include "libpq/pqformat.h" #include "libpq/protocol.h" @@ -32,6 +33,7 @@ #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" #include "tcop/tcopprot.h" +#include "utils/backend_status.h" #include "utils/builtins.h" #include "utils/guc_tables.h" #include "utils/json.h" @@ -39,17 +41,28 @@ #include "utils/rel.h" #include "utils/ruleutils.h" #include "utils/snapmgr.h" +#include "utils/timeout.h" #include "utils/tuplesort.h" #include "utils/typcache.h" #include "utils/xml.h" +#define PROGRESSIVE_EXPLAIN_ALLOC_SIZE 4096 + /* Hook for plugins to get control in ExplainOneQuery() */ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; +/* Shared hash to store progressive explains */ +static HTAB *progressiveExplainArray = NULL; + +/* Pointer to running query */ +static QueryDesc *activeQueryDesc = NULL; + +/* Flag set by timeouts to control when to print progressive explains */ +bool ProgressiveExplainPending = false; /* * Various places within need to convert bytes to kilobytes. Round these up @@ -128,7 +141,7 @@ static void show_hashagg_info(AggState *aggstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es); + Instrumentation *instr, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage); @@ -154,6 +167,13 @@ static ExplainWorkersState *ExplainCreateWorkersState(int num_workers); static void ExplainOpenWorker(int n, ExplainState *es); static void ExplainCloseWorker(int n, ExplainState *es); static void ExplainFlushWorkersState(ExplainState *es); +static void ProgressiveExplainInit(QueryDesc *queryDesc); +static void ProgressiveExplainPrint(QueryDesc *queryDesc); +static void ProgressiveExplainCleanup(QueryDesc *queryDesc); +static TupleTableSlot *ExecProcNodeExplain(PlanState *node); +static void WrapExecProcNodeWithExplain(PlanState *ps); +static void UnwrapExecProcNodeWithExplain(PlanState *ps); +static void ProgressiveExplainReleaseFunc(void *); @@ -366,6 +386,8 @@ NewExplainState(void) es->costs = true; /* Prepare output buffer. */ es->str = makeStringInfo(); + /* An explain state is not progressive by default */ + es->progressive = false; return es; } @@ -1490,6 +1512,7 @@ ExplainNode(PlanState *planstate, List *ancestors, const char *partialmode = NULL; const char *operation = NULL; const char *custom_name = NULL; + Instrumentation *local_instr = NULL; ExplainWorkersState *save_workers_state = es->workers_state; int save_indent = es->indent; bool haschildren; @@ -1953,17 +1976,38 @@ ExplainNode(PlanState *planstate, List *ancestors, * instrumentation results the user didn't ask for. But we do the * InstrEndLoop call anyway, if possible, to reduce the number of cases * auto_explain has to contend with. + * + * For regular explains instrumentation clean up is called directly in the + * main instrumentation objects. Progressive explains need to clone + * instrumentation object and forcibly end the loop in nodes that may be + * running. */ if (planstate->instrument) - InstrEndLoop(planstate->instrument); + { + /* Progressive explain. Use auxiliary instrumentation object */ + if (es->progressive) + { + local_instr = es->pe_local_instr; + *local_instr = *planstate->instrument; + + /* Force end loop even if node is in progress */ + InstrEndLoopForce(local_instr); + } + /* Use main instrumentation */ + else + { + local_instr = planstate->instrument; + InstrEndLoop(local_instr); + } + } if (es->analyze && - planstate->instrument && planstate->instrument->nloops > 0) + local_instr && local_instr->nloops > 0) { - double nloops = planstate->instrument->nloops; - double startup_ms = 1000.0 * planstate->instrument->startup / nloops; - double total_ms = 1000.0 * planstate->instrument->total / nloops; - double rows = planstate->instrument->ntuples / nloops; + double nloops = local_instr->nloops; + double startup_ms = 1000.0 * local_instr->startup / nloops; + double total_ms = 1000.0 * local_instr->total / nloops; + double rows = local_instr->ntuples / nloops; if (es->format == EXPLAIN_FORMAT_TEXT) { @@ -1973,6 +2017,9 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, "time=%.3f..%.3f ", startup_ms, total_ms); appendStringInfo(es->str, "rows=%.2f loops=%.0f)", rows, nloops); + + if (es->progressive && planstate == es->pe_curr_node) + appendStringInfo(es->str, " (current)"); } else { @@ -1985,6 +2032,10 @@ ExplainNode(PlanState *planstate, List *ancestors, } ExplainPropertyFloat("Actual Rows", NULL, rows, 2, es); ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + + /* Progressive explain. Add current node flag is applicable */ + if (es->progressive && planstate == es->pe_curr_node) + ExplainPropertyBool("Current", true, es); } } else if (es->analyze) @@ -2089,29 +2140,29 @@ ExplainNode(PlanState *planstate, List *ancestors, "Index Cond", planstate, ancestors, es); if (((IndexScan *) plan)->indexqualorig) show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); + local_instr, es); show_scan_qual(((IndexScan *) plan)->indexorderbyorig, "Order By", planstate, ancestors, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_IndexOnlyScan: show_scan_qual(((IndexOnlyScan *) plan)->indexqual, "Index Cond", planstate, ancestors, es); if (((IndexOnlyScan *) plan)->recheckqual) show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); + local_instr, es); show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, "Order By", planstate, ancestors, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); if (es->analyze) ExplainPropertyFloat("Heap Fetches", NULL, - planstate->instrument->ntuples2, 0, es); + local_instr->ntuples2, 0, es); break; case T_BitmapIndexScan: show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, @@ -2122,11 +2173,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Recheck Cond", planstate, ancestors, es); if (((BitmapHeapScan *) plan)->bitmapqualorig) show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); + local_instr, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_tidbitmap_info((BitmapHeapScanState *) planstate, es); break; case T_SampleScan: @@ -2143,7 +2194,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); if (IsA(plan, CteScan)) show_ctescan_info(castNode(CteScanState, planstate), es); break; @@ -2154,7 +2205,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); ExplainPropertyInteger("Workers Planned", NULL, gather->num_workers, es); @@ -2178,7 +2229,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); ExplainPropertyInteger("Workers Planned", NULL, gm->num_workers, es); @@ -2212,7 +2263,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_TableFuncScan: if (es->verbose) @@ -2226,7 +2277,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_table_func_scan_info(castNode(TableFuncScanState, planstate), es); break; @@ -2244,7 +2295,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); } break; case T_TidRangeScan: @@ -2261,14 +2312,14 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); } break; case T_ForeignScan: show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_foreignscan_info((ForeignScanState *) planstate, es); break; case T_CustomScan: @@ -2278,7 +2329,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); if (css->methods->ExplainCustomScan) css->methods->ExplainCustomScan(css, ancestors, es); } @@ -2288,11 +2339,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Join Filter", planstate, ancestors, es); if (((NestLoop *) plan)->join.joinqual) show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); + local_instr, es); break; case T_MergeJoin: show_upper_qual(((MergeJoin *) plan)->mergeclauses, @@ -2301,11 +2352,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Join Filter", planstate, ancestors, es); if (((MergeJoin *) plan)->join.joinqual) show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); + local_instr, es); break; case T_HashJoin: show_upper_qual(((HashJoin *) plan)->hashclauses, @@ -2314,11 +2365,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Join Filter", planstate, ancestors, es); if (((HashJoin *) plan)->join.joinqual) show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); + local_instr, es); break; case T_Agg: show_agg_keys(castNode(AggState, planstate), ancestors, es); @@ -2326,13 +2377,13 @@ ExplainNode(PlanState *planstate, List *ancestors, show_hashagg_info((AggState *) planstate, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_WindowAgg: show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(((WindowAgg *) plan)->runConditionOrig, "Run Condition", planstate, ancestors, es); show_windowagg_info(castNode(WindowAggState, planstate), es); @@ -2342,7 +2393,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_Sort: show_sort_keys(castNode(SortState, planstate), ancestors, es); @@ -2364,7 +2415,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_ModifyTable: show_modifytable_info(castNode(ModifyTableState, planstate), ancestors, @@ -2409,10 +2460,10 @@ ExplainNode(PlanState *planstate, List *ancestors, } /* Show buffer/WAL usage */ - if (es->buffers && planstate->instrument) - show_buffer_usage(es, &planstate->instrument->bufusage); - if (es->wal && planstate->instrument) - show_wal_usage(es, &planstate->instrument->walusage); + if (es->buffers && local_instr) + show_buffer_usage(es, &local_instr->bufusage); + if (es->wal && local_instr) + show_wal_usage(es, &local_instr->walusage); /* Prepare per-worker buffer/WAL usage */ if (es->workers_state && (es->buffers || es->wal) && es->verbose) @@ -3929,19 +3980,19 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es) */ static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es) + Instrumentation *instr, ExplainState *es) { double nfiltered; double nloops; - if (!es->analyze || !planstate->instrument) + if (!es->analyze || !instr) return; if (which == 2) - nfiltered = planstate->instrument->nfiltered2; + nfiltered = instr->nfiltered2; else - nfiltered = planstate->instrument->nfiltered1; - nloops = planstate->instrument->nloops; + nfiltered = instr->nfiltered1; + nloops = instr->nloops; /* In text mode, suppress zero counts; they're not interesting enough */ if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) @@ -4612,7 +4663,7 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, { show_upper_qual((List *) node->onConflictWhere, "Conflict Filter", &mtstate->ps, ancestors, es); - show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es); + show_instrumentation_count("Rows Removed by Conflict Filter", 1, (&mtstate->ps)->instrument, es); } /* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */ @@ -4621,11 +4672,24 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, double total; double insert_path; double other_path; + Instrumentation *local_instr; - InstrEndLoop(outerPlanState(mtstate)->instrument); + /* Progressive explain. Use auxiliary instrumentation object */ + if (es->progressive) + { + local_instr = es->pe_local_instr; + *local_instr = *outerPlanState(mtstate)->instrument; + /* Force end loop even if node is in progress */ + InstrEndLoopForce(local_instr); + } + else + { + local_instr = outerPlanState(mtstate)->instrument; + InstrEndLoop(local_instr); + } /* count the number of source rows */ - total = outerPlanState(mtstate)->instrument->ntuples; + total = local_instr->ntuples; other_path = mtstate->ps.instrument->ntuples2; insert_path = total - other_path; @@ -4645,11 +4709,24 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, double update_path; double delete_path; double skipped_path; + Instrumentation *local_instr; - InstrEndLoop(outerPlanState(mtstate)->instrument); + /* Progressive explain. Use auxiliary instrumentation object */ + if (es->progressive) + { + local_instr = es->pe_local_instr; + *local_instr = *outerPlanState(mtstate)->instrument; + /* Force end loop even if node is in progress */ + InstrEndLoopForce(local_instr); + } + else + { + local_instr = outerPlanState(mtstate)->instrument; + InstrEndLoop(local_instr); + } /* count the number of source rows */ - total = outerPlanState(mtstate)->instrument->ntuples; + total = local_instr->ntuples; insert_path = mtstate->mt_merge_inserted; update_path = mtstate->mt_merge_updated; delete_path = mtstate->mt_merge_deleted; @@ -4931,3 +5008,654 @@ ExplainFlushWorkersState(ExplainState *es) pfree(wstate->worker_state_save); pfree(wstate); } + +/* + * ProgressiveExplainSetup + * Adjusts QueryDesc with instrumentation for progressive explains. + * + * If progressive explain is enabled and configured to collect + * instrumentation details, we adjust QueryDesc accordingly even if + * the query was not initiated with EXPLAIN ANALYZE. This will + * directly affect query execution and add computation overhead. + */ +void +ProgressiveExplainSetup(QueryDesc *queryDesc) +{ + /* Adjust instrumentation if enabled */ + if (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE) + { + if (progressive_explain_timing) + queryDesc->instrument_options |= INSTRUMENT_TIMER; + else + queryDesc->instrument_options |= INSTRUMENT_ROWS; + if (progressive_explain_buffers) + queryDesc->instrument_options |= INSTRUMENT_BUFFERS; + if (progressive_explain_wal) + queryDesc->instrument_options |= INSTRUMENT_WAL; + } +} + +/* + * ProgressiveExplainStart + * Progressive explain start point. + */ +void +ProgressiveExplainStart(QueryDesc *queryDesc) +{ + activeQueryDesc = queryDesc; + queryDesc->pe_es = NULL; + + /* Timeout is only needed if duration > 0 */ + if (progressive_explain_min_duration == 0) + ProgressiveExplainInit(queryDesc); + else + enable_timeout_after(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, + progressive_explain_min_duration); +} + +/* + * ExecProcNodeWithExplain + * Responsible for initialization of all structures related to progressive + * explains. + * + * We define a ExplainState that will be reused in every iteration of + * plan prints. + * + * Progressive explain plans are printed in shared memory via DSAs. Each + * A dynamic shared memory area is created to hold the progressive plans. + * Each backend printing plans has its own DSA, which is shared with other + * backends via the global progressive explain hash through dsa_handle and + * dsa_pointer pointers. + * + * A memory context release callback is defined for manual resource release + * in case of query cancellations. + * + * A periodic timeout is configured to print the plan in fixed intervals if + * progressive explain is configured with instrumentation enabled. Otherwise + * the plain plan is printed once. + */ +void +ProgressiveExplainInit(QueryDesc *queryDesc) +{ + ExplainState *es; + progressiveExplainHashKey key; + progressiveExplainHashEntry *entry; + bool found; + + /* Configure memory context release callback */ + MemoryContextCallback *queryDescReleaseCallback; + + queryDescReleaseCallback = (MemoryContextCallback *) + palloc0(sizeof(MemoryContextCallback)); + queryDescReleaseCallback->func = ProgressiveExplainReleaseFunc; + queryDescReleaseCallback->arg = NULL; + MemoryContextRegisterResetCallback(CurrentMemoryContext, + queryDescReleaseCallback); + + /* Initialize ExplainState to be used for all prints */ + es = NewExplainState(); + queryDesc->pe_es = es; + + /* Local instrumentation object to be reused for every node */ + es->pe_local_instr = palloc0(sizeof(Instrumentation)); + + /* + * Mark ExplainState as progressive so that the plan printing function + * adjusts logic accordingly. + */ + es->progressive = true; + + es->analyze = (queryDesc->instrument_options && + (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE)); + es->buffers = (es->analyze && progressive_explain_buffers); + es->wal = (es->analyze && progressive_explain_wal); + es->timing = (es->analyze && progressive_explain_timing); + es->summary = (es->analyze); + es->format = progressive_explain_format; + es->verbose = progressive_explain_verbose; + es->settings = progressive_explain_settings; + + /* Define the DSA and share through the hash */ + es->pe_a = dsa_create(LWTRANCHE_PROGRESSIVE_EXPLAIN_DSA); + + /* Exclusive access is needed to update the hash */ + LWLockAcquire(ExplainHashLock, LW_EXCLUSIVE); + + /* Find or create an entry with desired hash code */ + key.pid = MyProcPid; + entry = (progressiveExplainHashEntry *) hash_search(progressiveExplainArray, + &key, + HASH_ENTER, + &found); + + entry->h = dsa_get_handle(es->pe_a); + entry->p = (dsa_pointer) NULL; + + LWLockRelease(ExplainHashLock); + + /* Enable timeout only if instrumentation is enabled */ + if (es->analyze) + enable_timeout_every(PROGRESSIVE_EXPLAIN_TIMEOUT, + TimestampTzPlusMilliseconds(GetCurrentTimestamp(), + progressive_explain_interval), + progressive_explain_interval); + + /* Printing progressive plan for the first time */ + ProgressiveExplainPrint(queryDesc); +} + +/* + * ProgressiveExplainTrigger + * Called by the timeout handler to start printing progressive + * explain plans. + */ +void +ProgressiveExplainTrigger(void) +{ + WrapExecProcNodeWithExplain(activeQueryDesc->planstate); +} + +/* + * ProgressiveExplainUpdate + * Updates progressive explain for instrumented runs. + */ +void +ProgressiveExplainUpdate(PlanState *node) +{ + /* Track the current PlanState */ + node->state->query_desc->pe_es->pe_curr_node = node; + ProgressiveExplainPrint(node->state->query_desc); + node->state->query_desc->pe_es->pe_curr_node = NULL; + + /* Reset timeout flag */ + ProgressiveExplainPending = false; +} + +/* + * ProgressiveExplainPrint + * Prints progressive explain in memory. + * + * This function resets the reusable ExplainState, prints the + * plan and updates the DSA with new data. + * + * DSA memory allocation is also done here. Amount of shared + * memory allocated depends on size of currently printed plan. + * There may be reallocations in subsequent calls if new plans + * don't fit in the existing area. + */ +void +ProgressiveExplainPrint(QueryDesc *queryDesc) +{ + bool alloc_needed = false; + + /* Produce a plan only if descriptor is being tracked */ + if (queryDesc && + queryDesc->planstate) + { + QueryDesc *currentQueryDesc = queryDesc; + + progressiveExplainHashKey key; + progressiveExplainHashEntry *entry; + progressiveExplainData *pe_data; + ExplainState *es = queryDesc->pe_es; + + /* Reset the string to be reused */ + resetStringInfo(es->str); + + /* Print the plan */ + ExplainBeginOutput(es); + ExplainPrintPlan(es, currentQueryDesc); + ExplainEndOutput(es); + + /* Exclusive access is needed to update the hash */ + key.pid = MyProcPid; + LWLockAcquire(ExplainHashLock, LW_EXCLUSIVE); + entry = (progressiveExplainHashEntry *) hash_search(progressiveExplainArray, + &key, + HASH_FIND, + NULL); + + /* Entry exists */ + if (entry) + { + /* Plan was never printed */ + if (!entry->p) + alloc_needed = true; + else + { + pe_data = dsa_get_address(es->pe_a, + entry->p); + + /* + * Plan does not fit in existing shared memory area. + * Reallocation is needed. + */ + if (strlen(es->str->data) > + add_size(strlen(pe_data->plan), + PROGRESSIVE_EXPLAIN_ALLOC_SIZE)) + { + dsa_free(es->pe_a, entry->p); + alloc_needed = true; + } + } + + if (alloc_needed) + { + /* + * The allocated size combines the length of the currently + * printed query plan with an additional delta defined by + * PROGRESSIVE_EXPLAIN_ALLOC_SIZE. This strategy prevents + * having to reallocate the segment very often, which would be + * needed in case the length of the next printed exceeds the + * previously allocated size. + */ + entry->p = dsa_allocate(es->pe_a, + add_size(sizeof(progressiveExplainData), + add_size(strlen(es->str->data), + PROGRESSIVE_EXPLAIN_ALLOC_SIZE))); + pe_data = dsa_get_address(es->pe_a, entry->p); + pe_data->pid = MyProcPid; + } + + /* Update shared memory with new data */ + strcpy(pe_data->plan, es->str->data); + pe_data->last_print = GetCurrentTimestamp(); + } + + LWLockRelease(ExplainHashLock); + } +} + +/* + * ProgressiveExplainFinish + * Finalizes query execution with progressive explain enabled. + */ +void +ProgressiveExplainFinish(QueryDesc *queryDesc) +{ + ProgressiveExplainCleanup(queryDesc); +} + +/* + * ProgressiveExplainCleanup + * Cleanup routine when progressive explain is enabled. + * + * We need deal with structures not automatically released by the memory + * context removal. Current tasks are: + * - remove local backend from progressive explain hash + * - detach from DSA used to store shared data + */ +void +ProgressiveExplainCleanup(QueryDesc *queryDesc) +{ + progressiveExplainHashKey key; + + /* Startup timeout hasn't triggered yet, just disable it */ + if (get_timeout_active(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT)) + { + disable_timeout(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, false); + } + /* Initial progressive explain was done, clean everything */ + else + { + /* Stop timeout */ + disable_timeout(PROGRESSIVE_EXPLAIN_TIMEOUT, false); + + /* Reset timeout flag */ + ProgressiveExplainPending = false; + + key.pid = MyProcPid; + LWLockAcquire(ExplainHashLock, LW_EXCLUSIVE); + if (queryDesc && queryDesc->pe_es->pe_a) + dsa_detach(queryDesc->pe_es->pe_a); + hash_search(progressiveExplainArray, &key, HASH_REMOVE, NULL); + LWLockRelease(ExplainHashLock); + } +} + +/* + * ExecProcNodeWithExplain + * ExecProcNode wrapper that initializes progressive explain + * and uwraps ExecProdNode to the original function. + */ +static TupleTableSlot * +ExecProcNodeExplain(PlanState *node) +{ + /* Initialize progressive explain */ + ProgressiveExplainInit(node->state->query_desc); + + /* Unwrap exec proc node for all nodes */ + UnwrapExecProcNodeWithExplain(node->state->query_desc->planstate); + + /* + * Since unwrapping has already done, call ExecProcNode() not + * ExecProcNodeOriginal(). + */ + return node->ExecProcNode(node); +} + +/* + * ExecProcNode wrapper that performs instrumentation calls and prints + * progressive explains. By keeping this a separate function, we add + * overhead only when progressive explain is enabled + */ +TupleTableSlot * +ExecProcNodeInstrExplain(PlanState *node) +{ + TupleTableSlot *result; + + InstrStartNode(node->instrument); + + /* + * Update progressive after timeout is reached. + */ + if (ProgressiveExplainPending) + ProgressiveExplainUpdate(node); + + result = node->ExecProcNodeReal(node); + + InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); + + return result; +} + +/* + * WrapMultiExecProcNodesWithExplain - + * Wrap array of PlanStates ExecProcNodes with ExecProcNodeWithExplain + */ +static void +WrapMultiExecProcNodesWithExplain(PlanState **planstates, int nplans) +{ + int i; + + for (i = 0; i < nplans; i++) + WrapExecProcNodeWithExplain(planstates[i]); +} + +/* + * WrapCustomPlanChildExecProcNodesWithExplain - + * Wrap CustomScanstate children's ExecProcNodes with ExecProcNodeWithExplain + */ +static void +WrapCustomPlanChildExecProcNodesWithExplain(CustomScanState *css) +{ + ListCell *cell; + + foreach(cell, css->custom_ps) + WrapExecProcNodeWithExplain((PlanState *) lfirst(cell)); +} + +/* + * WrapExecProcNodeWithExplain - + * Wrap ExecProcNode with ExecProcNodeWithExplain recursively + */ +static void +WrapExecProcNodeWithExplain(PlanState *ps) +{ + /* wrapping can be done only once */ + if (ps->ExecProcNodeOriginal != NULL) + return; + + check_stack_depth(); + + ps->ExecProcNodeOriginal = ps->ExecProcNode; + ps->ExecProcNode = ExecProcNodeExplain; + + if (ps->lefttree != NULL) + WrapExecProcNodeWithExplain(ps->lefttree); + if (ps->righttree != NULL) + WrapExecProcNodeWithExplain(ps->righttree); + + /* special child plans */ + switch (nodeTag(ps->plan)) + { + case T_Append: + ereport(DEBUG1, errmsg("wrapping Append")); + WrapMultiExecProcNodesWithExplain(((AppendState *) ps)->appendplans, + ((AppendState *) ps)->as_nplans); + break; + case T_MergeAppend: + ereport(DEBUG1, errmsg("wrapping MergeAppend")); + WrapMultiExecProcNodesWithExplain(((MergeAppendState *) ps)->mergeplans, + ((MergeAppendState *) ps)->ms_nplans); + break; + case T_BitmapAnd: + ereport(DEBUG1, errmsg("wrapping BitmapAndState")); + WrapMultiExecProcNodesWithExplain(((BitmapAndState *) ps)->bitmapplans, + ((BitmapAndState *) ps)->nplans); + break; + case T_BitmapOr: + ereport(DEBUG1, errmsg("wrapping BitmapOrtate")); + WrapMultiExecProcNodesWithExplain(((BitmapOrState *) ps)->bitmapplans, + ((BitmapOrState *) ps)->nplans); + break; + case T_SubqueryScan: + ereport(DEBUG1, errmsg("wrapping Subquery")); + WrapExecProcNodeWithExplain(((SubqueryScanState *) ps)->subplan); + break; + case T_CustomScan: + ereport(DEBUG1, errmsg("wrapping CustomScanState")); + WrapCustomPlanChildExecProcNodesWithExplain((CustomScanState *) ps); + break; + default: + break; + } +} + +/* + * UnwrapMultiExecProcNodesWithExplain - + * Unwrap array of PlanStates ExecProcNodes with ExecProcNodeWithExplain + */ +static void +UnwrapMultiExecProcNodesWithExplain(PlanState **planstates, int nplans) +{ + int i; + + for (i = 0; i < nplans; i++) + UnwrapExecProcNodeWithExplain(planstates[i]); +} + +/* + * UnwrapCustomPlanChildExecProcNodesWithExplain - + * Unwrap CustomScanstate children's ExecProcNodes with ExecProcNodeWithExplain + */ +static void +UnwrapCustomPlanChildExecProcNodesWithExplain(CustomScanState *css) +{ + ListCell *cell; + + foreach(cell, css->custom_ps) + UnwrapExecProcNodeWithExplain((PlanState *) lfirst(cell)); +} + +/* + * UnwrapExecProcNodeWithExplain - + * Unwrap ExecProcNode with ExecProcNodeWithExplain recursively + */ +static void +UnwrapExecProcNodeWithExplain(PlanState *ps) +{ + Assert(ps->ExecProcNodeOriginal != NULL); + + check_stack_depth(); + + ps->ExecProcNode = ps->ExecProcNodeOriginal; + ps->ExecProcNodeOriginal = NULL; + + if (ps->lefttree != NULL) + UnwrapExecProcNodeWithExplain(ps->lefttree); + if (ps->righttree != NULL) + UnwrapExecProcNodeWithExplain(ps->righttree); + + /* special child plans */ + switch (nodeTag(ps->plan)) + { + case T_Append: + ereport(DEBUG1, errmsg("unwrapping Append")); + UnwrapMultiExecProcNodesWithExplain(((AppendState *) ps)->appendplans, + ((AppendState *) ps)->as_nplans); + break; + case T_MergeAppend: + ereport(DEBUG1, errmsg("unwrapping MergeAppend")); + UnwrapMultiExecProcNodesWithExplain(((MergeAppendState *) ps)->mergeplans, + ((MergeAppendState *) ps)->ms_nplans); + break; + case T_BitmapAnd: + ereport(DEBUG1, errmsg("unwrapping BitmapAndState")); + UnwrapMultiExecProcNodesWithExplain(((BitmapAndState *) ps)->bitmapplans, + ((BitmapAndState *) ps)->nplans); + break; + case T_BitmapOr: + ereport(DEBUG1, errmsg("unwrapping BitmapOrtate")); + UnwrapMultiExecProcNodesWithExplain(((BitmapOrState *) ps)->bitmapplans, + ((BitmapOrState *) ps)->nplans); + break; + case T_SubqueryScan: + ereport(DEBUG1, errmsg("unwrapping Subquery")); + UnwrapExecProcNodeWithExplain(((SubqueryScanState *) ps)->subplan); + break; + case T_CustomScan: + ereport(DEBUG1, errmsg("unwrapping CustomScanState")); + UnwrapCustomPlanChildExecProcNodesWithExplain((CustomScanState *) ps); + break; + default: + break; + } +} + +/* + * ProgressiveExplainReleaseFunc + * Memory context release callback function to remove + * plan from explain hash and disable the timeout. + */ +static void +ProgressiveExplainReleaseFunc(void *arg) +{ + /* Remove row from hash */ + progressiveExplainHashKey key; + progressiveExplainHashEntry *entry; + + key.pid = MyProcPid; + LWLockAcquire(ExplainHashLock, LW_SHARED); + entry = (progressiveExplainHashEntry *) hash_search(progressiveExplainArray, + &key, + HASH_FIND, + NULL); + LWLockRelease(ExplainHashLock); + if (entry) + ProgressiveExplainCleanup(NULL); +} + +/* + * ProgressiveExplainHashShmemSize + * Compute shared memory space needed for explain hash. + */ +Size +ProgressiveExplainHashShmemSize(void) +{ + Size size = 0; + long max_table_size; + + max_table_size = add_size(MaxBackends, + max_parallel_workers); + size = add_size(size, + hash_estimate_size(max_table_size, + sizeof(progressiveExplainHashEntry))); + + return size; +} + +/* + * InitProgressiveExplainHash + * Initialize hash used to store data of progressive explains. + */ +void +InitProgressiveExplainHash(void) +{ + HASHCTL info; + + info.keysize = sizeof(progressiveExplainHashKey); + info.entrysize = sizeof(progressiveExplainHashEntry); + + progressiveExplainArray = ShmemInitHash("progressive explain hash", + 50, 50, + &info, + HASH_ELEM | HASH_BLOBS); +} + +/* + * pg_stat_progress_explain + * Return the progress of progressive explains. + */ +Datum +pg_stat_progress_explain(PG_FUNCTION_ARGS) +{ +#define EXPLAIN_ACTIVITY_COLS 3 + HASH_SEQ_STATUS hash_seq; + progressiveExplainHashEntry *entry; + dsa_area *a; + progressiveExplainData *ped; + + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, 0); + + LWLockAcquire(ExplainHashLock, LW_SHARED); + + hash_seq_init(&hash_seq, progressiveExplainArray); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Datum values[EXPLAIN_ACTIVITY_COLS] = {0}; + bool nulls[EXPLAIN_ACTIVITY_COLS] = {0}; + + /* + * We don't look at a DSA that doesn't contain data yet, or at our own + * row. + */ + if (!DsaPointerIsValid(entry->p) || + MyProcPid == entry->key.pid) + continue; + + a = dsa_attach(entry->h); + ped = dsa_get_address(a, entry->p); + + values[0] = ped->pid; + values[1] = TimestampTzGetDatum(ped->last_print); + + if (superuser()) + values[2] = CStringGetTextDatum(ped->plan); + else + { + int num_backends = pgstat_fetch_stat_numbackends(); + int curr_backend; + + /* 1-based index */ + for (curr_backend = 1; curr_backend <= num_backends; curr_backend++) + { + LocalPgBackendStatus *local_beentry; + PgBackendStatus *beentry; + + /* Get the next one in the list */ + local_beentry = pgstat_get_local_beentry_by_index(curr_backend); + beentry = &local_beentry->backendStatus; + + if (beentry->st_procpid == ped->pid) + { + if (beentry->st_userid == GetUserId()) + values[2] = CStringGetTextDatum(ped->plan); + else + values[2] = CStringGetTextDatum(""); + break; + } + } + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + + dsa_detach(a); + + } + LWLockRelease(ExplainHashLock); + + return (Datum) 0; +} diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0493b7d5365..52b8b2bd1f7 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -43,6 +43,7 @@ #include "access/xact.h" #include "catalog/namespace.h" #include "catalog/partition.h" +#include "commands/explain.h" #include "commands/matview.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -157,6 +158,12 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) /* caller must ensure the query's snapshot is active */ Assert(GetActiveSnapshot() == queryDesc->snapshot); + /* + * Setup progressive explain if enabled. + */ + if (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE) + ProgressiveExplainSetup(queryDesc); + /* * If the transaction is read-only, we need to check if any writes are * planned to non-temporary tables. EXPLAIN is considered read-only. @@ -182,6 +189,11 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) estate = CreateExecutorState(); queryDesc->estate = estate; + /* + * Adding back reference to QueryDesc + */ + estate->query_desc = queryDesc; + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* @@ -267,6 +279,12 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) */ InitPlan(queryDesc, eflags); + /* + * Start progressive explain if enabled. + */ + if (progressive_explain != PROGRESSIVE_EXPLAIN_NONE) + ProgressiveExplainStart(queryDesc); + MemoryContextSwitchTo(oldcontext); return ExecPlanStillValid(queryDesc->estate); @@ -516,6 +534,12 @@ standard_ExecutorFinish(QueryDesc *queryDesc) MemoryContextSwitchTo(oldcontext); + /* + * Finish progressive explain if enabled. + */ + if (progressive_explain != PROGRESSIVE_EXPLAIN_NONE) + ProgressiveExplainFinish(queryDesc); + estate->es_finished = true; } diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index f5f9cfbeead..3af8e9d964d 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -72,6 +72,7 @@ */ #include "postgres.h" +#include "commands/explain.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "executor/nodeAppend.h" @@ -118,6 +119,7 @@ #include "executor/nodeWorktablescan.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" +#include "utils/guc.h" static TupleTableSlot *ExecProcNodeFirst(PlanState *node); static TupleTableSlot *ExecProcNodeInstr(PlanState *node); @@ -461,8 +463,14 @@ ExecProcNodeFirst(PlanState *node) * does instrumentation. Otherwise we can dispense with all wrappers and * have ExecProcNode() directly call the relevant function from now on. */ + if (node->instrument) - node->ExecProcNode = ExecProcNodeInstr; + { + if (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE) + node->ExecProcNode = ExecProcNodeInstrExplain; + else + node->ExecProcNode = ExecProcNodeInstr; + } else node->ExecProcNode = node->ExecProcNodeReal; diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 56e635f4700..6a160ee254f 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -25,6 +25,8 @@ static WalUsage save_pgWalUsage; static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); static void WalUsageAdd(WalUsage *dst, WalUsage *add); +static void InstrEndLoopInternal(Instrumentation *instr, bool force); + /* Allocate new instrumentation structure(s) */ Instrumentation * @@ -137,7 +139,7 @@ InstrUpdateTupleCount(Instrumentation *instr, double nTuples) /* Finish a run cycle for a plan node */ void -InstrEndLoop(Instrumentation *instr) +InstrEndLoopInternal(Instrumentation *instr, bool force) { double totaltime; @@ -145,7 +147,7 @@ InstrEndLoop(Instrumentation *instr) if (!instr->running) return; - if (!INSTR_TIME_IS_ZERO(instr->starttime)) + if (!INSTR_TIME_IS_ZERO(instr->starttime) && !force) elog(ERROR, "InstrEndLoop called on running node"); /* Accumulate per-cycle statistics into totals */ @@ -164,6 +166,20 @@ InstrEndLoop(Instrumentation *instr) instr->tuplecount = 0; } +/* Safely finish a run cycle for a plan node */ +void +InstrEndLoop(Instrumentation *instr) +{ + InstrEndLoopInternal(instr, false); +} + +/* Forcibly finish a run cycle for a plan node */ +void +InstrEndLoopForce(Instrumentation *instr) +{ + InstrEndLoopInternal(instr, true); +} + /* aggregate instrumentation information */ void InstrAggNode(Instrumentation *dst, Instrumentation *add) diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 174eed70367..15d8a3b28a8 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -25,6 +25,7 @@ #include "access/xlogprefetcher.h" #include "access/xlogrecovery.h" #include "commands/async.h" +#include "commands/explain.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/autovacuum.h" @@ -148,6 +149,7 @@ CalculateShmemSize(int *num_semaphores) size = add_size(size, WaitEventCustomShmemSize()); size = add_size(size, InjectionPointShmemSize()); size = add_size(size, SlotSyncShmemSize()); + size = add_size(size, ProgressiveExplainHashShmemSize()); /* include additional requested shmem from preload libraries */ size = add_size(size, total_addin_request); @@ -300,6 +302,11 @@ CreateOrAttachShmemStructs(void) */ PredicateLockShmemInit(); + /* + * Set up instrumented explain hash table + */ + InitProgressiveExplainHash(); + /* * Set up process table */ diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 8adf2730277..2b4393d3635 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -176,6 +176,7 @@ static const char *const BuiltinTrancheNames[] = { [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU", [LWTRANCHE_XACT_SLRU] = "XactSLRU", [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA", + [LWTRANCHE_PROGRESSIVE_EXPLAIN_DSA] = "ProgressiveExplainDSA", }; StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index e199f071628..890acf02da9 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -346,6 +346,7 @@ WALSummarizer "Waiting to read or update WAL summarization state." DSMRegistry "Waiting to read or update the dynamic shared memory registry." InjectionPoint "Waiting to read or update information related to injection points." SerialControl "Waiting to read or update shared pg_serial state." +ExplainHash "Waiting to access backend explain shared hash table." # # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index ee1a9d5d98b..6aee6f08b75 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -33,6 +33,7 @@ #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_tablespace.h" +#include "commands/explain.h" #include "libpq/auth.h" #include "libpq/libpq-be.h" #include "mb/pg_wchar.h" @@ -80,6 +81,8 @@ static void TransactionTimeoutHandler(void); static void IdleSessionTimeoutHandler(void); static void IdleStatsUpdateTimeoutHandler(void); static void ClientCheckTimeoutHandler(void); +static void ProgressiveExplainStartupTimeoutHandler(void); +static void ProgressiveExplainTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); static void process_startup_options(Port *port, bool am_superuser); static void process_settings(Oid databaseid, Oid roleid); @@ -757,6 +760,10 @@ InitPostgres(const char *in_dbname, Oid dboid, RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); RegisterTimeout(IDLE_STATS_UPDATE_TIMEOUT, IdleStatsUpdateTimeoutHandler); + RegisterTimeout(PROGRESSIVE_EXPLAIN_TIMEOUT, + ProgressiveExplainTimeoutHandler); + RegisterTimeout(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, + ProgressiveExplainStartupTimeoutHandler); } /* @@ -1418,6 +1425,18 @@ ClientCheckTimeoutHandler(void) SetLatch(MyLatch); } +static void +ProgressiveExplainStartupTimeoutHandler(void) +{ + ProgressiveExplainTrigger(); +} + +static void +ProgressiveExplainTimeoutHandler(void) +{ + ProgressiveExplainPending = true; +} + /* * Returns true if at least one role is defined in this database cluster. */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index ad25cbb39c5..e6cd1d8781c 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -40,6 +40,7 @@ #include "catalog/storage.h" #include "commands/async.h" #include "commands/event_trigger.h" +#include "commands/explain.h" #include "commands/tablespace.h" #include "commands/trigger.h" #include "commands/user.h" @@ -476,6 +477,22 @@ static const struct config_enum_entry wal_compression_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry explain_format_options[] = { + {"text", EXPLAIN_FORMAT_TEXT, false}, + {"xml", EXPLAIN_FORMAT_XML, false}, + {"json", EXPLAIN_FORMAT_JSON, false}, + {"yaml", EXPLAIN_FORMAT_YAML, false}, + {NULL, 0, false} +}; + +static const struct config_enum_entry progressive_explain_options[] = { + {"off", PROGRESSIVE_EXPLAIN_NONE, false}, + {"explain", PROGRESSIVE_EXPLAIN_EXPLAIN, false}, + {"analyze", PROGRESSIVE_EXPLAIN_ANALYZE, false}, + {"false", PROGRESSIVE_EXPLAIN_NONE, true}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -530,6 +547,15 @@ int log_parameter_max_length_on_error = 0; int log_temp_files = -1; double log_statement_sample_rate = 1.0; double log_xact_sample_rate = 0; +int progressive_explain = PROGRESSIVE_EXPLAIN_NONE; +bool progressive_explain_verbose = false; +bool progressive_explain_settings = false; +bool progressive_explain_timing = false; +bool progressive_explain_buffers = false; +bool progressive_explain_wal = false; +int progressive_explain_min_duration = 1000; +int progressive_explain_interval = 1000; +int progressive_explain_format = EXPLAIN_FORMAT_TEXT; char *backtrace_functions; int temp_file_limit = -1; @@ -2118,6 +2144,61 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"progressive_explain_verbose", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether verbose details are added to progressive explains."), + gettext_noop("Equivalent to the VERBOSE option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_verbose, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_settings", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information about modified configuration is added to progressive explains."), + gettext_noop("Equivalent to the SETTINGS option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_settings, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_timing", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information about per node timing is added to progressive explains."), + gettext_noop("Equivalent to the TIMING option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_timing, + true, + NULL, NULL, NULL + }, + + { + {"progressive_explain_buffers", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information about buffers is added to progressive explains."), + gettext_noop("Equivalent to the BUFFERS option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_buffers, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_wal", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information about WAL record generation is added to progressive explains."), + gettext_noop("Equivalent to the WAL option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_wal, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -3785,6 +3866,30 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"progressive_explain_interval", PGC_USERSET, STATS_MONITORING, + gettext_noop("Sets the interval between instrumented progressive " + "explains."), + NULL, + GUC_UNIT_MS + }, + &progressive_explain_interval, + 1000, 10, INT_MAX, + NULL, NULL, NULL + }, + + { + {"progressive_explain_min_duration", PGC_USERSET, STATS_MONITORING, + gettext_noop("Min query duration to start printing instrumented " + "progressive explains."), + NULL, + GUC_UNIT_MS + }, + &progressive_explain_min_duration, + 1000, 0, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -5299,6 +5404,26 @@ struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"progressive_explain_format", PGC_USERSET, STATS_MONITORING, + gettext_noop("Selects the EXPLAIN output format to be used with progressive explains."), + gettext_noop("Equivalent to the FORMAT option of EXPLAIN.") + }, + &progressive_explain_format, + EXPLAIN_FORMAT_TEXT, explain_format_options, + NULL, NULL, NULL + }, + + { + {"progressive_explain", PGC_USERSET, STATS_MONITORING, + gettext_noop("Enables progressive explains."), + gettext_noop("Explain output is visible via pg_stat_progress_explain.") + }, + &progressive_explain, + PROGRESSIVE_EXPLAIN_NONE, progressive_explain_options, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 2d1de9c37bd..7da505564d4 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -655,6 +655,19 @@ #log_executor_stats = off +# - Progressive Explain - + +#progressive_explain = off +#progressive_explain_min_duration = 1s +#progressive_explain_interval = 1s +#progressive_explain_format = text +#progressive_explain_settings = off +#progressive_explain_verbose = off +#progressive_explain_buffers = off +#progressive_explain_wal = off +#progressive_explain_timing = off + + #------------------------------------------------------------------------------ # VACUUMING #------------------------------------------------------------------------------ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 134b3dd8689..67be2633e50 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12450,4 +12450,14 @@ proargtypes => 'int4', prosrc => 'gist_stratnum_common' }, +{ oid => '8770', + descr => 'statistics: information about progress of backends running statements', + proname => 'pg_stat_progress_explain', prorows => '100', proisstrict => 'f', + proretset => 't', provolatile => 's', proparallel => 'r', + prorettype => 'record', proargtypes => 'bool', + proallargtypes => '{bool,int4,timestamptz,text}', + proargmodes => '{i,o,o,o}', + proargnames => '{mode,pid,last_print,query_plan}', + prosrc => 'pg_stat_progress_explain' }, + ] diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 64547bd9b9c..50e90815936 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -13,6 +13,7 @@ #ifndef EXPLAIN_H #define EXPLAIN_H +#include "datatype/timestamp.h" #include "executor/executor.h" #include "lib/stringinfo.h" #include "parser/parse_node.h" @@ -32,6 +33,13 @@ typedef enum ExplainFormat EXPLAIN_FORMAT_YAML, } ExplainFormat; +typedef enum ProgressiveExplain +{ + PROGRESSIVE_EXPLAIN_NONE, + PROGRESSIVE_EXPLAIN_EXPLAIN, + PROGRESSIVE_EXPLAIN_ANALYZE, +} ProgressiveExplain; + typedef struct ExplainWorkersState { int num_workers; /* # of worker processes the plan used */ @@ -67,12 +75,37 @@ typedef struct ExplainState List *deparse_cxt; /* context list for deparsing expressions */ Bitmapset *printed_subplans; /* ids of SubPlans we've printed */ bool hide_workers; /* set if we find an invisible Gather */ + bool progressive; /* set if tracking a progressive explain */ int rtable_size; /* length of rtable excluding the RTE_GROUP * entry */ /* state related to the current plan node */ ExplainWorkersState *workers_state; /* needed if parallel plan */ + + /* state related to progressive explains */ + struct PlanState *pe_curr_node; + struct Instrumentation *pe_local_instr; + dsa_area *pe_a; } ExplainState; +typedef struct progressiveExplainHashKey +{ + int pid; /* PID */ +} progressiveExplainHashKey; + +typedef struct progressiveExplainHashEntry +{ + progressiveExplainHashKey key; /* hash key of entry - MUST BE FIRST */ + dsa_handle h; + dsa_pointer p; +} progressiveExplainHashEntry; + +typedef struct progressiveExplainData +{ + int pid; + TimestampTz last_print; + char plan[]; +} progressiveExplainData; + /* Hook for plugins to get control in ExplainOneQuery() */ typedef void (*ExplainOneQuery_hook_type) (Query *query, int cursorOptions, @@ -120,4 +153,15 @@ extern void ExplainPrintJITSummary(ExplainState *es, QueryDesc *queryDesc); extern void ExplainQueryText(ExplainState *es, QueryDesc *queryDesc); extern void ExplainQueryParameters(ExplainState *es, ParamListInfo params, int maxlen); +extern void ProgressiveExplainSetup(QueryDesc *queryDesc); +extern void ProgressiveExplainStart(QueryDesc *queryDesc); +extern void ProgressiveExplainTrigger(void); +extern void ProgressiveExplainUpdate(PlanState *node); +extern void ProgressiveExplainFinish(QueryDesc *queryDesc); +extern Size ProgressiveExplainHashShmemSize(void); +extern void InitProgressiveExplainHash(void); +extern TupleTableSlot *ExecProcNodeInstrExplain(PlanState *node); + +extern bool ProgressiveExplainPending; + #endif /* EXPLAIN_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index ba53305ad42..a6d2362c6c9 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -48,6 +48,7 @@ typedef struct QueryDesc TupleDesc tupDesc; /* descriptor for result tuples */ EState *estate; /* executor's query-wide state */ PlanState *planstate; /* tree of per-plan-node state */ + struct ExplainState *pe_es; /* progressive explain state if enabled */ /* This field is set by ExecutePlan */ bool already_executed; /* true if previously executed */ diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 03653ab6c6c..21de2a5632d 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -109,6 +109,7 @@ extern void InstrStartNode(Instrumentation *instr); extern void InstrStopNode(Instrumentation *instr, double nTuples); extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples); extern void InstrEndLoop(Instrumentation *instr); +extern void InstrEndLoopForce(Instrumentation *instr); extern void InstrAggNode(Instrumentation *dst, Instrumentation *add); extern void InstrStartParallelQuery(void); extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index a323fa98bbb..3ace9a88636 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -57,6 +57,7 @@ struct ExprState; struct ExprContext; struct RangeTblEntry; /* avoid including parsenodes.h here */ struct ExprEvalStep; /* avoid including execExpr.h everywhere */ +struct QueryDesc; /* avoid including execdesc.h here */ struct CopyMultiInsertBuffer; struct LogicalTapeSet; @@ -763,6 +764,9 @@ typedef struct EState */ List *es_insert_pending_result_relations; List *es_insert_pending_modifytables; + + /* Reference to query descriptor */ + struct QueryDesc *query_desc; } EState; @@ -1159,6 +1163,8 @@ typedef struct PlanState ExecProcNodeMtd ExecProcNode; /* function to return next tuple */ ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a * wrapper */ + ExecProcNodeMtd ExecProcNodeOriginal; /* pointer to original function + * another wrapper was added */ Instrumentation *instrument; /* Optional runtime stats for this node */ WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 13a7dc89980..eef4af1a3e7 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -217,6 +217,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SUBTRANS_SLRU, LWTRANCHE_XACT_SLRU, LWTRANCHE_PARALLEL_VACUUM_DSA, + LWTRANCHE_PROGRESSIVE_EXPLAIN_DSA, LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index cf565452382..43f10a51862 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -83,3 +83,4 @@ PG_LWLOCK(49, WALSummarizer) PG_LWLOCK(50, DSMRegistry) PG_LWLOCK(51, InjectionPoint) PG_LWLOCK(52, SerialControl) +PG_LWLOCK(53, ExplainHash) diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 1233e07d7da..365c933ab00 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -278,6 +278,15 @@ extern PGDLLIMPORT int log_min_duration_statement; extern PGDLLIMPORT int log_temp_files; extern PGDLLIMPORT double log_statement_sample_rate; extern PGDLLIMPORT double log_xact_sample_rate; +extern PGDLLIMPORT int progressive_explain; +extern PGDLLIMPORT int progressive_explain_min_duration; +extern PGDLLIMPORT int progressive_explain_interval; +extern PGDLLIMPORT int progressive_explain_format; +extern PGDLLIMPORT bool progressive_explain_verbose; +extern PGDLLIMPORT bool progressive_explain_settings; +extern PGDLLIMPORT bool progressive_explain_timing; +extern PGDLLIMPORT bool progressive_explain_buffers; +extern PGDLLIMPORT bool progressive_explain_wal; extern PGDLLIMPORT char *backtrace_functions; extern PGDLLIMPORT int temp_file_limit; diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 7b19beafdc9..ea66a0505d9 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -36,6 +36,8 @@ typedef enum TimeoutId IDLE_STATS_UPDATE_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, STARTUP_PROGRESS_TIMEOUT, + PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, + PROGRESSIVE_EXPLAIN_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */ diff --git a/src/test/modules/test_misc/t/008_progressive_explain.pl b/src/test/modules/test_misc/t/008_progressive_explain.pl new file mode 100644 index 00000000000..db6adfa89d8 --- /dev/null +++ b/src/test/modules/test_misc/t/008_progressive_explain.pl @@ -0,0 +1,130 @@ +# Copyright (c) 2023-2025, PostgreSQL Global Development Group +# +# Test progressive explain +# +# We need to make sure pg_stat_progress_explain does not show rows for the local +# session, even if progressive explain is enabled. For other sessions pg_stat_progress_explain +# should contain data for a PID only if progressive_explain is enabled and a query +# is running. Data needs to be removed when query finishes (or gets cancelled). + +use strict; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize node +my $node = PostgreSQL::Test::Cluster->new('progressive_explain'); + +$node->init; +# Configure progressive explain to be logged immediatelly +$node->append_conf('postgresql.conf', 'progressive_explain_min_duration = 0'); +$node->start; + +# Test for local session +sub test_local_session +{ + my $setting = $_[0]; + # Make sure local session does not appear in pg_stat_progress_explain + my $count = $node->safe_psql( + 'postgres', qq[ + SET progressive_explain='$setting'; + SELECT count(*) from pg_stat_progress_explain WHERE pid = pg_backend_pid() + ]); + + ok($count == "0", + "Session cannot see its own explain with progressive_explain set to ${setting}"); +} + +# Tests for peer session +sub test_peer_session +{ + my $setting = $_[0]; + my $ret; + + # Start a background session and get its PID + my $background_psql = $node->background_psql( + 'postgres', + on_error_stop => 0); + + my $pid = $background_psql->query_safe( + qq[ + SELECT pg_backend_pid(); + ]); + + # Configure progressive explain in background session and run a simple query + # letting it finish + $background_psql->query_safe( + qq[ + SET progressive_explain='$setting'; + SELECT 1; + ]); + + # Check that pg_stat_progress_explain contains no row for the PID that finished + # its query gracefully + $ret = $node->safe_psql( + 'postgres', qq[ + SELECT count(*) FROM pg_stat_progress_explain where pid = $pid + ]); + + ok($ret == "0", + "pg_stat_progress_explain empty for completed query with progressive_explain set to ${setting}"); + + # Start query in background session and leave it running + $background_psql->query_until( + qr/start/, q( + \echo start + SELECT pg_sleep(600); + )); + + $ret = $node->safe_psql( + 'postgres', qq[ + SELECT count(*) FROM pg_stat_progress_explain where pid = $pid + ]); + + # If progressive_explain is disabled pg_stat_progress_explain should not contain + # row for PID + if ($setting eq 'off') { + ok($ret == "0", + "pg_stat_progress_explain empty for running query with progressive_explain set to ${setting}"); + } + # 1 row for pid is expected for running query + else { + ok($ret == "1", + "pg_stat_progress_explain with 1 row for running query with progressive_explain set to ${setting}"); + } + + # Terminate running query and make sure it is gone + $node->safe_psql( + 'postgres', qq[ + SELECT pg_cancel_backend($pid); + ]); + + $node->poll_query_until( + 'postgres', qq[ + SELECT count(*) = 0 FROM pg_stat_activity + WHERE pid = $pid and state = 'active' + ]); + + # Check again pg_stat_progress_explain and confirm that the existing row is + # now gone + $ret = $node->safe_psql( + 'postgres', qq[ + SELECT count(*) FROM pg_stat_progress_explain where pid = $pid + ]); + + ok($ret == "0", + "pg_stat_progress_explain empty for canceled query with progressive_explain set to ${setting}"); +} + +# Run tests for the local session +test_local_session('off'); +test_local_session('explain'); +test_local_session('analyze'); + +# Run tests for peer session +test_peer_session('off'); +test_peer_session('explain'); +test_peer_session('analyze'); + +$node->stop; +done_testing(); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 62f69ac20b2..4968db5607f 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2041,6 +2041,10 @@ pg_stat_progress_create_index| SELECT s.pid, s.param15 AS partitions_done FROM (pg_stat_get_progress_info('CREATE INDEX'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); +pg_stat_progress_explain| SELECT pid, + last_print, + query_plan + FROM pg_stat_progress_explain(true) pg_stat_progress_explain(pid, last_print, query_plan); pg_stat_progress_vacuum| SELECT s.pid, s.datid, d.datname, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9840060997f..f1a74454187 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2267,6 +2267,7 @@ ProcessUtilityContext ProcessUtility_hook_type ProcessingMode ProgressCommandType +ProgressiveExplain ProjectSet ProjectSetPath ProjectSetState @@ -3870,6 +3871,9 @@ process_sublinks_context proclist_head proclist_mutable_iter proclist_node +progressiveExplainData +progressiveExplainHashEntry +progressiveExplainHashKey promptStatus_t pthread_barrier_t pthread_cond_t -- 2.43.0