From c34d0114833219ef7b1b2d4283522ce9d4e6ffe3 Mon Sep 17 00:00:00 2001 From: Vigneshwaran C Date: Tue, 9 Nov 2021 16:28:03 +0530 Subject: [PATCH v12 1/2] Print backtrace of specified postgres process. The idea here is to implement & expose pg_print_backtrace function, internally what this function does is, the connected backend will send SIGUSR1 signal by setting PROCSIG_PRINT_BACKTRACE to postgres backend whose pid matches the specified process id. Once the backend process receives this signal it will print the backtrace of the process to the log file based on the logging configuration, if logging is disabled backtrace will be printed to the console where postmaster was started. --- doc/src/sgml/func.sgml | 83 ++++++++++++++++++++ src/backend/catalog/system_functions.sql | 2 + src/backend/postmaster/autovacuum.c | 4 + src/backend/storage/ipc/procsignal.c | 41 ++++++++++ src/backend/storage/ipc/signalfuncs.c | 48 +++++++++++ src/backend/tcop/postgres.c | 4 + src/backend/utils/error/elog.c | 20 ++++- src/backend/utils/init/globals.c | 1 + src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 5 ++ src/include/miscadmin.h | 1 + src/include/storage/procsignal.h | 3 +- src/include/utils/elog.h | 2 + src/test/regress/expected/misc_functions.out | 42 ++++++++++ src/test/regress/sql/misc_functions.sql | 31 ++++++++ 15 files changed, 283 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 24447c0017..8d147825eb 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25345,6 +25345,32 @@ SELECT collation for ('foo' COLLATE "de_DE"); + + + + pg_print_backtrace + + pg_print_backtrace ( pid integer ) + boolean + + + Requests to log the backtrace of the + backend + with the specified process ID. The backtrace will be logged at message + level LOG. It will appear in the server log based on + the log configuration set (See + for more information), but will not be sent to the client regardless of + . A backtrace will identify + where exactly the backend process is currently executing. This may be + useful to developers to diagnose stuck processes and other problems. + This feature is not supported for the postmaster, logger, checkpointer, + walwriter, background writer or statistics collector process. This + feature will be available if PostgreSQL was built with the ability to + capture backtrace. If not available, the function will emit a warning + and return false. + + + @@ -25458,6 +25484,63 @@ LOG: Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560 because it may generate a large number of log messages. + + pg_print_backtrace can be used to log the backtrace of + a backend process. For example: + +postgres=# select pg_print_backtrace(pg_backend_pid()); + pg_print_backtrace +-------------------- + t +(1 row) + +The backtrace will be logged to the log file if logging is enabled, if logging +is disabled backtrace will be logged to the console where the postmaster was +started. For example: + +2021-01-27 11:33:50.247 IST [111735] LOG: current backtrace: + postgres: postgresdba postgres [local] SELECT(set_backtrace+0x38) [0xae06c5] + postgres: postgresdba postgres [local] SELECT(ProcessInterrupts+0x788) [0x950c34] + postgres: postgresdba postgres [local] SELECT() [0x761e89] + postgres: postgresdba postgres [local] SELECT() [0x71bbda] + postgres: postgresdba postgres [local] SELECT() [0x71e380] + postgres: postgresdba postgres [local] SELECT(standard_ExecutorRun+0x1d6) [0x71c1fe] + postgres: postgresdba postgres [local] SELECT(ExecutorRun+0x55) [0x71c026] + postgres: postgresdba postgres [local] SELECT() [0x953fc5] + postgres: postgresdba postgres [local] SELECT(PortalRun+0x262) [0x953c7e] + postgres: postgresdba postgres [local] SELECT() [0x94db78] + postgres: postgresdba postgres [local] SELECT(PostgresMain+0x7d7) [0x951e72] + postgres: postgresdba postgres [local] SELECT() [0x896b2f] + postgres: postgresdba postgres [local] SELECT() [0x8964b5] + postgres: postgresdba postgres [local] SELECT() [0x892a79] + postgres: postgresdba postgres [local] SELECT(PostmasterMain+0x116b) [0x892350] + postgres: postgresdba postgres [local] SELECT() [0x795f72] + /lib64/libc.so.6(__libc_start_main+0xf5) [0x7f2107bbd505] + postgres: postgresdba postgres [local] SELECT() [0x4842a9] + + You can get the file name and line number by using gdb/addr2line in + linux platforms, as a prerequisite users must ensure gdb/addr2line is + already installed: + +1) "info line *address" from gdb on postgres executable. For example: +gdb ./postgres +(gdb) info line *0x71c25d +Line 378 of "execMain.c" starts at address 0x71c25d <standard_ExecutorRun+470> and ends at 0x71c263 <standard_ExecutorRun+476>. +OR +2) Using "addr2line -e postgres address", For example: +addr2line -e ./postgres 0x71c25d +/home/postgresdba/src/backend/executor/execMain.c:378 + + If PostgreSQL was not built with the ability to capture backtrace. The + function will emit a warning and return false, for example: + +WARNING: backtrace generation is not supported by this installation + pg_print_backtrace +-------------------- + f + + + diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 54c93b16c4..072c5952fa 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -701,6 +701,8 @@ REVOKE EXECUTE ON FUNCTION pg_ls_dir(text,boolean,boolean) FROM public; REVOKE EXECUTE ON FUNCTION pg_log_backend_memory_contexts(integer) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION pg_print_backtrace(integer) FROM PUBLIC; + -- -- We also set up some things as accessible to standard roles. -- diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 96332320a7..f1da744d0f 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -840,6 +840,10 @@ HandleAutoVacLauncherInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + /* Process printing backtrace */ + if (PrintBacktracePending) + ProcessPrintBacktraceInterrupt(); + /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 6e69398cdd..80fa6d4990 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -612,6 +612,44 @@ ProcessBarrierPlaceholder(void) return true; } +/* + * HandlePrintBacktraceInterrupt - Handle receipt of an interrupt indicating a + * print backtrace. + * + * All the actual work is deferred to ProcessPrintBacktraceInterrupt(), + * because we cannot safely emit a log message inside the signal handler. + */ +static void +HandlePrintBacktraceInterrupt(void) +{ + InterruptPending = true; + PrintBacktracePending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + +/* + * ProcessPrintBacktraceInterrupt - Perform logging of backtrace of this + * backend process. + * + * Any backend that participates in ProcSignal signaling must arrange + * to call this function if we see PrintBacktracePending set. + * It is called from CHECK_FOR_INTERRUPTS(), which is enough because + * the target process for logging of backtrace is a backend. + */ +void +ProcessPrintBacktraceInterrupt(void) +{ + PrintBacktracePending = false; + + /* + * Use LOG_SERVER_ONLY to prevent this message from being sent to the + * connected client. + */ + ereport(LOG_SERVER_ONLY, + (errmsg("logging backtrace of PID %d", MyProcPid))); + set_backtrace(NULL, 0); +} + /* * CheckProcSignal - check to see if a particular reason has been * signaled, and clear the signal flag. Should be called after receiving @@ -661,6 +699,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT)) HandleLogMemoryContextInterrupt(); + if (CheckProcSignal(PROCSIG_PRINT_BACKTRACE)) + HandlePrintBacktraceInterrupt(); + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE); diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index de69d60e79..df942d6015 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -23,6 +23,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -298,3 +299,50 @@ pg_rotate_logfile_v2(PG_FUNCTION_ARGS) SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); } + +/* + * pg_print_backtrace - print backtrace of backend process. + * + * By default, only superusers can print backtrace. Additional roles can be + * permitted with GRANT. + */ +Datum +pg_print_backtrace(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_BACKTRACE_SYMBOLS + int pid = PG_GETARG_INT32(0); + PGPROC *proc; + + /* + * BackendPidGetProc returns NULL if the pid isn't valid; but by the time + * we reach kill(), a process for which we get a valid proc here might have + * terminated on its own. There's no way to acquire a lock on an arbitrary + * process to prevent that. But since this mechanism is usually used to + * debug a backend running and consuming lots of CPU cycles, that it might + * end on its own first and its backtrace are not logged is not a problem. + */ + proc = BackendPidGetProc(pid); + if (proc == NULL) + { + ereport(WARNING, + (errmsg("PID %d is not a PostgreSQL server process", pid))); + PG_RETURN_BOOL(false); + } + + /* + * Send SIGUSR1 to postgres backend whose pid matches pid by + * setting PROCSIG_PRINT_BACKTRACE, the backend process will print + * the backtrace once the signal is received. + */ + if (!SendProcSignal(pid, PROCSIG_PRINT_BACKTRACE, proc->backendId)) + PG_RETURN_BOOL(true); + else + ereport(WARNING, + (errmsg("could not send signal to process %d: %m", pid))); /* return false below */ +#else + ereport(WARNING, + (errmsg("backtrace generation is not supported by this installation"))); +#endif + + PG_RETURN_BOOL(false); +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 0775abe35d..624b3f8453 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3366,6 +3366,10 @@ ProcessInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Process printing backtrace */ + if (PrintBacktracePending) + ProcessPrintBacktraceInterrupt(); } diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index f33729513a..05fe1bb5e7 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -172,7 +172,6 @@ static char formatted_log_time[FORMATTED_TS_LEN]; static const char *err_gettext(const char *str) pg_attribute_format_arg(1); -static pg_noinline void set_backtrace(ErrorData *edata, int num_skip); static void set_errdata_field(MemoryContextData *cxt, char **ptr, const char *str); static void write_console(const char *line, int len); static void setup_formatted_log_time(void); @@ -949,9 +948,10 @@ errbacktrace(void) * Compute backtrace data and add it to the supplied ErrorData. num_skip * specifies how many inner frames to skip. Use this to avoid showing the * internal backtrace support functions in the backtrace. This requires that - * this and related functions are not inlined. + * this and related functions are not inlined. If edata pointer is valid + * backtrace information will be set in edata. */ -static void +void set_backtrace(ErrorData *edata, int num_skip) { StringInfoData errtrace; @@ -978,7 +978,19 @@ set_backtrace(ErrorData *edata, int num_skip) "backtrace generation is not supported by this installation"); #endif - edata->backtrace = errtrace.data; + if (edata) + edata->backtrace = errtrace.data; + else + { + /* + * LOG_SERVER_ONLY is used intentionally to make sure this information + * is not sent to client based on client_min_messages. We don't want + * to mess up a different session as pg_print_backtrace will be + * sending SIGNAL to a different backend. + */ + elog(LOG_SERVER_ONLY, "current backtrace:%s", errtrace.data); + pfree(errtrace.data); + } } /* diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 381d9e548d..e49f5331a0 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -36,6 +36,7 @@ volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; volatile sig_atomic_t ProcSignalBarrierPending = false; volatile sig_atomic_t LogMemoryContextPending = false; +volatile sig_atomic_t PrintBacktracePending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 CritSectionCount = 0; diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 49e8e59129..9f3e7b7503 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202111091 +#define CATALOG_VERSION_NO 202111141 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d068d6532e..47872558b8 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11689,4 +11689,9 @@ prorettype => 'bytea', proargtypes => 'pg_brin_minmax_multi_summary', prosrc => 'brin_minmax_multi_summary_send' }, +# function to get the backtrace of server process +{ oid => '6105', descr => 'print backtrace of process', + proname => 'pg_print_backtrace', provolatile => 'v', prorettype => 'bool', + proargtypes => 'int4', prosrc => 'pg_print_backtrace' }, + ] diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 90a3016065..0da18326f5 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -94,6 +94,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending; extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending; +extern PGDLLIMPORT volatile sig_atomic_t PrintBacktracePending; extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index eec186be2e..731c4fa970 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -35,6 +35,7 @@ typedef enum PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */ PROCSIG_BARRIER, /* global barrier interrupt */ PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */ + PROCSIG_PRINT_BACKTRACE, /* ask backend to print the current backtrace */ /* Recovery conflict reasons */ PROCSIG_RECOVERY_CONFLICT_DATABASE, @@ -70,7 +71,7 @@ extern int SendProcSignal(pid_t pid, ProcSignalReason reason, extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); extern void WaitForProcSignalBarrier(uint64 generation); extern void ProcessProcSignalBarrier(void); - +extern void ProcessPrintBacktraceInterrupt(void); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); #endif /* PROCSIGNAL_H */ diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index f53607e12e..c63d25716a 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -453,4 +453,6 @@ extern void set_syslog_parameters(const char *ident, int facility); */ extern void write_stderr(const char *fmt,...) pg_attribute_printf(1, 2); +extern pg_noinline void set_backtrace(ErrorData *edata, int num_skip); + #endif /* ELOG_H */ diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 71d316cad3..c75cef00e7 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -176,6 +176,48 @@ REVOKE EXECUTE ON FUNCTION pg_log_backend_memory_contexts(integer) FROM regress_log_memory; DROP ROLE regress_log_memory; -- +-- pg_print_backtrace() +-- +-- Backtrace are logged and they are not returned to the function. +-- Furthermore, their contents can vary depending on the timing. However, +-- we can at least verify that the code doesn't fail, and that the +-- permissions are set properly. +-- +SELECT pg_print_backtrace(pg_backend_pid()); + pg_print_backtrace +-------------------- + t +(1 row) + +CREATE ROLE regress_print_backtrace; +SELECT has_function_privilege('regress_print_backtrace', + 'pg_print_backtrace(integer)', 'EXECUTE'); -- no + has_function_privilege +------------------------ + f +(1 row) + +GRANT EXECUTE ON FUNCTION pg_print_backtrace(integer) + TO regress_print_backtrace; +SELECT has_function_privilege('regress_print_backtrace', + 'pg_print_backtrace(integer)', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +SET ROLE regress_print_backtrace; +SELECT pg_print_backtrace(pg_backend_pid()); + pg_print_backtrace +-------------------- + t +(1 row) + +RESET ROLE; +REVOKE EXECUTE ON FUNCTION pg_print_backtrace(integer) + FROM regress_print_backtrace; +DROP ROLE regress_print_backtrace; +-- -- Test some built-in SRFs -- -- The outputs of these are variable, so we can't just print their results diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 8c23874b3f..d63d2e2ddb 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -61,6 +61,37 @@ REVOKE EXECUTE ON FUNCTION pg_log_backend_memory_contexts(integer) DROP ROLE regress_log_memory; +-- +-- pg_print_backtrace() +-- +-- Backtrace are logged and they are not returned to the function. +-- Furthermore, their contents can vary depending on the timing. However, +-- we can at least verify that the code doesn't fail, and that the +-- permissions are set properly. +-- + +SELECT pg_print_backtrace(pg_backend_pid()); + +CREATE ROLE regress_print_backtrace; + +SELECT has_function_privilege('regress_print_backtrace', + 'pg_print_backtrace(integer)', 'EXECUTE'); -- no + +GRANT EXECUTE ON FUNCTION pg_print_backtrace(integer) + TO regress_print_backtrace; + +SELECT has_function_privilege('regress_print_backtrace', + 'pg_print_backtrace(integer)', 'EXECUTE'); -- yes + +SET ROLE regress_print_backtrace; +SELECT pg_print_backtrace(pg_backend_pid()); +RESET ROLE; + +REVOKE EXECUTE ON FUNCTION pg_print_backtrace(integer) + FROM regress_print_backtrace; + +DROP ROLE regress_print_backtrace; + -- -- Test some built-in SRFs -- -- 2.30.2