From e173fac2bef56b165cac96398f8b38e91233c59f Mon Sep 17 00:00:00 2001 From: Vignesh C Date: Tue, 19 Jan 2021 16:44:37 +0530 Subject: [PATCH v2] Print backtrace of postgres process that are part of this instance. The idea here is to implement & expose pg_print_callstack function, internally what this function does is, the connected backend will send SIGUSR1 signal by setting PMSIGNAL_BACKTRACE_EMIT to the postmaster process. Postmaster process will send SIGUSR1 signal to process by setting PROCSIG_BACKTRACE_PRINT. Once the process receives this signal it will log the backtrace of the process. As syslogger process & Stats process don't have access to ProcSignal, it was discussed and concluded to skip these processes. --- doc/src/sgml/func.sgml | 23 +++++++++++ src/backend/postmaster/autovacuum.c | 4 ++ src/backend/postmaster/checkpointer.c | 5 +++ src/backend/postmaster/interrupt.c | 5 +++ src/backend/postmaster/postmaster.c | 26 ++++++++++++ src/backend/storage/ipc/procsignal.c | 50 +++++++++++++++++++++++ src/backend/storage/ipc/signalfuncs.c | 74 +++++++++++++++++++++++++++++++++++ src/backend/tcop/postgres.c | 38 ++++++++++++++++++ src/backend/utils/init/globals.c | 1 + src/bin/pg_ctl/t/005_backtrace.pl | 73 ++++++++++++++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 8 ++++ src/include/miscadmin.h | 2 + src/include/storage/pmsignal.h | 2 + src/include/storage/procsignal.h | 5 +++ src/include/tcop/tcopprot.h | 1 + 15 files changed, 317 insertions(+) create mode 100644 src/bin/pg_ctl/t/005_backtrace.pl diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index fd0370a..8bc2b46 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -24668,6 +24668,29 @@ SELECT collation for ('foo' COLLATE "de_DE"); however only superusers can terminate superuser backends. + + + + + pg_print_callstack + + pg_print_callstack ( [pid integer] ) + boolean + + + Signals the server processes to print the callstack. + pg_print_callstack without an argument prints the + callstack of all the server processes in this instance. To request + callstack of a particular process from this instance, the corresponding + process id should be passed as an argument. Callstack will be printed + based on the log configuration set. See + for more information. This + feature is not supported for stats collector and syslogger processes. + Only superusers can use pg_print_callstack to print + the callstack. + + + diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 47e60ca..aa87eda 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -836,6 +836,10 @@ HandleAutoVacLauncherInterrupts(void) if (ProcSignalBarrierPending) ProcessProcSignalBarrier(); + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); + /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 54a818b..6655de3 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -57,6 +57,7 @@ #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" +#include "tcop/tcopprot.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner.h" @@ -547,6 +548,10 @@ HandleCheckpointerInterrupts(void) if (ProcSignalBarrierPending) ProcessProcSignalBarrier(); + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); + if (ConfigReloadPending) { ConfigReloadPending = false; diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index dd9136a..a6397ab 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -21,6 +21,7 @@ #include "storage/ipc.h" #include "storage/latch.h" #include "storage/procsignal.h" +#include "tcop/tcopprot.h" #include "utils/guc.h" volatile sig_atomic_t ConfigReloadPending = false; @@ -41,6 +42,10 @@ HandleMainLoopInterrupts(void) ProcessConfigFile(PGC_SIGHUP); } + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); + if (ShutdownRequestPending) proc_exit(0); } diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 7de27ee..32d3bec 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -5116,6 +5116,28 @@ ExitPostmaster(int status) proc_exit(status); } +bool +HandlePrintCallStack(int bt_pid) +{ + return EmitProcSignalPrintCallStack(bt_pid); +} + +/* + * IsCallStackSupportedPid -- check if callstack printing for this process is + * supported + * + * Call stack not supported for stats collector and syslogger process. + */ +bool +IsCallStackSupportedPid(int pid) +{ + if ((PgStatPID != 0 && pid == PgStatPID) || + (SysLoggerPID != 0 && pid == SysLoggerPID)) + return false; + + return true; +} + /* * sigusr1_handler - handle signal conditions from child processes */ @@ -5132,6 +5154,10 @@ sigusr1_handler(SIGNAL_ARGS) PG_SETMASK(&BlockSig); #endif + /* Process backtrace emit signal. */ + if (CheckPostmasterSignal(PMSIGNAL_BACKTRACE_EMIT)) + HandlePrintCallStack(0); + /* * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in * unexpected states. If the startup process quickly starts up, completes diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 583efae..61ff8f5 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -302,6 +302,38 @@ SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId) } /* + * EmitProcSignalPrintCallStack + * + * Send SIGUSR1 to all postgres backends by setting PROCSIG_BACKTRACE_PRINT, the + * postgres processes will print the backtrace once the signal is received. + */ +bool +EmitProcSignalPrintCallStack(int bt_pid) +{ + bool found = false; + for (int i = NumProcSignalSlots - 1; i >= 0; i--) + { + volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i]; + pid_t pid = slot->pss_pid; + + if (pid != 0 && (pid == bt_pid || bt_pid == 0)) + { + found = true; + + /* see SendProcSignal for details */ + slot->pss_signalFlags[PROCSIG_BACKTRACE_PRINT] = true; + + /* Signal SIGUSR1 to the process, so that they print backtrace. */ + kill(pid, SIGUSR1); + if (bt_pid != 0) + return found; + } + } + + return found; +} + +/* * EmitProcSignalBarrier * Send a signal to every Postgres process * @@ -441,6 +473,21 @@ HandleProcSignalBarrierInterrupt(void) } /* + * Handle receipt of an print backtrace. + * + * Note: this is called within a signal handler! All we can do is set + * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke + * LogBackTrace(). + */ +static void +HandlePrintBacktraceInterrupt(void) +{ + InterruptPending = true; + PrintBacktracePending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + +/* * Perform global barrier related interrupt checking. * * Any backend that participates in ProcSignal signaling must arrange to @@ -585,6 +632,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + if (CheckProcSignal(PROCSIG_BACKTRACE_PRINT)) + HandlePrintBacktraceInterrupt(); + SetLatch(MyLatch); latch_sigusr1_handler(); diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 69fe23a..abd5b1c 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -22,6 +22,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -215,3 +216,76 @@ pg_rotate_logfile_v2(PG_FUNCTION_ARGS) SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); } + +/* + * pg_print_callstack - print callstack of process that are part of this + * instance. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_print_callstack(PG_FUNCTION_ARGS) +{ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to print backtraces"))); + +#ifdef HAVE_BACKTRACE_SYMBOLS + SendPostmasterSignal(PMSIGNAL_BACKTRACE_EMIT); +#else + { + ereport(WARNING, + (errmsg("backtrace generation is not supported by this installation"))); + PG_RETURN_BOOL(false); + } +#endif + PG_RETURN_BOOL(true); +} + +/* + * pg_print_callstack - print callstack of process that are part of this + * instance. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_print_callstack_pid(PG_FUNCTION_ARGS) +{ + int bt_pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0); + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to print backtraces"))); + + if (bt_pid <= 0 || bt_pid > INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value should between 1 and %d", + INT_MAX))); +#ifdef HAVE_BACKTRACE_SYMBOLS + { + if (!IsCallStackSupportedPid(bt_pid)) + { + ereport(WARNING, + (errmsg("backtrace generation is not supported for stats collector/syslogger process"))); + PG_RETURN_BOOL(false); + } + + if(HandlePrintCallStack(bt_pid)) + PG_RETURN_BOOL(true); + + ereport(WARNING, + (errmsg("specified pid is not part of this instance"))); + PG_RETURN_BOOL(false); + } +#else + { + ereport(WARNING, + (errmsg("backtrace generation is not supported by this installation"))); + PG_RETURN_BOOL(false); + } +#endif +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 8dab9fd..977600c 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include #include #include #include @@ -2921,6 +2922,39 @@ FloatExceptionHandler(SIGNAL_ARGS) } /* + * LogBackTrace + * + * Get the backtrace and log the backtrace to log file. + */ +void +LogBackTrace(void) +{ + int save_errno = errno; + + void *buf[100]; + int nframes; + char **strfrms; + StringInfoData errtrace; + + /* OK to process messages. Reset the flag saying there are more to do. */ + PrintBacktracePending = false; + + nframes = backtrace(buf, lengthof(buf)); + strfrms = backtrace_symbols(buf, nframes); + if (strfrms == NULL) + return; + + initStringInfo(&errtrace); + for (int i = 0; i < nframes; i++) + appendStringInfo(&errtrace, "\n%s", strfrms[i]); + free(strfrms); + + elog(LOG, "current backtrace:%s", errtrace.data); + + errno = save_errno; +} + +/* * RecoveryConflictInterrupt: out-of-line portion of recovery conflict * handling following receipt of SIGUSR1. Designed to be similar to die() * and StatementCancelHandler(). Called only by a normal user backend @@ -3274,6 +3308,10 @@ ProcessInterrupts(void) if (ParallelMessagePending) HandleParallelMessages(); + + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); } diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index ea28769..0cac8d2 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -37,6 +37,7 @@ volatile sig_atomic_t ProcSignalBarrierPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 CritSectionCount = 0; +volatile sig_atomic_t PrintBacktracePending = false; int MyProcPid; pg_time_t MyStartTime; diff --git a/src/bin/pg_ctl/t/005_backtrace.pl b/src/bin/pg_ctl/t/005_backtrace.pl new file mode 100644 index 0000000..2346aa1 --- /dev/null +++ b/src/bin/pg_ctl/t/005_backtrace.pl @@ -0,0 +1,73 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 2; +use Time::HiRes qw(usleep); + +# Set up node with logging collector +my $node = get_new_node('primary'); +$node->init(); +$node->append_conf( + 'postgresql.conf', qq( +logging_collector = on +lc_messages = 'C' +)); + +$node->start(); + +# Verify that log output gets to the file +$node->psql('postgres', 'select pg_print_callstack()'); + +# might need to retry if logging collector process is slow... +my $max_attempts = 180 * 10; + +my $current_logfiles; +for (my $attempts = 0; $attempts < $max_attempts; $attempts++) +{ + eval { + $current_logfiles = slurp_file($node->data_dir . '/current_logfiles'); + }; + last unless $@; + usleep(100_000); +} +die $@ if $@; + +note "current_logfiles = $current_logfiles"; + +like( + $current_logfiles, + qr|^stderr log/postgresql-.*log$|, + 'current_logfiles is sane'); + +my $lfname = $current_logfiles; +$lfname =~ s/^stderr //; +chomp $lfname; + +my $first_logfile; +my $bt_occurence_count; + +# Verify that the backtraces of the processes are logged into logfile. +for (my $attempts = 0; $attempts < $max_attempts; $attempts++) +{ + $first_logfile = $node->data_dir . '/' . $lfname; + chomp $first_logfile; + print "file is $first_logfile"; + open my $fh, '<', $first_logfile + or die "Could not open '$first_logfile' $!"; + while (my $line = <$fh>) + { + chomp $line; + if ($line =~ m/current backtrace/) + { + $bt_occurence_count++; + } + } + last if $bt_occurence_count == 6; + usleep(100_000); +} + +is($bt_occurence_count, 6, 'found expected back trace in the log file'); + +$node->stop(); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index dd64c3b..9fa651b 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11369,4 +11369,12 @@ proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text', prosrc => 'unicode_is_normalized' }, +# function to get the callstack of server process +{ oid => '8908', descr => 'print callstack of process that are part of this instance', + proname => 'pg_print_callstack', provolatile => 'v', prorettype => 'bool', + proargtypes => '', prosrc => 'pg_print_callstack' }, +{ oid => '8909', descr => 'print callstack of process that are part of this instance', + proname => 'pg_print_callstack', provolatile => 'v', prorettype => 'bool', + proargtypes => 'int4', prosrc => 'pg_print_callstack_pid' }, + ] diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 1bdc97e..0d00ff9 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -92,6 +92,8 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount; extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount; extern PGDLLIMPORT volatile uint32 CritSectionCount; +extern PGDLLIMPORT volatile sig_atomic_t PrintBacktracePending; + /* in tcop/postgres.c */ extern void ProcessInterrupts(void); diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index dbbed18..5d66c79 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -42,6 +42,8 @@ typedef enum PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */ PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */ + PMSIGNAL_BACKTRACE_EMIT, /* send PROCSIG_BACKTRACE_PRINT to all backend */ + NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 4ae7dc3..80cb281 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -43,6 +43,8 @@ typedef enum PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + PROCSIG_BACKTRACE_PRINT, /* ask backend to print the current backtrace */ + NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; @@ -71,5 +73,8 @@ extern void WaitForProcSignalBarrier(uint64 generation); extern void ProcessProcSignalBarrier(void); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); +extern bool EmitProcSignalPrintCallStack(int bt_pid); +extern bool HandlePrintCallStack(int bt_pid); +extern bool IsCallStackSupportedPid(int pid); #endif /* PROCSIGNAL_H */ diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index e547210..0f8b74d 100644 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -71,6 +71,7 @@ extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from S extern void ProcessClientReadInterrupt(bool blocked); extern void ProcessClientWriteInterrupt(bool blocked); +extern void LogBackTrace(void); /* Called from EmitProcSignalPrintCallStack */ extern void process_postgres_switches(int argc, char *argv[], GucContext ctx, const char **dbname); extern void PostgresMain(int argc, char *argv[], -- 1.8.3.1