From: Tom Lane Date: Mon, 14 Jun 2004 18:08:19 +0000 (+0000) Subject: Arrange to explicitly stop the pgstat processes at the same time we X-Git-Tag: REL9_0_0~12537 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=bbe42a1514f090084a2df55d3c51b5cab2d8e556;p=pg-rex%2Fsyncrep.git Arrange to explicitly stop the pgstat processes at the same time we begin the shutdown checkpoint; there isn't anything left for them to do, so we may as well ensure that they shut down sooner rather than later. Per discussion. --- diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 1a904cc8db..813f70d5db 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -13,7 +13,7 @@ * * Copyright (c) 2001-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.74 2004/06/03 02:08:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.75 2004/06/14 18:08:18 tgl Exp $ * ---------- */ #include "postgres.h" @@ -66,12 +66,6 @@ bool pgstat_collect_tuplelevel = false; bool pgstat_collect_blocklevel = false; /* ---------- - * Other global variables - * ---------- - */ -bool pgstat_is_running = false; - -/* ---------- * Local data * ---------- */ @@ -79,7 +73,6 @@ NON_EXEC_STATIC int pgStatSock = -1; static int pgStatPipe[2]; static struct sockaddr_storage pgStatAddr; -static int pgStatPid; static time_t last_pgstat_start_time; static long pgStatNumMessages = 0; @@ -125,6 +118,7 @@ static void pgstat_parseArgs(int argc, char *argv[]); NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]); NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]); static void pgstat_recvbuffer(void); +static void pgstat_exit(SIGNAL_ARGS); static void pgstat_die(SIGNAL_ARGS); static int pgstat_add_backend(PgStat_MsgHdr *msg); @@ -496,19 +490,22 @@ pgstat_parseArgs(int argc, char *argv[]) * Called from postmaster at startup or after an existing collector * died. Attempt to fire up a fresh statistics collector. * + * Returns PID of child process, or 0 if fail. + * * Note: if fail, we will be called again from the postmaster main loop. * ---------- */ -void +int pgstat_start(void) { time_t curtime; + pid_t pgStatPid; /* * Do nothing if no collector needed */ - if (pgstat_is_running || !pgstat_collect_startcollector) - return; + if (!pgstat_collect_startcollector) + return 0; /* * Do nothing if too soon since last collector start. This is a @@ -520,7 +517,7 @@ pgstat_start(void) curtime = time(NULL); if ((unsigned int) (curtime - last_pgstat_start_time) < (unsigned int) PGSTAT_RESTART_INTERVAL) - return; + return 0; last_pgstat_start_time = curtime; /* @@ -536,12 +533,11 @@ pgstat_start(void) * pgstat_collect_startcollector on after it had been off. */ pgstat_collect_startcollector = false; - return; + return 0; } /* - * Okay, fork off the collector. Remember its PID for - * pgstat_ispgstat. + * Okay, fork off the collector. */ fflush(stdout); @@ -553,9 +549,9 @@ pgstat_start(void) #endif #ifdef EXEC_BACKEND - switch ((pgStatPid = (int) pgstat_forkexec(STAT_PROC_BUFFER))) + switch ((pgStatPid = pgstat_forkexec(STAT_PROC_BUFFER))) #else - switch ((pgStatPid = (int) fork())) + switch ((pgStatPid = fork())) #endif { case -1: @@ -565,7 +561,7 @@ pgstat_start(void) #endif ereport(LOG, (errmsg("could not fork statistics buffer: %m"))); - return; + return 0; #ifndef EXEC_BACKEND case 0: @@ -585,32 +581,11 @@ pgstat_start(void) #endif default: - pgstat_is_running = true; - return; + return (int) pgStatPid; } -} - -/* ---------- - * pgstat_ispgstat() - - * - * Called from postmaster to check if a terminated child process - * was the statistics collector. - * ---------- - */ -bool -pgstat_ispgstat(int pid) -{ - if (!pgstat_is_running) - return false; - - if (pgStatPid != pid) - return false; - - /* Oh dear ... */ - pgstat_is_running = false; - - return true; + /* shouldn't get here */ + return 0; } @@ -1381,12 +1356,12 @@ PgstatBufferMain(int argc, char *argv[]) /* * Ignore all signals usually bound to some action in the postmaster, - * except for SIGCHLD --- see pgstat_recvbuffer. + * except for SIGCHLD and SIGQUIT --- see pgstat_recvbuffer. */ pqsignal(SIGHUP, SIG_IGN); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, SIG_IGN); - pqsignal(SIGQUIT, SIG_IGN); + pqsignal(SIGQUIT, pgstat_exit); pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); @@ -1476,9 +1451,9 @@ PgstatCollectorMain(int argc, char *argv[]) /* * Reset signal handling. With the exception of restoring default - * SIGCHLD handling, this is a no-op in the non-EXEC_BACKEND case - * because we'll have inherited these settings from the buffer process; - * but it's not a no-op for EXEC_BACKEND. + * SIGCHLD and SIGQUIT handling, this is a no-op in the non-EXEC_BACKEND + * case because we'll have inherited these settings from the buffer + * process; but it's not a no-op for EXEC_BACKEND. */ pqsignal(SIGHUP, SIG_IGN); pqsignal(SIGINT, SIG_IGN); @@ -1885,9 +1860,9 @@ pgstat_recvbuffer(void) } /* - * Wait for some work to do; but not for more than 10 seconds - * (this determines how quickly we will shut down after postmaster - * termination). + * Wait for some work to do; but not for more than 10 seconds. + * (This determines how quickly we will shut down after an + * ungraceful postmaster termination; so it needn't be very fast.) */ timeout.tv_sec = 10; timeout.tv_usec = 0; @@ -1992,19 +1967,33 @@ pgstat_recvbuffer(void) /* * Make sure we forwarded all messages before we check for - * Postmaster termination. + * postmaster termination. */ if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds)) continue; /* - * If the postmaster has terminated, we've done our job. + * If the postmaster has terminated, we die too. (This is no longer + * the normal exit path, however.) */ if (!PostmasterIsAlive(true)) exit(0); } } +/* SIGQUIT signal handler for buffer process */ +static void +pgstat_exit(SIGNAL_ARGS) +{ + /* + * For now, we just nail the doors shut and get out of town. It might + * be cleaner to allow any pending messages to be sent, but that creates + * a tradeoff against speed of exit. + */ + exit(0); +} + +/* SIGCHLD signal handler for buffer process */ static void pgstat_die(SIGNAL_ARGS) { diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 2081eb3650..5a591f2f49 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.403 2004/06/11 03:54:43 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.404 2004/06/14 18:08:19 tgl Exp $ * * NOTES * @@ -191,7 +191,8 @@ char *preload_libraries_string = NULL; /* PIDs of special child processes; 0 when not running */ static pid_t StartupPID = 0, - BgWriterPID = 0; + BgWriterPID = 0, + PgStatPID = 0; /* Startup/shutdown state */ #define NoShutdown 0 @@ -812,10 +813,9 @@ PostmasterMain(int argc, char *argv[]) whereToSendOutput = None; /* - * Initialize and try to startup the statistics collector process + * Initialize the statistics collector stuff */ pgstat_init(); - pgstat_start(); /* * Load cached files for client authentication. @@ -1149,8 +1149,9 @@ ServerLoop(void) } /* If we have lost the stats collector, try to start a new one */ - if (!pgstat_is_running) - pgstat_start(); + if (PgStatPID == 0 && + StartupPID == 0 && !FatalError && Shutdown == NoShutdown) + PgStatPID = pgstat_start(); /* * Touch the socket and lock file at least every ten minutes, to ensure @@ -1509,14 +1510,6 @@ processCancelRequest(Port *port, void *pkt) backendPID = (int) ntohl(canc->backendPID); cancelAuthCode = (long) ntohl(canc->cancelAuthCode); - if (backendPID == BgWriterPID) - { - ereport(DEBUG2, - (errmsg_internal("ignoring cancel request for bgwriter process %d", - backendPID))); - return; - } - /* * See if we have a matching backend. In the EXEC_BACKEND case, we * can no longer access the postmaster's own backend list, and must @@ -1698,6 +1691,7 @@ SIGHUP_handler(SIGNAL_ARGS) SignalChildren(SIGHUP); if (BgWriterPID != 0) kill(BgWriterPID, SIGHUP); + /* PgStatPID does not currently need SIGHUP */ load_hba(); load_ident(); @@ -1755,6 +1749,9 @@ pmdie(SIGNAL_ARGS) /* And tell it to shut down */ if (BgWriterPID != 0) kill(BgWriterPID, SIGUSR2); + /* Tell pgstat to shut down too; nothing left for it to do */ + if (PgStatPID != 0) + kill(PgStatPID, SIGQUIT); break; case SIGINT: @@ -1796,6 +1793,9 @@ pmdie(SIGNAL_ARGS) /* And tell it to shut down */ if (BgWriterPID != 0) kill(BgWriterPID, SIGUSR2); + /* Tell pgstat to shut down too; nothing left for it to do */ + if (PgStatPID != 0) + kill(PgStatPID, SIGQUIT); break; case SIGQUIT: @@ -1811,6 +1811,8 @@ pmdie(SIGNAL_ARGS) kill(StartupPID, SIGQUIT); if (BgWriterPID != 0) kill(BgWriterPID, SIGQUIT); + if (PgStatPID != 0) + kill(PgStatPID, SIGQUIT); if (DLGetHead(BackendList)) SignalChildren(SIGQUIT); ExitPostmaster(0); @@ -1868,19 +1870,6 @@ reaper(SIGNAL_ARGS) #endif /* HAVE_WAITPID */ /* - * Check if this child was the statistics collector. If so, try to - * start a new one. (If fail, we'll try again in future cycles of - * the main loop.) - */ - if (pgstat_ispgstat(pid)) - { - LogChildExit(LOG, gettext("statistics collector process"), - pid, exitstatus); - pgstat_start(); - continue; - } - - /* * Check if this child was a startup process. */ if (StartupPID != 0 && pid == StartupPID) @@ -1909,9 +1898,12 @@ reaper(SIGNAL_ARGS) /* * Go to shutdown mode if a shutdown request was pending. + * Otherwise, try to start the stats collector too. */ if (Shutdown > NoShutdown && BgWriterPID != 0) kill(BgWriterPID, SIGUSR2); + else if (PgStatPID == 0 && Shutdown == NoShutdown) + PgStatPID = pgstat_start(); continue; } @@ -1921,6 +1913,7 @@ reaper(SIGNAL_ARGS) */ if (BgWriterPID != 0 && pid == BgWriterPID) { + BgWriterPID = 0; if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError && !DLGetHead(BackendList)) { @@ -1936,13 +1929,29 @@ reaper(SIGNAL_ARGS) /* * Any unexpected exit of the bgwriter is treated as a crash. */ - LogChildExit(DEBUG2, gettext("background writer process"), + LogChildExit(LOG, gettext("background writer process"), pid, exitstatus); HandleChildCrash(pid, exitstatus); continue; } /* + * Was it the statistics collector? If so, just try to start a new + * one; no need to force reset of the rest of the system. (If fail, + * we'll try again in future cycles of the main loop.) + */ + if (PgStatPID != 0 && pid == PgStatPID) + { + PgStatPID = 0; + if (exitstatus != 0) + LogChildExit(LOG, gettext("statistics collector process"), + pid, exitstatus); + if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown) + PgStatPID = pgstat_start(); + continue; + } + + /* * Else do standard backend child cleanup. */ CleanupProc(pid, exitstatus); @@ -2113,6 +2122,17 @@ HandleChildCrash(int pid, kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); } + /* Force a power-cycle of the pgstat processes too */ + /* (Shouldn't be necessary, but just for luck) */ + if (PgStatPID != 0 && !FatalError) + { + ereport(DEBUG2, + (errmsg_internal("sending %s to process %d", + "SIGQUIT", + (int) PgStatPID))); + kill(PgStatPID, SIGQUIT); + } + FatalError = true; } @@ -2152,7 +2172,7 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) } /* - * Send a signal to all backend children. + * Send a signal to all backend children (but NOT special children) */ static void SignalChildren(int signal) @@ -2954,7 +2974,7 @@ PostmasterRandom(void) } /* - * Count up number of child processes. + * Count up number of child processes (regular backends only) */ static int CountChildren(void) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 96d3af7ab0..61cb86bb3c 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -5,7 +5,7 @@ * * Copyright (c) 2001-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.23 2004/05/29 22:48:22 tgl Exp $ + * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.24 2004/06/14 18:08:19 tgl Exp $ * ---------- */ #ifndef PGSTAT_H @@ -335,20 +335,13 @@ extern bool pgstat_collect_querystring; extern bool pgstat_collect_tuplelevel; extern bool pgstat_collect_blocklevel; -/* ---------- - * Other global variables - * ---------- - */ -extern bool pgstat_is_running; - /* ---------- * Functions called from postmaster * ---------- */ extern void pgstat_init(void); -extern void pgstat_start(void); -extern bool pgstat_ispgstat(int pid); +extern int pgstat_start(void); extern void pgstat_beterm(int pid); #ifdef EXEC_BACKEND