OSDN Git Service

Arrange to explicitly stop the pgstat processes at the same time we
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 14 Jun 2004 18:08:19 +0000 (18:08 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 14 Jun 2004 18:08:19 +0000 (18:08 +0000)
begin the shutdown checkpoint; there isn't anything left for them to do,
so we may as well ensure that they shut down sooner rather than later.
Per discussion.

src/backend/postmaster/pgstat.c
src/backend/postmaster/postmaster.c
src/include/pgstat.h

index 1a904cc..813f70d 100644 (file)
@@ -13,7 +13,7 @@
  *
  *     Copyright (c) 2001-2003, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.74 2004/06/03 02:08:03 tgl Exp $
+ *     $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.75 2004/06/14 18:08:18 tgl Exp $
  * ----------
  */
 #include "postgres.h"
@@ -66,12 +66,6 @@ bool         pgstat_collect_tuplelevel = false;
 bool           pgstat_collect_blocklevel = false;
 
 /* ----------
- * Other global variables
- * ----------
- */
-bool           pgstat_is_running = false;
-
-/* ----------
  * Local data
  * ----------
  */
@@ -79,7 +73,6 @@ NON_EXEC_STATIC int   pgStatSock = -1;
 static int     pgStatPipe[2];
 static struct sockaddr_storage pgStatAddr;
 
-static int     pgStatPid;
 static time_t last_pgstat_start_time;
 
 static long pgStatNumMessages = 0;
@@ -125,6 +118,7 @@ static void pgstat_parseArgs(int argc, char *argv[]);
 NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]);
 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
 static void pgstat_recvbuffer(void);
+static void pgstat_exit(SIGNAL_ARGS);
 static void pgstat_die(SIGNAL_ARGS);
 
 static int     pgstat_add_backend(PgStat_MsgHdr *msg);
@@ -496,19 +490,22 @@ pgstat_parseArgs(int argc, char *argv[])
  *     Called from postmaster at startup or after an existing collector
  *     died.  Attempt to fire up a fresh statistics collector.
  *
+ *     Returns PID of child process, or 0 if fail.
+ *
  *     Note: if fail, we will be called again from the postmaster main loop.
  * ----------
  */
-void
+int
 pgstat_start(void)
 {
        time_t          curtime;
+       pid_t           pgStatPid;
 
        /*
         * Do nothing if no collector needed
         */
-       if (pgstat_is_running || !pgstat_collect_startcollector)
-               return;
+       if (!pgstat_collect_startcollector)
+               return 0;
 
        /*
         * Do nothing if too soon since last collector start.  This is a
@@ -520,7 +517,7 @@ pgstat_start(void)
        curtime = time(NULL);
        if ((unsigned int) (curtime - last_pgstat_start_time) <
                (unsigned int) PGSTAT_RESTART_INTERVAL)
-               return;
+               return 0;
        last_pgstat_start_time = curtime;
 
        /*
@@ -536,12 +533,11 @@ pgstat_start(void)
                 * pgstat_collect_startcollector on after it had been off.
                 */
                pgstat_collect_startcollector = false;
-               return;
+               return 0;
        }
 
        /*
-        * Okay, fork off the collector.  Remember its PID for
-        * pgstat_ispgstat.
+        * Okay, fork off the collector.
         */
 
        fflush(stdout);
@@ -553,9 +549,9 @@ pgstat_start(void)
 #endif
 
 #ifdef EXEC_BACKEND
-       switch ((pgStatPid = (int) pgstat_forkexec(STAT_PROC_BUFFER)))
+       switch ((pgStatPid = pgstat_forkexec(STAT_PROC_BUFFER)))
 #else
-       switch ((pgStatPid = (int) fork()))
+       switch ((pgStatPid = fork()))
 #endif
        {
                case -1:
@@ -565,7 +561,7 @@ pgstat_start(void)
 #endif
                        ereport(LOG,
                                        (errmsg("could not fork statistics buffer: %m")));
-                       return;
+                       return 0;
 
 #ifndef EXEC_BACKEND
                case 0:
@@ -585,32 +581,11 @@ pgstat_start(void)
 #endif
 
                default:
-                       pgstat_is_running = true;
-                       return;
+                       return (int) pgStatPid;
        }
-}
 
-
-/* ----------
- * pgstat_ispgstat() -
- *
- *     Called from postmaster to check if a terminated child process
- *     was the statistics collector.
- * ----------
- */
-bool
-pgstat_ispgstat(int pid)
-{
-       if (!pgstat_is_running)
-               return false;
-
-       if (pgStatPid != pid)
-               return false;
-
-       /* Oh dear ... */
-       pgstat_is_running = false;
-
-       return true;
+       /* shouldn't get here */
+       return 0;
 }
 
 
@@ -1381,12 +1356,12 @@ PgstatBufferMain(int argc, char *argv[])
 
        /*
         * Ignore all signals usually bound to some action in the postmaster,
-        * except for SIGCHLD --- see pgstat_recvbuffer.
+        * except for SIGCHLD and SIGQUIT --- see pgstat_recvbuffer.
         */
        pqsignal(SIGHUP, SIG_IGN);
        pqsignal(SIGINT, SIG_IGN);
        pqsignal(SIGTERM, SIG_IGN);
-       pqsignal(SIGQUIT, SIG_IGN);
+       pqsignal(SIGQUIT, pgstat_exit);
        pqsignal(SIGALRM, SIG_IGN);
        pqsignal(SIGPIPE, SIG_IGN);
        pqsignal(SIGUSR1, SIG_IGN);
@@ -1476,9 +1451,9 @@ PgstatCollectorMain(int argc, char *argv[])
 
        /*
         * Reset signal handling.  With the exception of restoring default
-        * SIGCHLD handling, this is a no-op in the non-EXEC_BACKEND case
-        * because we'll have inherited these settings from the buffer process;
-        * but it's not a no-op for EXEC_BACKEND.
+        * SIGCHLD and SIGQUIT handling, this is a no-op in the non-EXEC_BACKEND
+        * case because we'll have inherited these settings from the buffer
+        * process; but it's not a no-op for EXEC_BACKEND.
         */
        pqsignal(SIGHUP, SIG_IGN);
        pqsignal(SIGINT, SIG_IGN);
@@ -1885,9 +1860,9 @@ pgstat_recvbuffer(void)
                }
 
                /*
-                * Wait for some work to do; but not for more than 10 seconds
-                * (this determines how quickly we will shut down after postmaster
-                * termination).
+                * Wait for some work to do; but not for more than 10 seconds.
+                * (This determines how quickly we will shut down after an
+                * ungraceful postmaster termination; so it needn't be very fast.)
                 */
                timeout.tv_sec = 10;
                timeout.tv_usec = 0;
@@ -1992,19 +1967,33 @@ pgstat_recvbuffer(void)
 
                /*
                 * Make sure we forwarded all messages before we check for
-                * Postmaster termination.
+                * postmaster termination.
                 */
                if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds))
                        continue;
 
                /*
-                * If the postmaster has terminated, we've done our job.
+                * If the postmaster has terminated, we die too.  (This is no longer
+                * the normal exit path, however.)
                 */
                if (!PostmasterIsAlive(true))
                        exit(0);
        }
 }
 
+/* SIGQUIT signal handler for buffer process */
+static void
+pgstat_exit(SIGNAL_ARGS)
+{
+       /*
+        * For now, we just nail the doors shut and get out of town.  It might
+        * be cleaner to allow any pending messages to be sent, but that creates
+        * a tradeoff against speed of exit.
+        */
+       exit(0);
+}
+
+/* SIGCHLD signal handler for buffer process */
 static void
 pgstat_die(SIGNAL_ARGS)
 {
index 2081eb3..5a591f2 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.403 2004/06/11 03:54:43 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.404 2004/06/14 18:08:19 tgl Exp $
  *
  * NOTES
  *
@@ -191,7 +191,8 @@ char           *preload_libraries_string = NULL;
 
 /* PIDs of special child processes; 0 when not running */
 static pid_t StartupPID = 0,
-                       BgWriterPID = 0;
+                       BgWriterPID = 0,
+                       PgStatPID = 0;
 
 /* Startup/shutdown state */
 #define                        NoShutdown              0
@@ -812,10 +813,9 @@ PostmasterMain(int argc, char *argv[])
        whereToSendOutput = None;
 
        /*
-        * Initialize and try to startup the statistics collector process
+        * Initialize the statistics collector stuff
         */
        pgstat_init();
-       pgstat_start();
 
        /*
         * Load cached files for client authentication.
@@ -1149,8 +1149,9 @@ ServerLoop(void)
                }
 
                /* If we have lost the stats collector, try to start a new one */
-               if (!pgstat_is_running)
-                       pgstat_start();
+               if (PgStatPID == 0 &&
+                       StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
+                       PgStatPID = pgstat_start();
 
                /*
                 * Touch the socket and lock file at least every ten minutes, to ensure
@@ -1509,14 +1510,6 @@ processCancelRequest(Port *port, void *pkt)
        backendPID = (int) ntohl(canc->backendPID);
        cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
 
-       if (backendPID == BgWriterPID)
-       {
-               ereport(DEBUG2,
-                               (errmsg_internal("ignoring cancel request for bgwriter process %d",
-                                                                backendPID)));
-               return;
-       }
-
        /*
         * See if we have a matching backend.  In the EXEC_BACKEND case, we
         * can no longer access the postmaster's own backend list, and must
@@ -1698,6 +1691,7 @@ SIGHUP_handler(SIGNAL_ARGS)
                SignalChildren(SIGHUP);
                if (BgWriterPID != 0)
                        kill(BgWriterPID, SIGHUP);
+               /* PgStatPID does not currently need SIGHUP */
                load_hba();
                load_ident();
 
@@ -1755,6 +1749,9 @@ pmdie(SIGNAL_ARGS)
                        /* And tell it to shut down */
                        if (BgWriterPID != 0)
                                kill(BgWriterPID, SIGUSR2);
+                       /* Tell pgstat to shut down too; nothing left for it to do */
+                       if (PgStatPID != 0)
+                               kill(PgStatPID, SIGQUIT);
                        break;
 
                case SIGINT:
@@ -1796,6 +1793,9 @@ pmdie(SIGNAL_ARGS)
                        /* And tell it to shut down */
                        if (BgWriterPID != 0)
                                kill(BgWriterPID, SIGUSR2);
+                       /* Tell pgstat to shut down too; nothing left for it to do */
+                       if (PgStatPID != 0)
+                               kill(PgStatPID, SIGQUIT);
                        break;
 
                case SIGQUIT:
@@ -1811,6 +1811,8 @@ pmdie(SIGNAL_ARGS)
                                kill(StartupPID, SIGQUIT);
                        if (BgWriterPID != 0)
                                kill(BgWriterPID, SIGQUIT);
+                       if (PgStatPID != 0)
+                               kill(PgStatPID, SIGQUIT);
                        if (DLGetHead(BackendList))
                                SignalChildren(SIGQUIT);
                        ExitPostmaster(0);
@@ -1868,19 +1870,6 @@ reaper(SIGNAL_ARGS)
 #endif /* HAVE_WAITPID */
 
                /*
-                * Check if this child was the statistics collector. If so, try to
-                * start a new one.  (If fail, we'll try again in future cycles of
-                * the main loop.)
-                */
-               if (pgstat_ispgstat(pid))
-               {
-                       LogChildExit(LOG, gettext("statistics collector process"),
-                                                pid, exitstatus);
-                       pgstat_start();
-                       continue;
-               }
-
-               /*
                 * Check if this child was a startup process.
                 */
                if (StartupPID != 0 && pid == StartupPID)
@@ -1909,9 +1898,12 @@ reaper(SIGNAL_ARGS)
 
                        /*
                         * Go to shutdown mode if a shutdown request was pending.
+                        * Otherwise, try to start the stats collector too.
                         */
                        if (Shutdown > NoShutdown && BgWriterPID != 0)
                                kill(BgWriterPID, SIGUSR2);
+                       else if (PgStatPID == 0 && Shutdown == NoShutdown)
+                               PgStatPID = pgstat_start();
 
                        continue;
                }
@@ -1921,6 +1913,7 @@ reaper(SIGNAL_ARGS)
                 */
                if (BgWriterPID != 0 && pid == BgWriterPID)
                {
+                       BgWriterPID = 0;
                        if (exitstatus == 0 && Shutdown > NoShutdown &&
                                !FatalError && !DLGetHead(BackendList))
                        {
@@ -1936,13 +1929,29 @@ reaper(SIGNAL_ARGS)
                        /*
                         * Any unexpected exit of the bgwriter is treated as a crash.
                         */
-                       LogChildExit(DEBUG2, gettext("background writer process"),
+                       LogChildExit(LOG, gettext("background writer process"),
                                                 pid, exitstatus);
                        HandleChildCrash(pid, exitstatus);
                        continue;
                }
 
                /*
+                * Was it the statistics collector?  If so, just try to start a new
+                * one; no need to force reset of the rest of the system.  (If fail,
+                * we'll try again in future cycles of the main loop.)
+                */
+               if (PgStatPID != 0 && pid == PgStatPID)
+               {
+                       PgStatPID = 0;
+                       if (exitstatus != 0)
+                               LogChildExit(LOG, gettext("statistics collector process"),
+                                                        pid, exitstatus);
+                       if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
+                               PgStatPID = pgstat_start();
+                       continue;
+               }
+
+               /*
                 * Else do standard backend child cleanup.
                 */
                CleanupProc(pid, exitstatus);
@@ -2113,6 +2122,17 @@ HandleChildCrash(int pid,
                kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
        }
 
+       /* Force a power-cycle of the pgstat processes too */
+       /* (Shouldn't be necessary, but just for luck) */
+       if (PgStatPID != 0 && !FatalError)
+       {
+               ereport(DEBUG2,
+                               (errmsg_internal("sending %s to process %d",
+                                                                "SIGQUIT",
+                                                                (int) PgStatPID)));
+               kill(PgStatPID, SIGQUIT);
+       }
+
        FatalError = true;
 }
 
@@ -2152,7 +2172,7 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 }
 
 /*
- * Send a signal to all backend children.
+ * Send a signal to all backend children (but NOT special children)
  */
 static void
 SignalChildren(int signal)
@@ -2954,7 +2974,7 @@ PostmasterRandom(void)
 }
 
 /*
- * Count up number of child processes.
+ * Count up number of child processes (regular backends only)
  */
 static int
 CountChildren(void)
index 96d3af7..61cb86b 100644 (file)
@@ -5,7 +5,7 @@
  *
  *     Copyright (c) 2001-2003, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/pgstat.h,v 1.23 2004/05/29 22:48:22 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/pgstat.h,v 1.24 2004/06/14 18:08:19 tgl Exp $
  * ----------
  */
 #ifndef PGSTAT_H
@@ -335,20 +335,13 @@ extern bool pgstat_collect_querystring;
 extern bool pgstat_collect_tuplelevel;
 extern bool pgstat_collect_blocklevel;
 
-/* ----------
- * Other global variables
- * ----------
- */
-extern bool pgstat_is_running;
-
 
 /* ----------
  * Functions called from postmaster
  * ----------
  */
 extern void pgstat_init(void);
-extern void pgstat_start(void);
-extern bool pgstat_ispgstat(int pid);
+extern int     pgstat_start(void);
 extern void pgstat_beterm(int pid);
 
 #ifdef EXEC_BACKEND