OSDN Git Service

Clean up some awkward, inaccurate, and inefficient processing around
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 2 May 2010 02:10:33 +0000 (02:10 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 2 May 2010 02:10:33 +0000 (02:10 +0000)
MaxStandbyDelay.  Use the GUC units mechanism for the value, and choose more
appropriate timestamp functions for performing tests with it.  Make the
ps_activity manipulation in ResolveRecoveryConflictWithVirtualXIDs have
behavior similar to ps_activity code elsewhere, notably not updating the
display when update_process_title is off and not truncating the display
contents at an arbitrarily-chosen length.  Improve the docs to be explicit
about what MaxStandbyDelay actually measures, viz the difference between
primary and standby servers' clocks, and the possible hazards if their clocks
aren't in sync.

doc/src/sgml/config.sgml
doc/src/sgml/high-availability.sgml
src/backend/access/transam/xlog.c
src/backend/storage/ipc/standby.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample

index c84529f..f9c13e7 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.272 2010/04/29 21:36:18 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.273 2010/05/02 02:10:32 tgl Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -1947,13 +1947,12 @@ SET ENABLE_SEQSCAN TO OFF;
        <para>
         When Hot Standby is active, this parameter specifies a wait policy
         for applying WAL entries that conflict with active queries.
-        If a conflict should occur the server will delay up to this number
-        of seconds before it cancels conflicting queries, as
+        If a conflict should occur the server will delay up to this long
+        before it cancels conflicting queries, as
         described in <xref linkend="hot-standby-conflict">.
-        Typically, this parameter is used only during replication.
-        The value is specified in seconds, and -1 causes the standby to wait
-        forever for a conflicting query to complete.
         The default is 30 seconds.
+        A value of -1 causes the standby to wait forever for a conflicting
+        query to complete.
         This parameter can only be set in the <filename>postgresql.conf</>
         file or on the server command line.
        </para>
@@ -1964,7 +1963,7 @@ SET ENABLE_SEQSCAN TO OFF;
       </para>
       <para>
        While it is tempting to believe that <varname>max_standby_delay</>
-       is the maximum number of seconds a query can run before
+       is the maximum length of time a query can run before
        cancellation is possible, this is not true.  When a long-running
        query ends, there is a finite time required to apply backlogged
        WAL logs.  If a second long-running query appears before the
index b2f1a58..ef4c1bb 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.65 2010/04/29 21:36:18 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.66 2010/05/02 02:10:32 tgl Exp $ -->
 
 <chapter id="high-availability">
  <title>High Availability, Load Balancing, and Replication</title>
@@ -1480,7 +1480,8 @@ if (!triggered)
     There are a number of choices for resolving query conflicts.  The default
     is to wait and hope the query finishes. The server will wait
     automatically until the lag between primary and standby is at most
-    <varname>max_standby_delay</> seconds. Once that grace period expires,
+    <xref linkend="guc-max-standby-delay"> (30 seconds by default).
+    Once that grace period expires,
     one of the following actions is taken:
 
       <itemizedlist>
@@ -1514,15 +1515,28 @@ if (!triggered)
    </para>
 
    <para>
-    <varname>max_standby_delay</> is set in <filename>postgresql.conf</>.
-    The parameter applies to the server as a whole, so if the delay is consumed
-    by a single query then there may be little or no waiting for queries that
-    follow, though they will have benefited equally from the initial
-    waiting period. The server may take time to catch up again before the grace
-    period is available again, though if there is a heavy and constant stream
-    of conflicts it may seldom catch up fully.
+    Keep in mind that <varname>max_standby_delay</> is compared to the
+    difference between the standby server's clock and the transaction
+    commit timestamps read from the WAL log.  Thus, the grace period
+    allowed to any one query on the standby is never more than
+    <varname>max_standby_delay</>, and could be considerably less if the
+    standby has already fallen behind as a result of waiting for previous
+    queries to complete, or as a result of being unable to keep up with a
+    heavy update load.
    </para>
 
+   <caution>
+    <para>
+     Be sure that the primary and standby servers' clocks are kept in sync;
+     otherwise the values compared to <varname>max_standby_delay</> will be
+     erroneous, possibly leading to undesirable query cancellations.
+     If the clocks are intentionally not in sync, or if there is a large
+     propagation delay from primary to standby, it is advisable to set
+     <varname>max_standby_delay</> to -1.  In any case the value should be
+     larger than the largest expected clock skew between primary and standby.
+    </para>
+   </caution>
+
    <para>
     Users should be clear that tables that are regularly and heavily updated on the
     primary server will quickly cause cancellation of longer running queries on
@@ -1656,7 +1670,7 @@ LOG:  database system is ready to accept read only connections
     <varname>max_standby_delay</> or even set it to zero, though that is a
     very aggressive setting. If the standby server is tasked as an additional
     server for decision support queries then it might be acceptable to set this
-    to a value of many hours (in seconds).  It is also possible to set
+    to a value of many hours.  It is also possible to set
     <varname>max_standby_delay</> to -1 which means wait forever for queries
     to complete; this will be useful when performing
     an archive recovery from a backup.
index 61dfb20..99e53b7 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.407 2010/04/29 21:49:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.408 2010/05/02 02:10:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -72,7 +72,7 @@ int                   XLogArchiveTimeout = 0;
 bool           XLogArchiveMode = false;
 char      *XLogArchiveCommand = NULL;
 bool           EnableHotStandby = false;
-int                    MaxStandbyDelay = 30;
+int                    MaxStandbyDelay = 30 * 1000;
 bool           fullPageWrites = true;
 bool           log_checkpoints = false;
 int                    sync_method = DEFAULT_SYNC_METHOD;
index 3072332..6bd1568 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.20 2010/04/28 16:10:42 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.21 2010/05/02 02:10:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -124,31 +124,24 @@ static int        standbyWait_us = STANDBY_INITIAL_WAIT_US;
 static bool
 WaitExceedsMaxStandbyDelay(void)
 {
-       long            delay_secs;
-       int                     delay_usecs;
-
-       if (MaxStandbyDelay == -1)
-               return false;
-
        /* Are we past max_standby_delay? */
-       TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
-                                               &delay_secs, &delay_usecs);
-       if (delay_secs > MaxStandbyDelay)
+       if (MaxStandbyDelay >= 0 &&
+               TimestampDifferenceExceeds(GetLatestXLogTime(), GetCurrentTimestamp(),
+                                                                  MaxStandbyDelay))
                return true;
 
        /*
-        * Sleep, then do bookkeeping.
+        * Sleep a bit (this is essential to avoid busy-waiting).
         */
        pg_usleep(standbyWait_us);
 
        /*
-        * Progressively increase the sleep times.
+        * Progressively increase the sleep times, but not to more than 1s,
+        * since pg_usleep isn't interruptable on some platforms.
         */
        standbyWait_us *= 2;
        if (standbyWait_us > 1000000)
                standbyWait_us = 1000000;
-       if (standbyWait_us > MaxStandbyDelay * 1000000 / 4)
-               standbyWait_us = MaxStandbyDelay * 1000000 / 4;
 
        return false;
 }
@@ -163,50 +156,41 @@ static void
 ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                                                                           ProcSignalReason reason)
 {
-       char            waitactivitymsg[100];
-       char            oldactivitymsg[101];
-
        while (VirtualTransactionIdIsValid(*waitlist))
        {
-               long            wait_s;
-               int                     wait_us;        /* wait in microseconds (us) */
                TimestampTz waitStart;
-               bool            logged;
+               char       *new_status;
+
+               pgstat_report_waiting(true);
 
                waitStart = GetCurrentTimestamp();
+               new_status = NULL;              /* we haven't changed the ps display */
+
+               /* reset standbyWait_us for each xact we wait for */
                standbyWait_us = STANDBY_INITIAL_WAIT_US;
-               logged = false;
 
                /* wait until the virtual xid is gone */
                while (!ConditionalVirtualXactLockTableWait(*waitlist))
                {
                        /*
-                        * Report if we have been waiting for a while now...
+                        * Report via ps if we have been waiting for more than 500 msec
+                        * (should that be configurable?)
                         */
-                       TimestampTz now = GetCurrentTimestamp();
-
-                       TimestampDifference(waitStart, now, &wait_s, &wait_us);
-                       if (!logged && (wait_s > 0 || wait_us > 500000))
+                       if (update_process_title && new_status == NULL &&
+                               TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
+                                                                                  500))
                        {
-                               const char *oldactivitymsgp;
+                               const char *old_status;
                                int                     len;
 
-                               oldactivitymsgp = get_ps_display(&len);
-
-                               if (len > 100)
-                                       len = 100;
-
-                               memcpy(oldactivitymsg, oldactivitymsgp, len);
-                               oldactivitymsg[len] = 0;
-
-                               snprintf(waitactivitymsg, sizeof(waitactivitymsg),
-                                                "waiting for max_standby_delay (%u s)",
+                               old_status = get_ps_display(&len);
+                               new_status = (char *) palloc(len + 50);
+                               memcpy(new_status, old_status, len);
+                               snprintf(new_status + len, 50,
+                                                " waiting for max_standby_delay (%d ms)",
                                                 MaxStandbyDelay);
-                               set_ps_display(waitactivitymsg, false);
-
-                               pgstat_report_waiting(true);
-
-                               logged = true;
+                               set_ps_display(new_status, false);
+                               new_status[len] = '\0'; /* truncate off " waiting" */
                        }
 
                        /* Is it time to kill it? */
@@ -225,16 +209,17 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                                 * unresponsive backend when system is heavily loaded.
                                 */
                                if (pid != 0)
-                                       pg_usleep(5000);
+                                       pg_usleep(5000L);
                        }
                }
 
-               /* Reset ps display */
-               if (logged)
+               /* Reset ps display if we changed it */
+               if (new_status)
                {
-                       set_ps_display(oldactivitymsg, false);
-                       pgstat_report_waiting(false);
+                       set_ps_display(new_status, false);
+                       pfree(new_status);
                }
+               pgstat_report_waiting(false);
 
                /* The virtual transaction is gone now, wait for the next one */
                waitlist++;
@@ -401,7 +386,7 @@ ResolveRecoveryConflictWithBufferPin(void)
                 */
                SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
        }
-       else if (MaxStandbyDelay == -1)
+       else if (MaxStandbyDelay < 0)
        {
                /*
                 * Send out a request to check for buffer pin deadlocks before we
@@ -412,17 +397,11 @@ ResolveRecoveryConflictWithBufferPin(void)
        }
        else
        {
-               TimestampTz now;
-               long            standby_delay_secs; /* How far Startup process is lagging */
-               int                     standby_delay_usecs;
-
-               now = GetCurrentTimestamp();
+               TimestampTz then = GetLatestXLogTime();
+               TimestampTz now = GetCurrentTimestamp();
 
                /* Are we past max_standby_delay? */
-               TimestampDifference(GetLatestXLogTime(), now,
-                                                       &standby_delay_secs, &standby_delay_usecs);
-
-               if (standby_delay_secs >= MaxStandbyDelay)
+               if (TimestampDifferenceExceeds(then, now, MaxStandbyDelay))
                {
                        /*
                         * We're already behind, so clear a path as quickly as possible.
@@ -434,7 +413,7 @@ ResolveRecoveryConflictWithBufferPin(void)
                        TimestampTz fin_time;           /* Expected wake-up time by timer */
                        long            timer_delay_secs;               /* Amount of time we set timer
                                                                                                 * for */
-                       int                     timer_delay_usecs = 0;
+                       int                     timer_delay_usecs;
 
                        /*
                         * Send out a request to check for buffer pin deadlocks before we
@@ -446,12 +425,10 @@ ResolveRecoveryConflictWithBufferPin(void)
                        /*
                         * How much longer we should wait?
                         */
-                       timer_delay_secs = MaxStandbyDelay - standby_delay_secs;
-                       if (standby_delay_usecs > 0)
-                       {
-                               timer_delay_secs -= 1;
-                               timer_delay_usecs = 1000000 - standby_delay_usecs;
-                       }
+                       fin_time = TimestampTzPlusMilliseconds(then, MaxStandbyDelay);
+
+                       TimestampDifference(now, fin_time,
+                                                               &timer_delay_secs, &timer_delay_usecs);
 
                        /*
                         * It's possible that the difference is less than a microsecond;
@@ -460,13 +437,6 @@ ResolveRecoveryConflictWithBufferPin(void)
                        if (timer_delay_secs == 0 && timer_delay_usecs == 0)
                                timer_delay_usecs = 1;
 
-                       /*
-                        * When is the finish time? We recheck this if we are woken early.
-                        */
-                       fin_time = TimestampTzPlusMilliseconds(now,
-                                                                                                  (timer_delay_secs * 1000) +
-                                                                                                (timer_delay_usecs / 1000));
-
                        if (enable_standby_sig_alarm(timer_delay_secs, timer_delay_usecs, fin_time))
                                sig_alarm_enabled = true;
                        else
index d913137..430c375 100644 (file)
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.553 2010/04/29 21:36:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.554 2010/05/02 02:10:33 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1386,10 +1386,11 @@ static struct config_int ConfigureNamesInt[] =
        {
                {"max_standby_delay", PGC_SIGHUP, WAL_SETTINGS,
                        gettext_noop("Sets the maximum delay to avoid conflict processing on hot standby servers."),
-                       NULL
+                       NULL,
+                       GUC_UNIT_MS
                },
                &MaxStandbyDelay,
-               30, -1, INT_MAX, NULL, NULL
+               30 * 1000, -1, INT_MAX / 1000, NULL, NULL
        },
 
        {
index 2ed8412..94a4e69 100644 (file)
 # - Hot Standby -
 
 #hot_standby = off             # allows queries during recovery
-#max_standby_delay = 30s       # max acceptable lag (s) to allow queries to
+#max_standby_delay = 30s       # max acceptable lag to allow queries to
                                # complete without conflict; -1 means forever
 #vacuum_defer_cleanup_age = 0  # num transactions by which cleanup is deferred