From f9ed327f760e7794faecfda54b19515a95ba901d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 2 May 2010 02:10:33 +0000 Subject: [PATCH] Clean up some awkward, inaccurate, and inefficient processing around MaxStandbyDelay. Use the GUC units mechanism for the value, and choose more appropriate timestamp functions for performing tests with it. Make the ps_activity manipulation in ResolveRecoveryConflictWithVirtualXIDs have behavior similar to ps_activity code elsewhere, notably not updating the display when update_process_title is off and not truncating the display contents at an arbitrarily-chosen length. Improve the docs to be explicit about what MaxStandbyDelay actually measures, viz the difference between primary and standby servers' clocks, and the possible hazards if their clocks aren't in sync. --- doc/src/sgml/config.sgml | 13 ++- doc/src/sgml/high-availability.sgml | 34 +++++--- src/backend/access/transam/xlog.c | 4 +- src/backend/storage/ipc/standby.c | 112 ++++++++++---------------- src/backend/utils/misc/guc.c | 7 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- 6 files changed, 78 insertions(+), 94 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index c84529f267..f9c13e7c49 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -1947,13 +1947,12 @@ SET ENABLE_SEQSCAN TO OFF; When Hot Standby is active, this parameter specifies a wait policy for applying WAL entries that conflict with active queries. - If a conflict should occur the server will delay up to this number - of seconds before it cancels conflicting queries, as + If a conflict should occur the server will delay up to this long + before it cancels conflicting queries, as described in . - Typically, this parameter is used only during replication. - The value is specified in seconds, and -1 causes the standby to wait - forever for a conflicting query to complete. The default is 30 seconds. + A value of -1 causes the standby to wait forever for a conflicting + query to complete. This parameter can only be set in the postgresql.conf file or on the server command line. @@ -1964,7 +1963,7 @@ SET ENABLE_SEQSCAN TO OFF; While it is tempting to believe that max_standby_delay - is the maximum number of seconds a query can run before + is the maximum length of time a query can run before cancellation is possible, this is not true. When a long-running query ends, there is a finite time required to apply backlogged WAL logs. If a second long-running query appears before the diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index b2f1a583fc..ef4c1bb2c2 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -1,4 +1,4 @@ - + High Availability, Load Balancing, and Replication @@ -1480,7 +1480,8 @@ if (!triggered) There are a number of choices for resolving query conflicts. The default is to wait and hope the query finishes. The server will wait automatically until the lag between primary and standby is at most - max_standby_delay seconds. Once that grace period expires, + (30 seconds by default). + Once that grace period expires, one of the following actions is taken: @@ -1514,15 +1515,28 @@ if (!triggered) - max_standby_delay is set in postgresql.conf. - The parameter applies to the server as a whole, so if the delay is consumed - by a single query then there may be little or no waiting for queries that - follow, though they will have benefited equally from the initial - waiting period. The server may take time to catch up again before the grace - period is available again, though if there is a heavy and constant stream - of conflicts it may seldom catch up fully. + Keep in mind that max_standby_delay is compared to the + difference between the standby server's clock and the transaction + commit timestamps read from the WAL log. Thus, the grace period + allowed to any one query on the standby is never more than + max_standby_delay, and could be considerably less if the + standby has already fallen behind as a result of waiting for previous + queries to complete, or as a result of being unable to keep up with a + heavy update load. + + + Be sure that the primary and standby servers' clocks are kept in sync; + otherwise the values compared to max_standby_delay will be + erroneous, possibly leading to undesirable query cancellations. + If the clocks are intentionally not in sync, or if there is a large + propagation delay from primary to standby, it is advisable to set + max_standby_delay to -1. In any case the value should be + larger than the largest expected clock skew between primary and standby. + + + Users should be clear that tables that are regularly and heavily updated on the primary server will quickly cause cancellation of longer running queries on @@ -1656,7 +1670,7 @@ LOG: database system is ready to accept read only connections max_standby_delay or even set it to zero, though that is a very aggressive setting. If the standby server is tasked as an additional server for decision support queries then it might be acceptable to set this - to a value of many hours (in seconds). It is also possible to set + to a value of many hours. It is also possible to set max_standby_delay to -1 which means wait forever for queries to complete; this will be useful when performing an archive recovery from a backup. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 61dfb20117..99e53b7632 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.407 2010/04/29 21:49:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.408 2010/05/02 02:10:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,7 +72,7 @@ int XLogArchiveTimeout = 0; bool XLogArchiveMode = false; char *XLogArchiveCommand = NULL; bool EnableHotStandby = false; -int MaxStandbyDelay = 30; +int MaxStandbyDelay = 30 * 1000; bool fullPageWrites = true; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 30723327cd..6bd156845f 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.20 2010/04/28 16:10:42 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.21 2010/05/02 02:10:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -124,31 +124,24 @@ static int standbyWait_us = STANDBY_INITIAL_WAIT_US; static bool WaitExceedsMaxStandbyDelay(void) { - long delay_secs; - int delay_usecs; - - if (MaxStandbyDelay == -1) - return false; - /* Are we past max_standby_delay? */ - TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(), - &delay_secs, &delay_usecs); - if (delay_secs > MaxStandbyDelay) + if (MaxStandbyDelay >= 0 && + TimestampDifferenceExceeds(GetLatestXLogTime(), GetCurrentTimestamp(), + MaxStandbyDelay)) return true; /* - * Sleep, then do bookkeeping. + * Sleep a bit (this is essential to avoid busy-waiting). */ pg_usleep(standbyWait_us); /* - * Progressively increase the sleep times. + * Progressively increase the sleep times, but not to more than 1s, + * since pg_usleep isn't interruptable on some platforms. */ standbyWait_us *= 2; if (standbyWait_us > 1000000) standbyWait_us = 1000000; - if (standbyWait_us > MaxStandbyDelay * 1000000 / 4) - standbyWait_us = MaxStandbyDelay * 1000000 / 4; return false; } @@ -163,50 +156,41 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ProcSignalReason reason) { - char waitactivitymsg[100]; - char oldactivitymsg[101]; - while (VirtualTransactionIdIsValid(*waitlist)) { - long wait_s; - int wait_us; /* wait in microseconds (us) */ TimestampTz waitStart; - bool logged; + char *new_status; + + pgstat_report_waiting(true); waitStart = GetCurrentTimestamp(); + new_status = NULL; /* we haven't changed the ps display */ + + /* reset standbyWait_us for each xact we wait for */ standbyWait_us = STANDBY_INITIAL_WAIT_US; - logged = false; /* wait until the virtual xid is gone */ while (!ConditionalVirtualXactLockTableWait(*waitlist)) { /* - * Report if we have been waiting for a while now... + * Report via ps if we have been waiting for more than 500 msec + * (should that be configurable?) */ - TimestampTz now = GetCurrentTimestamp(); - - TimestampDifference(waitStart, now, &wait_s, &wait_us); - if (!logged && (wait_s > 0 || wait_us > 500000)) + if (update_process_title && new_status == NULL && + TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(), + 500)) { - const char *oldactivitymsgp; + const char *old_status; int len; - oldactivitymsgp = get_ps_display(&len); - - if (len > 100) - len = 100; - - memcpy(oldactivitymsg, oldactivitymsgp, len); - oldactivitymsg[len] = 0; - - snprintf(waitactivitymsg, sizeof(waitactivitymsg), - "waiting for max_standby_delay (%u s)", + old_status = get_ps_display(&len); + new_status = (char *) palloc(len + 50); + memcpy(new_status, old_status, len); + snprintf(new_status + len, 50, + " waiting for max_standby_delay (%d ms)", MaxStandbyDelay); - set_ps_display(waitactivitymsg, false); - - pgstat_report_waiting(true); - - logged = true; + set_ps_display(new_status, false); + new_status[len] = '\0'; /* truncate off " waiting" */ } /* Is it time to kill it? */ @@ -225,16 +209,17 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, * unresponsive backend when system is heavily loaded. */ if (pid != 0) - pg_usleep(5000); + pg_usleep(5000L); } } - /* Reset ps display */ - if (logged) + /* Reset ps display if we changed it */ + if (new_status) { - set_ps_display(oldactivitymsg, false); - pgstat_report_waiting(false); + set_ps_display(new_status, false); + pfree(new_status); } + pgstat_report_waiting(false); /* The virtual transaction is gone now, wait for the next one */ waitlist++; @@ -401,7 +386,7 @@ ResolveRecoveryConflictWithBufferPin(void) */ SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); } - else if (MaxStandbyDelay == -1) + else if (MaxStandbyDelay < 0) { /* * Send out a request to check for buffer pin deadlocks before we @@ -412,17 +397,11 @@ ResolveRecoveryConflictWithBufferPin(void) } else { - TimestampTz now; - long standby_delay_secs; /* How far Startup process is lagging */ - int standby_delay_usecs; - - now = GetCurrentTimestamp(); + TimestampTz then = GetLatestXLogTime(); + TimestampTz now = GetCurrentTimestamp(); /* Are we past max_standby_delay? */ - TimestampDifference(GetLatestXLogTime(), now, - &standby_delay_secs, &standby_delay_usecs); - - if (standby_delay_secs >= MaxStandbyDelay) + if (TimestampDifferenceExceeds(then, now, MaxStandbyDelay)) { /* * We're already behind, so clear a path as quickly as possible. @@ -434,7 +413,7 @@ ResolveRecoveryConflictWithBufferPin(void) TimestampTz fin_time; /* Expected wake-up time by timer */ long timer_delay_secs; /* Amount of time we set timer * for */ - int timer_delay_usecs = 0; + int timer_delay_usecs; /* * Send out a request to check for buffer pin deadlocks before we @@ -446,12 +425,10 @@ ResolveRecoveryConflictWithBufferPin(void) /* * How much longer we should wait? */ - timer_delay_secs = MaxStandbyDelay - standby_delay_secs; - if (standby_delay_usecs > 0) - { - timer_delay_secs -= 1; - timer_delay_usecs = 1000000 - standby_delay_usecs; - } + fin_time = TimestampTzPlusMilliseconds(then, MaxStandbyDelay); + + TimestampDifference(now, fin_time, + &timer_delay_secs, &timer_delay_usecs); /* * It's possible that the difference is less than a microsecond; @@ -460,13 +437,6 @@ ResolveRecoveryConflictWithBufferPin(void) if (timer_delay_secs == 0 && timer_delay_usecs == 0) timer_delay_usecs = 1; - /* - * When is the finish time? We recheck this if we are woken early. - */ - fin_time = TimestampTzPlusMilliseconds(now, - (timer_delay_secs * 1000) + - (timer_delay_usecs / 1000)); - if (enable_standby_sig_alarm(timer_delay_secs, timer_delay_usecs, fin_time)) sig_alarm_enabled = true; else diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index d913137b49..430c37526d 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.553 2010/04/29 21:36:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.554 2010/05/02 02:10:33 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -1386,10 +1386,11 @@ static struct config_int ConfigureNamesInt[] = { {"max_standby_delay", PGC_SIGHUP, WAL_SETTINGS, gettext_noop("Sets the maximum delay to avoid conflict processing on hot standby servers."), - NULL + NULL, + GUC_UNIT_MS }, &MaxStandbyDelay, - 30, -1, INT_MAX, NULL, NULL + 30 * 1000, -1, INT_MAX / 1000, NULL, NULL }, { diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 2ed84127f7..94a4e69c6c 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -186,7 +186,7 @@ # - Hot Standby - #hot_standby = off # allows queries during recovery -#max_standby_delay = 30s # max acceptable lag (s) to allow queries to +#max_standby_delay = 30s # max acceptable lag to allow queries to # complete without conflict; -1 means forever #vacuum_defer_cleanup_age = 0 # num transactions by which cleanup is deferred -- 2.11.0