1 /*-------------------------------------------------------------------------
4 * Misc functions used in Hot Standby mode.
6 * All functions for handling RM_STANDBY_ID, which relate to
7 * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8 * Plus conflict recovery processing.
10 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
14 * src/backend/storage/ipc/standby.c
16 *-------------------------------------------------------------------------
19 #include "access/transam.h"
20 #include "access/twophase.h"
21 #include "access/xact.h"
22 #include "access/xlog.h"
23 #include "miscadmin.h"
24 #include "storage/bufmgr.h"
25 #include "storage/lmgr.h"
26 #include "storage/proc.h"
27 #include "storage/procarray.h"
28 #include "storage/sinvaladt.h"
29 #include "storage/standby.h"
30 #include "utils/ps_status.h"
32 /* User-settable GUC parameters */
33 int vacuum_defer_cleanup_age;
34 int max_standby_archive_delay = 30 * 1000;
35 int max_standby_streaming_delay = 30 * 1000;
37 static List *RecoveryLockList;
39 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
40 ProcSignalReason reason);
41 static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid);
42 static void LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
43 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
47 * InitRecoveryTransactionEnvironment
48 * Initialize tracking of in-progress transactions in master
50 * We need to issue shared invalidations and hold locks. Holding locks
51 * means others may want to wait on us, so we need to make a lock table
52 * vxact entry like a real transaction. We could create and delete
53 * lock table entries for each transaction but its simpler just to create
54 * one permanent entry and leave it there all the time. Locks are then
55 * acquired and released as needed. Yes, this means you can see the
56 * Startup process in pg_locks once we have run this.
59 InitRecoveryTransactionEnvironment(void)
61 VirtualTransactionId vxid;
64 * Initialize shared invalidation management for Startup process, being
65 * careful to register ourselves as a sendOnly process so we don't need to
66 * read messages, nor will we get signalled when the queue starts filling
69 SharedInvalBackendInit(true);
72 * Lock a virtual transaction id for Startup process.
74 * We need to do GetNextLocalTransactionId() because
75 * SharedInvalBackendInit() leaves localTransactionid invalid and the lock
76 * manager doesn't like that at all.
78 * Note that we don't need to run XactLockTableInsert() because nobody
79 * needs to wait on xids. That sounds a little strange, but table locks
80 * are held by vxids and row level locks are held by xids. All queries
81 * hold AccessShareLocks so never block while we write or lock new rows.
83 vxid.backendId = MyBackendId;
84 vxid.localTransactionId = GetNextLocalTransactionId();
85 VirtualXactLockTableInsert(vxid);
87 standbyState = STANDBY_INITIALIZED;
/*
 * ShutdownRecoveryTransactionEnvironment
 *		Tear down recovery-time transaction tracking.
 *
 * Used when switching from hot standby mode to normal operation.
 */
void
ShutdownRecoveryTransactionEnvironment(void)
{
	/* Every transaction we tracked as in-progress is now considered done */
	ExpireAllKnownAssignedTransactionIds();

	/* ... so the locks held on behalf of those transactions can go as well */
	StandbyReleaseAllLocks();
}
109 * -----------------------------------------------------
110 * Standby wait timers and backend cancel logic
111 * -----------------------------------------------------
115 * Determine the cutoff time at which we want to start canceling conflicting
116 * transactions. Returns zero (a time safely in the past) if we are willing
/*
 * GetStandbyLimitTime: work out the timestamp after which conflicting
 * queries may be cancelled.  The result is the WAL receipt time reported by
 * GetXLogReceiptTime() plus one of the max_standby_*_delay settings
 * (milliseconds).  A negative delay yields 0, and WaitExceedsMaxStandbyDelay()
 * only treats a nonzero result as a limit.
 *
 * NOTE(review): the return-type line, the local declarations and the branch
 * condition choosing between the streaming and archive delay are not visible
 * in this listing -- presumably the branch tests fromStream; confirm against
 * the canonical file before editing.
 */
120 GetStandbyLimitTime(void)
126 * The cutoff time is the last WAL data receipt time plus the appropriate
127 * delay variable. Delay of -1 means wait forever.
129 GetXLogReceiptTime(&rtime, &fromStream);
132 if (max_standby_streaming_delay < 0)
133 return 0; /* wait forever */
134 return TimestampTzPlusMilliseconds(rtime, max_standby_streaming_delay);
138 if (max_standby_archive_delay < 0)
139 return 0; /* wait forever */
140 return TimestampTzPlusMilliseconds(rtime, max_standby_archive_delay);
/*
 * STANDBY_INITIAL_WAIT_US is the starting sleep length in microseconds (it is
 * passed to pg_usleep via standbyWait_us).  standbyWait_us holds the current
 * sleep length; ResolveRecoveryConflictWithVirtualXIDs() resets it to the
 * initial value for each xact it waits for.
 *
 * WaitExceedsMaxStandbyDelay compares the current time with the limit from
 * GetStandbyLimitTime() (a zero limit never counts as exceeded), sleeps for
 * standbyWait_us, and caps standbyWait_us at 1000000 us (one second).
 *
 * NOTE(review): the return statements and the statement that increases
 * standbyWait_us before the cap are not visible in this listing; confirm
 * against the canonical file before editing.
 */
144 #define STANDBY_INITIAL_WAIT_US 1000
145 static int standbyWait_us = STANDBY_INITIAL_WAIT_US;
148 * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
149 * We wait here for a while then return. If we decide we can't wait any
150 * more then we return true, if we can wait some more return false.
153 WaitExceedsMaxStandbyDelay(void)
157 /* Are we past the limit time? */
158 ltime = GetStandbyLimitTime();
159 if (ltime && GetCurrentTimestamp() >= ltime)
163 * Sleep a bit (this is essential to avoid busy-waiting).
165 pg_usleep(standbyWait_us);
168 * Progressively increase the sleep times, but not to more than 1s, since
169 * pg_usleep isn't interruptable on some platforms.
172 if (standbyWait_us > 1000000)
173 standbyWait_us = 1000000;
179 * This is the main executioner for any query backend that conflicts with
180 * recovery processing. Judgement has already been passed on it within
181 * a specific rmgr. Here we just issue the orders to the procs. The procs
182 * then throw the required error as instructed.
/*
 * ResolveRecoveryConflictWithVirtualXIDs: process the waitlist array up to
 * its first invalid VirtualTransactionId.  For each entry, poll
 * ConditionalVirtualXactLockTableWait() until the virtual transaction is
 * gone; once WaitExceedsMaxStandbyDelay() says the limit is reached, cancel
 * it via CancelVirtualTransaction() with the given reason.
 *
 * While waiting, the ps display may get a " waiting" suffix: the buffer is
 * sized as old title + 8 characters of suffix + NUL, and is then truncated
 * back to the old title so the same buffer can restore the display at the
 * end.
 *
 * NOTE(review): several short lines (declarations of new_status/len/pid,
 * the early return, the 500 msec argument, the post-cancel sleep, the
 * advance to the next waitlist entry and the final cleanup) are not visible
 * in this listing; confirm against the canonical file before editing.
 */
185 ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
186 ProcSignalReason reason)
188 TimestampTz waitStart;
191 /* Fast exit, to avoid a kernel call if there's no work to be done. */
192 if (!VirtualTransactionIdIsValid(*waitlist))
195 waitStart = GetCurrentTimestamp();
196 new_status = NULL; /* we haven't changed the ps display */
198 while (VirtualTransactionIdIsValid(*waitlist))
200 /* reset standbyWait_us for each xact we wait for */
201 standbyWait_us = STANDBY_INITIAL_WAIT_US;
203 /* wait until the virtual xid is gone */
204 while (!ConditionalVirtualXactLockTableWait(*waitlist))
207 * Report via ps if we have been waiting for more than 500 msec
208 * (should that be configurable?)
210 if (update_process_title && new_status == NULL &&
211 TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
214 const char *old_status;
217 old_status = get_ps_display(&len);
218 new_status = (char *) palloc(len + 8 + 1);
219 memcpy(new_status, old_status, len);
220 strcpy(new_status + len, " waiting");
221 set_ps_display(new_status, false);
222 new_status[len] = '\0'; /* truncate off " waiting" */
225 /* Is it time to kill it? */
226 if (WaitExceedsMaxStandbyDelay())
231 * Now find out who to throw out of the balloon.
233 Assert(VirtualTransactionIdIsValid(*waitlist));
234 pid = CancelVirtualTransaction(*waitlist, reason);
237 * Wait a little bit for it to die so that we avoid flooding
238 * an unresponsive backend when system is heavily loaded.
245 /* The virtual transaction is gone now, wait for the next one */
249 /* Reset ps display if we changed it */
252 set_ps_display(new_status, false);
/*
 * ResolveRecoveryConflictWithSnapshot: resolve a snapshot conflict raised
 * during WAL replay.  An invalid latestRemovedXid means there is no
 * conflict; otherwise the backends reported by GetConflictingVirtualXIDs()
 * are passed to ResolveRecoveryConflictWithVirtualXIDs() with reason
 * PROCSIG_RECOVERY_CONFLICT_SNAPSHOT.
 *
 * NOTE(review): the statement guarded by the validity check (presumably an
 * early return) and the remaining argument(s) of the
 * GetConflictingVirtualXIDs() call (presumably derived from node) are not
 * visible in this listing; confirm against the canonical file.
 */
258 ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node)
260 VirtualTransactionId *backends;
263 * If we get passed InvalidTransactionId then we are a little surprised,
264 * but it is theoretically possible in normal running. It also happens
265 * when replaying already applied WAL records after a standby crash or
266 * restart. If latestRemovedXid is invalid then there is no conflict. That
267 * rule applies across all record types that suffer from this conflict.
269 if (!TransactionIdIsValid(latestRemovedXid))
272 backends = GetConflictingVirtualXIDs(latestRemovedXid,
275 ResolveRecoveryConflictWithVirtualXIDs(backends,
276 PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
/*
 * ResolveRecoveryConflictWithTablespace: resolve the conflict that arises
 * when a tablespace is to be removed.  It asks GetConflictingVirtualXIDs()
 * for backends using InvalidTransactionId as the xid argument, then passes
 * them to ResolveRecoveryConflictWithVirtualXIDs() with reason
 * PROCSIG_RECOVERY_CONFLICT_TABLESPACE.
 *
 * NOTE(review): tsid is not referenced by any line visible here, and the
 * final argument of the GetConflictingVirtualXIDs() call is not visible in
 * this listing; confirm both against the canonical file.
 */
280 ResolveRecoveryConflictWithTablespace(Oid tsid)
282 VirtualTransactionId *temp_file_users;
285 * Standby users may be currently using this tablespace for for their
286 * temporary files. We only care about current users because
287 * temp_tablespace parameter will just ignore tablespaces that no longer
290 * Ask everybody to cancel their queries immediately so we can ensure no
291 * temp files remain and we can remove the tablespace. Nuke the entire
292 * site from orbit, it's the only way to be sure.
294 * XXX: We could work out the pids of active backends using this
295 * tablespace by examining the temp filenames in the directory. We would
296 * then convert the pids into VirtualXIDs before attempting to cancel
299 * We don't wait for commit because drop tablespace is non-transactional.
301 temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId,
303 ResolveRecoveryConflictWithVirtualXIDs(temp_file_users,
304 PROCSIG_RECOVERY_CONFLICT_TABLESPACE);
/*
 * ResolveRecoveryConflictWithDatabase: keep calling CancelDBBackends() with
 * PROCSIG_RECOVERY_CONFLICT_DATABASE for as long as CountDBBackends(dbid)
 * is positive.
 *
 * NOTE(review): the statement implementing the wait at the end of the loop
 * body is not visible in this listing; confirm against the canonical file
 * before editing.
 */
308 ResolveRecoveryConflictWithDatabase(Oid dbid)
311 * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
312 * only waits for transactions and completely idle sessions would block
313 * us. This is rare enough that we do this as simply as possible: no wait,
314 * just force them off immediately.
316 * No locking is required here because we already acquired
317 * AccessExclusiveLock. Anybody trying to connect while we do this will
318 * block during InitPostgres() and then disconnect when they see the
319 * database has been removed.
321 while (CountDBBackends(dbid) > 0)
323 CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true);
326 * Wait awhile for them to die so that we avoid flooding an
327 * unresponsive backend when system is heavily loaded.
/*
 * ResolveRecoveryConflictWithLock: loop until LockAcquireExtended() for
 * AccessExclusiveLock on relation (dbOid, relOid) returns something other
 * than LOCKACQUIRE_NOT_AVAIL.  Each pass first cancels a set of backends
 * with reason PROCSIG_RECOVERY_CONFLICT_LOCK.  On the first two passes
 * (num_attempts 1 and 2) the set comes from GetLockConflicts().
 *
 * NOTE(review): the locktag declaration, the keyword introducing the
 * GetConflictingVirtualXIDs() branch (presumably else, so it applies from
 * the third pass on) and that call's final argument are not visible in this
 * listing; confirm against the canonical file before editing.
 */
334 ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
336 VirtualTransactionId *backends;
337 bool lock_acquired = false;
338 int num_attempts = 0;
341 SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
344 * If blowing away everybody with conflicting locks doesn't work, after
345 * the first two attempts then we just start blowing everybody away until
346 * it does work. We do this because its likely that we either have too
347 * many locks and we just can't get one at all, or that there are many
348 * people crowding for the same table. Recovery must win; the end
349 * justifies the means.
351 while (!lock_acquired)
353 if (++num_attempts < 3)
354 backends = GetLockConflicts(&locktag, AccessExclusiveLock);
356 backends = GetConflictingVirtualXIDs(InvalidTransactionId,
359 ResolveRecoveryConflictWithVirtualXIDs(backends,
360 PROCSIG_RECOVERY_CONFLICT_LOCK);
362 if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
363 != LOCKACQUIRE_NOT_AVAIL)
364 lock_acquired = true;
369 * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
370 * to resolve conflicts with other backends holding buffer pins.
372 * We either resolve conflicts immediately or set a SIGALRM to wake us at
373 * the limit of our patience. The sleep in LockBufferForCleanup() is
374 * performed here, for code clarity.
376 * Resolve conflicts by sending a PROCSIG signal to all backends to check if
377 * they hold one of the buffer pins that is blocking Startup process. If so,
378 * backends will take an appropriate error action, ERROR or FATAL.
380 * We also must check for deadlocks. Deadlocks occur because if queries
381 * wait on a lock, that must be behind an AccessExclusiveLock, which can only
382 * be cleared if the Startup process replays a transaction completion record.
383 * If Startup process is also waiting then that is a deadlock. The deadlock
384 * can occur if the query is waiting and then the Startup sleeps, or if
385 * Startup is sleeping and the query waits on a lock. We protect against
386 * only the former sequence here, the latter sequence is checked prior to
387 * the query sleeping, in CheckRecoveryConflictDeadlock().
389 * Deadlocks are extremely rare, and relatively expensive to check for,
390 * so we don't do a deadlock check right away ... only if we have had to wait
391 * at least deadlock_timeout. Most of the logic about that is in proc.c.
/*
 * ResolveRecoveryConflictWithBufferPin: asserts InHotStandby, then takes one
 * of three visible paths based on the limit from GetStandbyLimitTime():
 *   - arm the standby alarm with enable_standby_sig_alarm(now, now, true),
 *     i.e. for the deadlock check only;
 *   - if now >= ltime, signal backends at once through
 *     SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 *   - otherwise arm the alarm with enable_standby_sig_alarm(now, ltime, false).
 * Failing to arm the timer, or to disarm it afterwards, is reported at FATAL.
 * sig_alarm_enabled records whether the timer needs disarming.
 *
 * NOTE(review): the declarations of ltime and now, the condition of the
 * first branch (presumably a zero ltime, meaning no limit), the else
 * keywords and the call that performs the wait are not visible in this
 * listing; confirm against the canonical file before editing.
 */
394 ResolveRecoveryConflictWithBufferPin(void)
396 bool sig_alarm_enabled = false;
400 Assert(InHotStandby);
402 ltime = GetStandbyLimitTime();
403 now = GetCurrentTimestamp();
408 * We're willing to wait forever for conflicts, so set timeout for
409 * deadlock check (only)
411 if (enable_standby_sig_alarm(now, now, true))
412 sig_alarm_enabled = true;
414 elog(FATAL, "could not set timer for process wakeup");
416 else if (now >= ltime)
419 * We're already behind, so clear a path as quickly as possible.
421 SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
426 * Wake up at ltime, and check for deadlocks as well if we will be
427 * waiting longer than deadlock_timeout
429 if (enable_standby_sig_alarm(now, ltime, false))
430 sig_alarm_enabled = true;
432 elog(FATAL, "could not set timer for process wakeup");
435 /* Wait to be signaled by UnpinBuffer() */
438 if (sig_alarm_enabled)
440 if (!disable_standby_sig_alarm())
441 elog(FATAL, "could not disable timer for process wakeup");
446 SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
448 Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
449 reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
452 * We send signal to all backends to ask them if they are holding the
453 * buffer pin which is delaying the Startup process. We must not set the
454 * conflict flag yet, since most backends will be innocent. Let the
455 * SIGUSR1 handling in each backend decide their own fate.
457 CancelDBBackends(InvalidOid, reason, false);
461 * In Hot Standby perform early deadlock detection. We abort the lock
462 * wait if we are about to sleep while holding the buffer pin that Startup
463 * process is waiting for. The deadlock occurs because we can only be
464 * waiting behind an AccessExclusiveLock, which can only clear when a
465 * transaction completion record is replayed, which can only occur when
466 * Startup process is not waiting. So if Startup process is waiting we
467 * never will clear that lock, so if we wait we cause deadlock. If we
468 * are the Startup process then no need to check for deadlocks.
/*
 * CheckRecoveryConflictDeadlock: when this backend holds a buffer pin that
 * delays recovery (per HoldingBufferPinThatDelaysRecovery()), release
 * partitionLock and raise an error report carrying
 * ERRCODE_T_R_DEADLOCK_DETECTED.
 *
 * NOTE(review): the statement guarded by the pin check (presumably an early
 * return), any preceding assertion, and the line that opens the error
 * report and sets its severity are not visible in this listing; confirm
 * against the canonical file before editing.
 */
471 CheckRecoveryConflictDeadlock(LWLockId partitionLock)
475 if (!HoldingBufferPinThatDelaysRecovery())
478 LWLockRelease(partitionLock);
481 * Error message should match ProcessInterrupts() but we avoid calling
482 * that because we aren't handling an interrupt at this point. Note that
483 * we only cancel the current transaction here, so if we are in a
484 * subtransaction and the pin is held by a parent, then the Startup
485 * process will continue to wait even though we have avoided deadlock.
488 (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
489 errmsg("canceling statement due to conflict with recovery"),
490 errdetail("User transaction caused buffer deadlock with recovery.")));
494 * -----------------------------------------------------
495 * Locking in Recovery Mode
496 * -----------------------------------------------------
498 * All locks are held by the Startup process using a single virtual
499 * transaction. This implementation is both simpler and in some senses,
500 * more correct. The locks held mean "some original transaction held
501 * this lock, so query access is not allowed at this time". So the Startup
502 * process is the proxy by which the original locks are implemented.
504 * We only keep track of AccessExclusiveLocks, which are only ever held by
505 * one transaction on one relation, and don't worry about lock queuing.
507 * We keep a single dynamically expandable list of locks in local memory,
508 * RecoveryLockList, so we can keep track of the various entries made by
509 * the Startup process's virtual xid in the shared lock table.
511 * List elements use type xl_standby_lock, since the WAL record type exactly
512 * matches the information that we need to keep track of.
514 * We use session locks rather than normal locks so we don't need
/*
 * StandbyAcquireAccessExclusiveLock: record an AccessExclusiveLock on
 * relation (dbOid, relOid) in RecoveryLockList and take it for the Startup
 * process.  If LockAcquireExtended() returns LOCKACQUIRE_NOT_AVAIL, the
 * conflict is handed to ResolveRecoveryConflictWithLock().  relOid must be
 * valid; dbOid may be InvalidOid for a shared relation.
 *
 * NOTE(review): the statement guarded by the committed/aborted check
 * (presumably an early return), the locktag declaration and the assignment
 * of newlock->xid (the release functions read lock->xid) are not visible in
 * this listing; confirm against the canonical file before editing.
 */
520 StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
522 xl_standby_lock *newlock;
525 /* Already processed? */
526 if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
529 elog(trace_recovery(DEBUG4),
530 "adding recovery lock: db %u rel %u", dbOid, relOid);
532 /* dbOid is InvalidOid when we are locking a shared relation. */
533 Assert(OidIsValid(relOid));
535 newlock = palloc(sizeof(xl_standby_lock));
537 newlock->dbOid = dbOid;
538 newlock->relOid = relOid;
539 RecoveryLockList = lappend(RecoveryLockList, newlock);
542 * Attempt to acquire the lock as requested, if not resolve conflict
544 SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
546 if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
547 == LOCKACQUIRE_NOT_AVAIL)
548 ResolveRecoveryConflictWithLock(newlock->dbOid, newlock->relOid);
/*
 * StandbyReleaseLocks: walk RecoveryLockList and, for every entry whose xid
 * equals the given xid (or for every entry when xid is invalid), release the
 * AccessExclusiveLock with LockRelease() and delete the entry from the list.
 * A failed LockRelease() is reported with a message about the stale entry.
 *
 * NOTE(review): the declarations of cell/prev/next/locktag, the updates of
 * prev and next, the severity line of the failure report and any freeing of
 * the entry are not visible in this listing; confirm against the canonical
 * file before editing.
 */
552 StandbyReleaseLocks(TransactionId xid)
559 * Release all matching locks and remove them from list
562 for (cell = list_head(RecoveryLockList); cell; cell = next)
564 xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
568 if (!TransactionIdIsValid(xid) || lock->xid == xid)
572 elog(trace_recovery(DEBUG4),
573 "releasing recovery lock: xid %u db %u rel %u",
574 lock->xid, lock->dbOid, lock->relOid);
575 SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
576 if (!LockRelease(&locktag, AccessExclusiveLock, true))
578 "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
579 lock->xid, lock->dbOid, lock->relOid);
581 RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev);
590 * Release locks for a transaction tree, starting at xid down, from
593 * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
594 * to remove any AccessExclusiveLocks requested by a transaction.
597 StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
601 StandbyReleaseLocks(xid);
603 for (i = 0; i < nsubxids; i++)
604 StandbyReleaseLocks(subxids[i]);
608 * StandbyReleaseLocksMany
609 * Release standby locks held by XIDs < removeXid
611 * If keepPreparedXacts is true, keep prepared transactions even if
612 * they're older than removeXid
/*
 * StandbyReleaseLocksMany: walk RecoveryLockList and consider every entry
 * whose xid precedes removeXid (or every entry when removeXid is invalid).
 * Matching entries are released with LockRelease() and deleted from the
 * list; a failed LockRelease() is reported with a message about the stale
 * entry.  When keepPreparedXacts is set, entries whose xid satisfies
 * StandbyTransactionIdIsPrepared() are treated specially.
 *
 * NOTE(review): the statement guarded by the prepared-transaction check
 * (presumably it skips the entry), the declarations, the prev/next updates
 * and the severity line of the failure report are not visible in this
 * listing; confirm against the canonical file before editing.
 */
615 StandbyReleaseLocksMany(TransactionId removeXid, bool keepPreparedXacts)
623 * Release all matching locks.
626 for (cell = list_head(RecoveryLockList); cell; cell = next)
628 xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
632 if (!TransactionIdIsValid(removeXid) || TransactionIdPrecedes(lock->xid, removeXid))
634 if (keepPreparedXacts && StandbyTransactionIdIsPrepared(lock->xid))
636 elog(trace_recovery(DEBUG4),
637 "releasing recovery lock: xid %u db %u rel %u",
638 lock->xid, lock->dbOid, lock->relOid);
639 SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
640 if (!LockRelease(&locktag, AccessExclusiveLock, true))
642 "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
643 lock->xid, lock->dbOid, lock->relOid);
644 RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev);
653 * Called at end of recovery and when we see a shutdown checkpoint.
656 StandbyReleaseAllLocks(void)
658 elog(trace_recovery(DEBUG2), "release all standby locks");
659 StandbyReleaseLocksMany(InvalidTransactionId, false);
663 * StandbyReleaseOldLocks
664 * Release standby locks held by XIDs < removeXid, as long
665 * as they're not prepared transactions.
668 StandbyReleaseOldLocks(TransactionId removeXid)
670 StandbyReleaseLocksMany(removeXid, true);
674 * --------------------------------------------------------------------
675 * Recovery handling for Rmgr RM_STANDBY_ID
677 * These record types will only be created if XLogStandbyInfoActive()
678 * --------------------------------------------------------------------
682 standby_redo(XLogRecPtr lsn, XLogRecord *record)
684 uint8 info = record->xl_info & ~XLR_INFO_MASK;
686 /* Do nothing if we're not in hot standby mode */
687 if (standbyState == STANDBY_DISABLED)
690 if (info == XLOG_STANDBY_LOCK)
692 xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
695 for (i = 0; i < xlrec->nlocks; i++)
696 StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid,
697 xlrec->locks[i].dbOid,
698 xlrec->locks[i].relOid);
700 else if (info == XLOG_RUNNING_XACTS)
702 xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
703 RunningTransactionsData running;
705 running.xcnt = xlrec->xcnt;
706 running.subxid_overflow = xlrec->subxid_overflow;
707 running.nextXid = xlrec->nextXid;
708 running.latestCompletedXid = xlrec->latestCompletedXid;
709 running.oldestRunningXid = xlrec->oldestRunningXid;
710 running.xids = xlrec->xids;
712 ProcArrayApplyRecoveryInfo(&running);
715 elog(PANIC, "relation_redo: unknown op code %u", info);
/*
 * standby_desc_running_xacts: append a textual description of a
 * running-xacts record to buf: the three xid fields named in the first
 * format string, then the count and list of xids, then "; subxid ovf" when
 * subxid_overflow is set.
 *
 * NOTE(review): the first format string has three %u conversions but only
 * two arguments are visible, so an argument line (presumably xlrec->nextXid)
 * is missing from this listing.  The declaration of i and any guard around
 * the xid list are also not visible; confirm against the canonical file.
 */
719 standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec)
723 appendStringInfo(buf, " nextXid %u latestCompletedXid %u oldestRunningXid %u",
725 xlrec->latestCompletedXid,
726 xlrec->oldestRunningXid);
729 appendStringInfo(buf, "; %d xacts:", xlrec->xcnt);
730 for (i = 0; i < xlrec->xcnt; i++)
731 appendStringInfo(buf, " %u", xlrec->xids[i]);
734 if (xlrec->subxid_overflow)
735 appendStringInfo(buf, "; subxid ovf");
739 standby_desc(StringInfo buf, uint8 xl_info, char *rec)
741 uint8 info = xl_info & ~XLR_INFO_MASK;
743 if (info == XLOG_STANDBY_LOCK)
745 xl_standby_locks *xlrec = (xl_standby_locks *) rec;
748 appendStringInfo(buf, "AccessExclusive locks:");
750 for (i = 0; i < xlrec->nlocks; i++)
751 appendStringInfo(buf, " xid %u db %u rel %u",
752 xlrec->locks[i].xid, xlrec->locks[i].dbOid,
753 xlrec->locks[i].relOid);
755 else if (info == XLOG_RUNNING_XACTS)
757 xl_running_xacts *xlrec = (xl_running_xacts *) rec;
759 appendStringInfo(buf, " running xacts:");
760 standby_desc_running_xacts(buf, xlrec);
763 appendStringInfo(buf, "UNKNOWN");
767 * Log details of the current snapshot to WAL. This allows the snapshot state
768 * to be reconstructed on the standby.
770 * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
771 * start from a shutdown checkpoint because we know nothing was running
772 * at that time and our recovery snapshot is known empty. In the more
773 * typical case of an online checkpoint we need to jump through a few
774 * hoops to get a correct recovery snapshot and this requires a two or
775 * sometimes a three stage process.
777 * The initial snapshot must contain all running xids and all current
778 * AccessExclusiveLocks at a point in time on the standby. Assembling
779 * that information while the server is running requires many and
780 * various LWLocks, so we choose to derive that information piece by
781 * piece and then re-assemble that info on the standby. When that
782 * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
784 * Since locking on the primary when we derive the information is not
785 * strict, we note that there is a time window between the derivation and
786 * writing to WAL of the derived information. That allows race conditions
787 * that we must resolve, since xids and locks may enter or leave the
788 * snapshot during that window. This creates the issue that an xid or
789 * lock may start *after* the snapshot has been derived yet *before* the
790 * snapshot is logged in the running xacts WAL record. We resolve this by
791 * starting to accumulate changes at a point just prior to when we derive
792 * the snapshot on the primary, then ignore duplicates when we later apply
793 * the snapshot from the running xacts record. This is implemented during
794 * CreateCheckpoint() where we use the logical checkpoint location as
795 * our starting point and then write the running xacts record immediately
796 * before writing the main checkpoint WAL record. Since we always start
797 * up from a checkpoint and are immediately at our starting point, we
798 * unconditionally move to STANDBY_INITIALIZED. After this point we
800 * * move shared nextXid forwards as we see new xids
801 * * extend the clog and subtrans with each new xid
802 * * keep track of uncommitted known assigned xids
803 * * keep track of uncommitted AccessExclusiveLocks
805 * When we see a commit/abort we must remove known assigned xids and locks
806 * from the completing transaction. Attempted removals that cannot locate
807 * an entry are expected and must not cause an error when we are in state
808 * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
809 * KnownAssignedXidsRemove().
811 * Later, when we apply the running xact data we must be careful to ignore
812 * transactions already committed, since those commits raced ahead when
813 * making WAL entries.
/*
 * LogStandbySnapshot: write the standby snapshot information to WAL.  It
 * fetches the current AccessExclusiveLocks with GetRunningTransactionLocks()
 * and logs them through LogAccessExclusiveLocks(), then fetches the running
 * transactions with GetRunningTransactionData() and logs them through
 * LogCurrentRunningXacts().  XidGenLock, acquired by
 * GetRunningTransactionData(), is released here.  The oldest running xid
 * and next xid are returned through *oldestActiveXid and *nextXid.
 *
 * NOTE(review): the declaration of nlocks and a line between the
 * GetRunningTransactionLocks() and LogAccessExclusiveLocks() calls
 * (presumably a guard on nlocks) are not visible in this listing; confirm
 * against the canonical file before editing.
 */
816 LogStandbySnapshot(TransactionId *oldestActiveXid, TransactionId *nextXid)
818 RunningTransactions running;
819 xl_standby_lock *locks;
822 Assert(XLogStandbyInfoActive());
825 * Get details of any AccessExclusiveLocks being held at the moment.
827 * XXX GetRunningTransactionLocks() currently holds a lock on all
828 * partitions though it is possible to further optimise the locking. By
829 * reference counting locks and storing the value on the ProcArray entry
830 * for each backend we can easily tell if any locks need recording without
831 * trying to acquire the partition locks and scanning the lock table.
833 locks = GetRunningTransactionLocks(&nlocks);
835 LogAccessExclusiveLocks(nlocks, locks);
838 * Log details of all in-progress transactions. This should be the last
839 * record we write, because standby will open up when it sees this.
841 running = GetRunningTransactionData();
842 LogCurrentRunningXacts(running);
843 /* GetRunningTransactionData() acquired XidGenLock, we must release it */
844 LWLockRelease(XidGenLock);
846 *oldestActiveXid = running->oldestRunningXid;
847 *nextXid = running->nextXid;
851 * Record an enhanced snapshot of running transactions into WAL.
853 * The definitions of RunningTransactionsData and xl_running_xacts
854 * are similar. We keep them separate because xl_running_xacts
855 * is a contiguous chunk of memory and never exists fully until it is
/*
 * LogCurrentRunningXacts: copy the scalar fields of CurrRunningXacts into an
 * xl_running_xacts header and insert an XLOG_RUNNING_XACTS record for
 * RM_STANDBY_ID.  rdata[0] carries the header (MinSizeOfXactRunningXacts
 * bytes); rdata[1], when chained, carries the xid array (xcnt
 * TransactionIds).  A DEBUG2-level trace message is emitted afterwards,
 * with different wording when subxid_overflow is set.
 *
 * NOTE(review): the declarations of lastrdata and recptr, the condition
 * under which rdata[1] is chained (presumably a nonzero xcnt), the update
 * of lastrdata and the else keyword between the two trace messages are not
 * visible in this listing; confirm against the canonical file.
 */
859 LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
861 xl_running_xacts xlrec;
862 XLogRecData rdata[2];
866 xlrec.xcnt = CurrRunningXacts->xcnt;
867 xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
868 xlrec.nextXid = CurrRunningXacts->nextXid;
869 xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
870 xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
873 rdata[0].data = (char *) (&xlrec);
874 rdata[0].len = MinSizeOfXactRunningXacts;
875 rdata[0].buffer = InvalidBuffer;
877 /* array of TransactionIds */
880 rdata[0].next = &(rdata[1]);
881 rdata[1].data = (char *) CurrRunningXacts->xids;
882 rdata[1].len = xlrec.xcnt * sizeof(TransactionId);
883 rdata[1].buffer = InvalidBuffer;
887 rdata[lastrdata].next = NULL;
889 recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, rdata);
891 if (CurrRunningXacts->subxid_overflow)
892 elog(trace_recovery(DEBUG2),
893 "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
894 CurrRunningXacts->xcnt,
895 recptr.xlogid, recptr.xrecoff,
896 CurrRunningXacts->oldestRunningXid,
897 CurrRunningXacts->latestCompletedXid,
898 CurrRunningXacts->nextXid);
900 elog(trace_recovery(DEBUG2),
901 "snapshot of %u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
902 CurrRunningXacts->xcnt,
903 recptr.xlogid, recptr.xrecoff,
904 CurrRunningXacts->oldestRunningXid,
905 CurrRunningXacts->latestCompletedXid,
906 CurrRunningXacts->nextXid);
910 * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
911 * logged, as described in backend/storage/lmgr/README.
914 LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
916 XLogRecData rdata[2];
917 xl_standby_locks xlrec;
919 xlrec.nlocks = nlocks;
921 rdata[0].data = (char *) &xlrec;
922 rdata[0].len = offsetof(xl_standby_locks, locks);
923 rdata[0].buffer = InvalidBuffer;
924 rdata[0].next = &rdata[1];
926 rdata[1].data = (char *) locks;
927 rdata[1].len = nlocks * sizeof(xl_standby_lock);
928 rdata[1].buffer = InvalidBuffer;
929 rdata[1].next = NULL;
931 (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, rdata);
935 * Individual logging of AccessExclusiveLocks for use during LockAcquire()
/*
 * LogAccessExclusiveLock: build a single xl_standby_lock for the relation
 * and log it via LogAccessExclusiveLocks(1, ...).  The xid field comes from
 * GetTopTransactionId().
 *
 * NOTE(review): dbOid is not referenced by any line visible here, so the
 * assignment of xlrec.dbOid is presumably on a line missing from this
 * listing; confirm against the canonical file before editing.
 */
938 LogAccessExclusiveLock(Oid dbOid, Oid relOid)
940 xl_standby_lock xlrec;
942 xlrec.xid = GetTopTransactionId();
945 * Decode the locktag back to the original values, to avoid sending lots
946 * of empty bytes with every message. See lock.h to check how a locktag
947 * is defined for LOCKTAG_RELATION
950 xlrec.relOid = relOid;
952 LogAccessExclusiveLocks(1, &xlrec);
/*
 * LogAccessExclusiveLockPrepare
 *		Get ready to log an AccessExclusiveLock; for use during LockAcquire().
 */
void
LogAccessExclusiveLockPrepare(void)
{
	/*
	 * Force a TransactionId to be assigned to this transaction.  There are
	 * two reasons, both about lock release on the standby:
	 *
	 * 1. With an xid assigned, RecordTransactionCommit() and
	 * RecordTransactionAbort() will not optimise away the transaction
	 * completion record, which recovery relies upon to release locks.  It's
	 * a hack, but this is a corner case not worth adding code for in the
	 * main commit path.
	 *
	 * 2. The xid must be assigned before the lock is recorded in shared
	 * memory.  Otherwise a concurrently executing
	 * GetRunningTransactionLocks() might see a lock associated with an
	 * InvalidTransactionId, which we later assert cannot happen.
	 */
	(void) GetTopTransactionId();
}