From d53a56687f3d4772d17ffa0013a33231b7163731 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 11 Sep 2008 14:01:10 +0000 Subject: [PATCH] Initialize the minimum frozen Xid in vac_update_datfrozenxid using GetOldestXmin() instead of RecentGlobalXmin; this is safer because we do not depend on the latter being correctly set elsewhere, and while it is more expensive, this code path is not performance-critical. This is a real risk for autovacuum, because it can execute whole cycles without doing a single vacuum, which would mean that RecentGlobalXmin would stay at its initialization value, FirstNormalTransactionId, causing a bogus value to be inserted in pg_database. This bug could explain some recent reports of failure to truncate pg_clog. At the same time, change the initialization of RecentGlobalXmin to InvalidTransactionId, and ensure that it's set to something else whenever it's going to be used. Using it as FirstNormalTransactionId in HOT page pruning could incur in data loss. InitPostgres takes care of setting it to a valid value, but the extra checks are there to prevent "special" backends from behaving in unusual ways. Per Tom Lane's detailed problem dissection in 29544.1221061979@sss.pgh.pa.us --- src/backend/access/heap/heapam.c | 5 +++- src/backend/access/index/indexam.c | 4 ++- src/backend/commands/vacuum.c | 47 +++++++++++++------------------ src/backend/executor/nodeBitmapHeapscan.c | 4 ++- src/backend/utils/init/postinit.c | 10 +++++-- src/backend/utils/time/snapmgr.c | 8 ++++-- 6 files changed, 43 insertions(+), 35 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 6d3528323b..eb9f8701ae 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.262 2008/08/11 11:05:10 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.263 2008/09/11 14:01:09 alvherre Exp $ * * * INTERFACE ROUTINES @@ -219,6 +219,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) /* * Prune and repair fragmentation for the whole page, if possible. */ + Assert(TransactionIdIsValid(RecentGlobalXmin)); heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin); /* @@ -1469,6 +1470,8 @@ heap_hot_search_buffer(ItemPointer tid, Buffer buffer, Snapshot snapshot, if (all_dead) *all_dead = true; + Assert(TransactionIdIsValid(RecentGlobalXmin)); + Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(tid); at_chain_start = true; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 7d6028804a..0c132d5fc0 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.109 2008/06/19 00:46:03 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.110 2008/09/11 14:01:09 alvherre Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -419,6 +419,8 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) SCAN_CHECKS; GET_SCAN_PROCEDURE(amgettuple); + Assert(TransactionIdIsValid(RecentGlobalXmin)); + /* * We always reset xs_hot_dead; if we are here then either we are just * starting the scan, or we previously returned a visible tuple, and in diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7eb5777d3a..af7b6646d2 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.376 2008/08/13 00:07:50 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.377 2008/09/11 14:01:09 alvherre Exp $ * *------------------------------------------------------------------------- */ @@ -790,14 +790,12 @@ vac_update_datfrozenxid(void) bool dirty = false; /* - * Initialize the "min" calculation with RecentGlobalXmin. Any - * not-yet-committed pg_class entries for new tables must have - * relfrozenxid at least this high, because any other open xact must have - * RecentXmin >= its PGPROC.xmin >= our RecentGlobalXmin; see - * AddNewRelationTuple(). So we cannot produce a wrong minimum by - * starting with this. + * Initialize the "min" calculation with GetOldestXmin, which is a + * reasonable approximation to the minimum relfrozenxid for not-yet- + * committed pg_class entries for new tables; see AddNewRelationTuple(). + * Se we cannot produce a wrong minimum by starting with this. */ - newFrozenXid = RecentGlobalXmin; + newFrozenXid = GetOldestXmin(true, true); /* * We must seqscan pg_class to find the minimum Xid, because there is no @@ -990,18 +988,16 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) /* Begin a transaction for vacuuming this relation */ StartTransactionCommand(); - if (vacstmt->full) - { - /* functions in indexes may want a snapshot set */ - PushActiveSnapshot(GetTransactionSnapshot()); - } - else + /* + * Functions in indexes may want a snapshot set. Also, setting + * a snapshot ensures that RecentGlobalXmin is kept truly recent. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + + if (!vacstmt->full) { /* - * During a lazy VACUUM we do not run any user-supplied functions, and - * so it should be safe to not create a transaction snapshot. - * - * We can furthermore set the PROC_IN_VACUUM flag, which lets other + * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets other * concurrent VACUUMs know that they can ignore this one while * determining their OldestXmin. (The reason we don't set it during a * full VACUUM is exactly that we may have to run user- defined @@ -1050,8 +1046,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) if (!onerel) { - if (vacstmt->full) - PopActiveSnapshot(); + PopActiveSnapshot(); CommitTransactionCommand(); return; } @@ -1082,8 +1077,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) (errmsg("skipping \"%s\" --- only table or database owner can vacuum it", RelationGetRelationName(onerel)))); relation_close(onerel, lmode); - if (vacstmt->full) - PopActiveSnapshot(); + PopActiveSnapshot(); CommitTransactionCommand(); return; } @@ -1099,8 +1093,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) (errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables", RelationGetRelationName(onerel)))); relation_close(onerel, lmode); - if (vacstmt->full) - PopActiveSnapshot(); + PopActiveSnapshot(); CommitTransactionCommand(); return; } @@ -1115,8 +1108,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) if (isOtherTempNamespace(RelationGetNamespace(onerel))) { relation_close(onerel, lmode); - if (vacstmt->full) - PopActiveSnapshot(); + PopActiveSnapshot(); CommitTransactionCommand(); return; } @@ -1168,8 +1160,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) /* * Complete the transaction and free all temporary memory used. */ - if (vacstmt->full) - PopActiveSnapshot(); + PopActiveSnapshot(); CommitTransactionCommand(); /* diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index e09ece6537..41b9da62ea 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -21,7 +21,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.29 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.30 2008/09/11 14:01:09 alvherre Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "access/heapam.h" #include "access/relscan.h" +#include "access/transam.h" #include "executor/execdebug.h" #include "executor/nodeBitmapHeapscan.h" #include "pgstat.h" @@ -262,6 +263,7 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) /* * Prune and repair fragmentation for the whole page, if possible. */ + Assert(TransactionIdIsValid(RecentGlobalXmin)); heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin); /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index e0dbbf9311..6b66b2e392 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.184 2008/05/12 00:00:52 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.185 2008/09/11 14:01:09 alvherre Exp $ * * *------------------------------------------------------------------------- @@ -47,6 +47,7 @@ #include "utils/plancache.h" #include "utils/portal.h" #include "utils/relcache.h" +#include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -461,10 +462,15 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, on_shmem_exit(ShutdownPostgres, 0); /* - * Start a new transaction here before first access to db + * Start a new transaction here before first access to db, and get a + * snapshot. We don't have a use for the snapshot itself, but we're + * interested in the secondary effect that it sets RecentGlobalXmin. */ if (!bootstrap) + { StartTransactionCommand(); + (void) GetTransactionSnapshot(); + } /* * Now that we have a transaction, we can take locks. Take a writer's diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 841a92567d..0add5f489c 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -22,7 +22,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/time/snapmgr.c,v 1.4 2008/07/11 02:10:14 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/time/snapmgr.c,v 1.5 2008/09/11 14:01:10 alvherre Exp $ * *------------------------------------------------------------------------- */ @@ -59,10 +59,14 @@ static Snapshot SecondarySnapshot = NULL; * These are updated by GetSnapshotData. We initialize them this way * for the convenience of TransactionIdIsInProgress: even in bootstrap * mode, we don't want it to say that BootstrapTransactionId is in progress. + * + * RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no + * one tries to use a stale value. Readers should ensure that it has been set + * to something else before using it. */ TransactionId TransactionXmin = FirstNormalTransactionId; TransactionId RecentXmin = FirstNormalTransactionId; -TransactionId RecentGlobalXmin = FirstNormalTransactionId; +TransactionId RecentGlobalXmin = InvalidTransactionId; /* * Elements of the list of registered snapshots. -- 2.11.0