From 1ddc2703a936d03953657f43345460b9242bbed1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 7 Feb 2010 22:40:33 +0000 Subject: [PATCH] Work around deadlock problems with VACUUM FULL/CLUSTER on system catalogs, as per my recent proposal. First, teach IndexBuildHeapScan to not wait for INSERT_IN_PROGRESS or DELETE_IN_PROGRESS tuples to commit unless the index build is checking uniqueness/exclusion constraints. If it isn't, there's no harm in just indexing the in-doubt tuple. Second, modify VACUUM FULL/CLUSTER to suppress reverifying uniqueness/exclusion constraint properties while rebuilding indexes of the target relation. This is reasonable because these commands aren't meant to deal with corrupted-data situations. Constraint properties will still be rechecked when an index is rebuilt by a REINDEX command. This gets us out of the problem that new-style VACUUM FULL would often wait for other transactions while holding exclusive lock on a system catalog, leading to probable deadlock because those other transactions need to look at the catalogs too. Although the real ultimate cause of the problem is a debatable choice to release locks early after modifying system catalogs, changing that choice would require pretty serious analysis and is not something to be undertaken lightly or on a tight schedule. The present patch fixes the problem in a fairly reasonable way and should also improve the speed of VACUUM FULL/CLUSTER a little bit. --- src/backend/catalog/index.c | 143 ++++++++++++++++++++++--------------- src/backend/commands/cluster.c | 3 +- src/backend/commands/indexcmds.c | 4 +- src/include/catalog/index.h | 4 +- src/test/regress/parallel_schedule | 7 +- 5 files changed, 94 insertions(+), 67 deletions(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index e614d3baf6..d8a3d47a0a 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.333 2010/02/07 20:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.334 2010/02/07 22:40:33 tgl Exp $ * * * INTERFACE ROUTINES @@ -1541,6 +1541,8 @@ IndexBuildHeapScan(Relation heapRelation, IndexBuildCallback callback, void *callback_state) { + bool is_system_catalog; + bool checking_uniqueness; HeapScanDesc scan; HeapTuple heapTuple; Datum values[INDEX_MAX_KEYS]; @@ -1560,6 +1562,13 @@ IndexBuildHeapScan(Relation heapRelation, */ Assert(OidIsValid(indexRelation->rd_rel->relam)); + /* Remember if it's a system catalog */ + is_system_catalog = IsSystemRelation(heapRelation); + + /* See whether we're verifying uniqueness/exclusion properties */ + checking_uniqueness = (indexInfo->ii_Unique || + indexInfo->ii_ExclusionOps != NULL); + /* * Need an EState for evaluation of index expressions and partial-index * predicates. Also a slot to hold the current tuple. @@ -1652,6 +1661,7 @@ IndexBuildHeapScan(Relation heapRelation, { /* do our own time qual check */ bool indexIt; + TransactionId xwait; recheck: @@ -1710,29 +1720,31 @@ IndexBuildHeapScan(Relation heapRelation, case HEAPTUPLE_INSERT_IN_PROGRESS: /* - * Since caller should hold ShareLock or better, we should - * not see any tuples inserted by open transactions --- - * unless it's our own transaction. (Consider INSERT - * followed by CREATE INDEX within a transaction.) An - * exception occurs when reindexing a system catalog, - * because we often release lock on system catalogs before - * committing. In that case we wait for the inserting - * transaction to finish and check again. (We could do - * that on user tables too, but since the case is not - * expected it seems better to throw an error.) + * Since caller should hold ShareLock or better, normally + * the only way to see this is if it was inserted earlier + * in our own transaction. However, it can happen in + * system catalogs, since we tend to release write lock + * before commit there. Give a warning if neither case + * applies. */ - if (!TransactionIdIsCurrentTransactionId( - HeapTupleHeaderGetXmin(heapTuple->t_data))) + xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); + if (!TransactionIdIsCurrentTransactionId(xwait)) { - if (!IsSystemRelation(heapRelation)) - elog(ERROR, "concurrent insert in progress"); - else + if (!is_system_catalog) + elog(WARNING, "concurrent insert in progress within table \"%s\"", + RelationGetRelationName(heapRelation)); + + /* + * If we are performing uniqueness checks, indexing + * such a tuple could lead to a bogus uniqueness + * failure. In that case we wait for the inserting + * transaction to finish and check again. + */ + if (checking_uniqueness) { /* * Must drop the lock on the buffer before we wait */ - TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); XactLockTableWait(xwait); goto recheck; @@ -1749,30 +1761,27 @@ IndexBuildHeapScan(Relation heapRelation, case HEAPTUPLE_DELETE_IN_PROGRESS: /* - * Since caller should hold ShareLock or better, we should - * not see any tuples deleted by open transactions --- - * unless it's our own transaction. (Consider DELETE - * followed by CREATE INDEX within a transaction.) An - * exception occurs when reindexing a system catalog, - * because we often release lock on system catalogs before - * committing. In that case we wait for the deleting - * transaction to finish and check again. (We could do - * that on user tables too, but since the case is not - * expected it seems better to throw an error.) + * Similar situation to INSERT_IN_PROGRESS case. */ Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (!TransactionIdIsCurrentTransactionId( - HeapTupleHeaderGetXmax(heapTuple->t_data))) + xwait = HeapTupleHeaderGetXmax(heapTuple->t_data); + if (!TransactionIdIsCurrentTransactionId(xwait)) { - if (!IsSystemRelation(heapRelation)) - elog(ERROR, "concurrent delete in progress"); - else + if (!is_system_catalog) + elog(WARNING, "concurrent delete in progress within table \"%s\"", + RelationGetRelationName(heapRelation)); + + /* + * If we are performing uniqueness checks, assuming + * the tuple is dead could lead to missing a uniqueness + * violation. In that case we wait for the deleting + * transaction to finish and check again. + */ + if (checking_uniqueness) { /* * Must drop the lock on the buffer before we wait */ - TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); XactLockTableWait(xwait); goto recheck; @@ -2402,7 +2411,7 @@ IndexGetRelation(Oid indexId) * reindex_index - This routine is used to recreate a single index */ void -reindex_index(Oid indexId) +reindex_index(Oid indexId, bool skip_constraint_checks) { Relation iRel, heapRelation, @@ -2411,6 +2420,7 @@ reindex_index(Oid indexId) IndexInfo *indexInfo; HeapTuple indexTuple; Form_pg_index indexForm; + volatile bool skipped_constraint = false; /* * Open and lock the parent heap relation. ShareLock is sufficient since @@ -2448,6 +2458,17 @@ reindex_index(Oid indexId) /* Fetch info needed for index_build */ indexInfo = BuildIndexInfo(iRel); + /* If requested, skip checking uniqueness/exclusion constraints */ + if (skip_constraint_checks) + { + if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL) + skipped_constraint = true; + indexInfo->ii_Unique = false; + indexInfo->ii_ExclusionOps = NULL; + indexInfo->ii_ExclusionProcs = NULL; + indexInfo->ii_ExclusionStrats = NULL; + } + /* We'll build a new physical relation for the index */ RelationSetNewRelfilenode(iRel, InvalidTransactionId); @@ -2466,33 +2487,38 @@ reindex_index(Oid indexId) /* * If the index is marked invalid or not ready (ie, it's from a failed - * CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows - * REINDEX to be used to clean up in such cases. + * CREATE INDEX CONCURRENTLY), and we didn't skip a uniqueness check, + * we can now mark it valid. This allows REINDEX to be used to clean up + * in such cases. * * We can also reset indcheckxmin, because we have now done a * non-concurrent index build, *except* in the case where index_build * found some still-broken HOT chains. */ - pg_index = heap_open(IndexRelationId, RowExclusiveLock); + if (!skipped_constraint) + { + pg_index = heap_open(IndexRelationId, RowExclusiveLock); - indexTuple = SearchSysCacheCopy(INDEXRELID, - ObjectIdGetDatum(indexId), - 0, 0, 0); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", indexId); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + indexTuple = SearchSysCacheCopy(INDEXRELID, + ObjectIdGetDatum(indexId), + 0, 0, 0); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexId); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - if (!indexForm->indisvalid || !indexForm->indisready || - (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain)) - { - indexForm->indisvalid = true; - indexForm->indisready = true; - if (!indexInfo->ii_BrokenHotChain) - indexForm->indcheckxmin = false; - simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); - CatalogUpdateIndexes(pg_index, indexTuple); + if (!indexForm->indisvalid || !indexForm->indisready || + (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain)) + { + indexForm->indisvalid = true; + indexForm->indisready = true; + if (!indexInfo->ii_BrokenHotChain) + indexForm->indcheckxmin = false; + simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); + CatalogUpdateIndexes(pg_index, indexTuple); + } + + heap_close(pg_index, RowExclusiveLock); } - heap_close(pg_index, RowExclusiveLock); /* Close rels, but keep locks */ index_close(iRel, NoLock); @@ -2513,6 +2539,11 @@ reindex_index(Oid indexId) * do CCI after having collected the index list. (This way we can still use * catalog indexes while collecting the list.) * + * We also skip rechecking uniqueness/exclusion constraint properties if + * heap_rebuilt is true. This avoids likely deadlock conditions when doing + * VACUUM FULL or CLUSTER on system catalogs. REINDEX should be used to + * rebuild an index if constraint inconsistency is suspected. + * * Returns true if any indexes were rebuilt. Note that a * CommandCounterIncrement will occur after each index rebuild. */ @@ -2594,7 +2625,7 @@ reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt) if (is_pg_class) RelationSetIndexList(rel, doneIndexes, InvalidOid); - reindex_index(indexOid); + reindex_index(indexOid, heap_rebuilt); CommandCounterIncrement(); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index da605bffac..ef8e867736 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.198 2010/02/07 20:48:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.199 2010/02/07 22:40:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -948,7 +948,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, */ if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) - elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 7e6be57ee8..b79723a777 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.191 2010/02/07 20:48:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.192 2010/02/07 22:40:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1588,7 +1588,7 @@ ReindexIndex(RangeVar *indexRelation) ReleaseSysCache(tuple); - reindex_index(indOid); + reindex_index(indOid, false); } /* diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 2bacf827c9..6b5f3c43c8 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.82 2010/02/07 20:48:11 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.83 2010/02/07 22:40:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -70,7 +70,7 @@ extern double IndexBuildHeapScan(Relation heapRelation, extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); -extern void reindex_index(Oid indexId); +extern void reindex_index(Oid indexId, bool skip_constraint_checks); extern bool reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt); extern bool ReindexIsProcessingHeap(Oid heapOid); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index eb53eff4b4..43794122f5 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -1,5 +1,5 @@ # ---------- -# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.59 2010/02/07 20:48:13 tgl Exp $ +# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.60 2010/02/07 22:40:33 tgl Exp $ # # By convention, we put no more than twenty tests in any one parallel group; # this limits the number of connections needed to run the tests. @@ -52,10 +52,7 @@ test: copy copyselect # ---------- # Another group of parallel tests # ---------- -test: constraints triggers create_misc create_aggregate create_operator inherit typed_table drop_if_exists create_cast - -# XXX temporarily run this by itself -test: vacuum +test: constraints triggers create_misc create_aggregate create_operator inherit typed_table vacuum drop_if_exists create_cast # Depends on the above test: create_index create_view -- 2.11.0