1 /*-------------------------------------------------------------------------
4 * CLUSTER a table on an index. This is now also used for VACUUM FULL.
6 * There is hardly anything left of Paul Brown's original implementation...
9 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994-5, Regents of the University of California
14 * src/backend/commands/cluster.c
16 *-------------------------------------------------------------------------
20 #include "access/genam.h"
21 #include "access/heapam.h"
22 #include "access/relscan.h"
23 #include "access/rewriteheap.h"
24 #include "access/transam.h"
25 #include "access/xact.h"
26 #include "catalog/catalog.h"
27 #include "catalog/dependency.h"
28 #include "catalog/heap.h"
29 #include "catalog/index.h"
30 #include "catalog/indexing.h"
31 #include "catalog/namespace.h"
32 #include "catalog/pg_namespace.h"
33 #include "catalog/toasting.h"
34 #include "commands/cluster.h"
35 #include "commands/tablecmds.h"
36 #include "commands/trigger.h"
37 #include "commands/vacuum.h"
38 #include "miscadmin.h"
39 #include "optimizer/planner.h"
40 #include "storage/bufmgr.h"
41 #include "storage/procarray.h"
42 #include "storage/smgr.h"
43 #include "utils/acl.h"
44 #include "utils/fmgroids.h"
45 #include "utils/inval.h"
46 #include "utils/lsyscache.h"
47 #include "utils/memutils.h"
48 #include "utils/pg_rusage.h"
49 #include "utils/relcache.h"
50 #include "utils/relmapper.h"
51 #include "utils/snapmgr.h"
52 #include "utils/syscache.h"
53 #include "utils/tqual.h"
54 #include "utils/tuplesort.h"
58 * This struct is used to pass around the information on tables to be
59 * clustered. We need this so we can make a list of them when invoked without
60 * a specific table/index pair.
/*
 * Prototypes for the file-local (static) helpers that the CLUSTER /
 * VACUUM FULL code below calls: rebuild_relation (from cluster_rel),
 * copy_heap_data (from rebuild_relation), get_tables_to_cluster (from
 * cluster) and reform_and_rewrite_tuple (from copy_heap_data).
 */
69 static void rebuild_relation(Relation OldHeap, Oid indexOid,
70 int freeze_min_age, int freeze_table_age, bool verbose);
71 static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
72 int freeze_min_age, int freeze_table_age, bool verbose,
73 bool *pSwapToastByContent, TransactionId *pFreezeXid);
74 static List *get_tables_to_cluster(MemoryContext cluster_context);
75 static void reform_and_rewrite_tuple(HeapTuple tuple,
76 TupleDesc oldTupDesc, TupleDesc newTupDesc,
77 Datum *values, bool *isnull,
78 bool newRelHasOids, RewriteState rwstate);
81 /*---------------------------------------------------------------------------
82 * This cluster code allows for clustering multiple tables at once. Because
83 * of this, we cannot just run everything on a single transaction, or we
84 * would be forced to acquire exclusive locks on all the tables being
85 * clustered, simultaneously --- very likely leading to deadlock.
87 * To solve this we follow a similar strategy to VACUUM code,
88 * clustering each relation in a separate transaction. For this to work, we need to:
90 * - provide a separate memory context so that we can pass information in
91 * a way that survives across transactions
92 * - start a new transaction every time a new relation is clustered
93 * - check for validity of the information on to-be-clustered relations,
94 * as someone might have deleted a relation behind our back, or
95 * clustered one on a different index
96 * - end the transaction
98 * The single-relation case does not have any such overhead.
100 * We also allow a relation to be specified without index. In that case,
101 * the indisclustered bit will be looked up, and an ERROR will be thrown
102 * if there is no index with the bit set.
103 *---------------------------------------------------------------------------
/*
 * cluster: execute a CLUSTER statement.
 *
 * stmt->relation != NULL (single-relation case): the table is opened with
 * AccessExclusiveLock, ownership is checked, the index is resolved (either
 * stmt->indexname looked up in the table's namespace, or else the index
 * already flagged indisclustered), and cluster_rel() is called once with
 * recheck = false.
 *
 * Otherwise (multi-relation case): every entry produced by
 * get_tables_to_cluster() is handed to cluster_rel() with recheck = true,
 * each one between its own StartTransactionCommand/CommitTransactionCommand
 * pair.
 *
 * isTopLevel is only used for the PreventTransactionChain() check in the
 * multi-relation case.
 */
106 cluster(ClusterStmt *stmt, bool isTopLevel)
108 if (stmt->relation != NULL)
110 /* This is the single-relation case. */
112 indexOid = InvalidOid;
115 /* Find and lock the table */
116 rel = heap_openrv(stmt->relation, AccessExclusiveLock);
118 tableOid = RelationGetRelid(rel);
120 /* Check permissions */
121 if (!pg_class_ownercheck(tableOid, GetUserId()))
122 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
123 RelationGetRelationName(rel));
126 * Reject clustering a remote temp table ... their local buffer
127 * manager is not going to cope.
129 if (RELATION_IS_OTHER_TEMP(rel))
131 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
132 errmsg("cannot cluster temporary tables of other sessions")));
134 if (stmt->indexname == NULL)
138 /* We need to find the index that has indisclustered set. */
139 foreach(index, RelationGetIndexList(rel))
142 Form_pg_index indexForm;
144 indexOid = lfirst_oid(index);
145 idxtuple = SearchSysCache1(INDEXRELID,
146 ObjectIdGetDatum(indexOid));
147 if (!HeapTupleIsValid(idxtuple))
148 elog(ERROR, "cache lookup failed for index %u", indexOid);
149 indexForm = (Form_pg_index) GETSTRUCT(idxtuple);
150 if (indexForm->indisclustered)
/* NOTE(review): presumably this release is on the "found" path and is followed by a loop exit that is not shown here */
152 ReleaseSysCache(idxtuple);
155 ReleaseSysCache(idxtuple);
/* reset, so that running off the end of the list without a match leaves indexOid invalid for the test below */
156 indexOid = InvalidOid;
159 if (!OidIsValid(indexOid))
161 (errcode(ERRCODE_UNDEFINED_OBJECT),
162 errmsg("there is no previously clustered index for table \"%s\"",
163 stmt->relation->relname)));
/* An explicitly named index is looked up by name in the table's own relnamespace. */
168 * The index is expected to be in the same namespace as the
171 indexOid = get_relname_relid(stmt->indexname,
172 rel->rd_rel->relnamespace);
173 if (!OidIsValid(indexOid))
175 (errcode(ERRCODE_UNDEFINED_OBJECT),
176 errmsg("index \"%s\" for table \"%s\" does not exist",
177 stmt->indexname, stmt->relation->relname)));
180 /* close relation, keep lock till commit */
181 heap_close(rel, NoLock);
/* recheck = false; the two -1 values are cluster_rel's freeze_min_age and freeze_table_age arguments */
184 cluster_rel(tableOid, indexOid, false, stmt->verbose, -1, -1);
189 * This is the "multi relation" case. We need to cluster all tables
190 * that have some index with indisclustered set.
192 MemoryContext cluster_context;
197 * We cannot run this form of CLUSTER inside a user transaction block;
198 * we'd be holding locks way too long.
200 PreventTransactionChain(isTopLevel, "CLUSTER");
203 * Create special memory context for cross-transaction storage.
205 * Since it is a child of PortalContext, it will go away even in case
208 cluster_context = AllocSetContextCreate(PortalContext,
210 ALLOCSET_DEFAULT_MINSIZE,
211 ALLOCSET_DEFAULT_INITSIZE,
212 ALLOCSET_DEFAULT_MAXSIZE);
215 * Build the list of relations to cluster. Note that this lives in
218 rvs = get_tables_to_cluster(cluster_context);
220 /* Commit to get out of starting transaction */
222 CommitTransactionCommand();
224 /* Ok, now that we've got them all, cluster them one by one */
227 RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
229 /* Start a new transaction for each relation. */
230 StartTransactionCommand();
231 /* functions in indexes may want a snapshot set */
232 PushActiveSnapshot(GetTransactionSnapshot());
/* recheck = true here: the list was built in an earlier transaction, so cluster_rel must re-validate each entry */
233 cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose,
236 CommitTransactionCommand();
239 /* Start a new transaction for the cleanup work. */
240 StartTransactionCommand();
242 /* Clean up working storage */
243 MemoryContextDelete(cluster_context);
250 * This clusters the table by creating a new, clustered table and
251 * swapping the relfilenodes of the new table and the old table, so
252 * the OID of the original table is preserved. Thus we do not lose
253 * GRANT, inheritance nor references to this table (this was a bug
254 * in releases through 7.3).
256 * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
257 * the new table, it's better to create the indexes afterwards than to fill
258 * them incrementally while we load the table.
260 * If indexOid is InvalidOid, the table will be rewritten in physical order
261 * instead of index order. This is the new implementation of VACUUM FULL,
262 * and error messages should refer to the operation as VACUUM not CLUSTER.
/*
 * cluster_rel: rewrite one table, ordered by indexOid when that is a valid
 * OID.
 *
 * tableOid         - table to rewrite; opened here with try_relation_open()
 *                    under AccessExclusiveLock
 * indexOid         - index to order by, or InvalidOid (the VACUUM wording is
 *                    then used in messages)
 * recheck          - passed on to check_index_is_clusterable(); callers in
 *                    this file pass true from the multi-relation path and
 *                    false from the single-relation path
 * freeze_min_age,
 * freeze_table_age - forwarded to rebuild_relation()
 *
 * After the checks below the real work is delegated to rebuild_relation(),
 * which also closes OldHeap.
 */
265 cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose,
266 int freeze_min_age, int freeze_table_age)
270 /* Check for user-requested abort. */
271 CHECK_FOR_INTERRUPTS();
274 * We grab exclusive access to the target rel and index for the duration
275 * of the transaction. (This is redundant for the single-transaction
276 * case, since cluster() already did it.) The index lock is taken inside
277 * check_index_is_clusterable.
279 OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
281 /* If the table has gone away, we can skip processing it */
286 * Since we may open a new transaction for each relation, we have to check
287 * that the relation still is what we think it is.
289 * If this is a single-transaction CLUSTER, we can skip these tests. We
290 * *must* skip the one on indisclustered since it would reject an attempt
291 * to cluster a not-previously-clustered index.
296 Form_pg_index indexForm;
/*
 * Every bail-out in this re-validation section closes OldHeap passing
 * AccessExclusiveLock (not NoLock) as the lock mode.
 * NOTE(review): presumably each close is followed by an early return that
 * is not shown here.
 */
298 /* Check that the user still owns the relation */
299 if (!pg_class_ownercheck(tableOid, GetUserId()))
301 relation_close(OldHeap, AccessExclusiveLock);
306 * Silently skip a temp table for a remote session. Only doing this
307 * check in the "recheck" case is appropriate (which currently means
308 * somebody is executing a database-wide CLUSTER), because there is
309 * another check in cluster() which will stop any attempt to cluster
310 * remote temp tables by name. There is another check in cluster_rel
311 * which is redundant, but we leave it for extra safety.
313 if (RELATION_IS_OTHER_TEMP(OldHeap))
315 relation_close(OldHeap, AccessExclusiveLock);
319 if (OidIsValid(indexOid))
322 * Check that the index still exists
324 if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
326 relation_close(OldHeap, AccessExclusiveLock);
331 * Check that the index is still the one with indisclustered set.
333 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
334 if (!HeapTupleIsValid(tuple)) /* probably can't happen */
336 relation_close(OldHeap, AccessExclusiveLock);
339 indexForm = (Form_pg_index) GETSTRUCT(tuple);
340 if (!indexForm->indisclustered)
342 ReleaseSysCache(tuple);
343 relation_close(OldHeap, AccessExclusiveLock);
346 ReleaseSysCache(tuple);
351 * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
352 * would work in most respects, but the index would only get marked as
353 * indisclustered in the current database, leading to unexpected behavior
354 * if CLUSTER were later invoked in another database.
356 if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
358 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
359 errmsg("cannot cluster a shared catalog")));
362 * Don't process temp tables of other backends ... their local buffer
363 * manager is not going to cope.
365 if (RELATION_IS_OTHER_TEMP(OldHeap))
/* the inner test only selects the wording: "cluster" when an index was given, "vacuum" otherwise */
367 if (OidIsValid(indexOid))
369 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
370 errmsg("cannot cluster temporary tables of other sessions")));
373 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
374 errmsg("cannot vacuum temporary tables of other sessions")));
378 * Also check for active uses of the relation in the current transaction,
379 * including open scans and pending AFTER trigger events.
381 CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
383 /* Check heap and index are valid to cluster on */
384 if (OidIsValid(indexOid))
/* the index is opened there with the same AccessExclusiveLock mode used for the table above */
385 check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
387 /* rebuild_relation does all the dirty work */
388 rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age,
391 /* NB: rebuild_relation does heap_close() on OldHeap */
395 * Verify that the specified heap and index are valid to cluster on
397 * Side effect: obtains a lock of the given lockmode on the index. The caller
398 * should already hold a lock on the table, so the index lock is likely
399 * redundant, but it seems best to grab it anyway to ensure the index
400 * definition can't change under us.
/*
 * check_index_is_clusterable: raise an error unless indexOid can be used to
 * cluster OldHeap.
 *
 * OldHeap  - the table, already opened by the caller
 * indexOid - index to validate; opened here with index_open()
 * recheck  - NOTE(review): not referenced by any line of this function that
 *            is visible here
 * lockmode - lock mode handed to index_open(); the lock is kept, since the
 *            index is closed with NoLock at the end
 *
 * The index is rejected when it does not belong to OldHeap, when its access
 * method has amclusterable unset, when it has a predicate (partial index),
 * or when indisvalid is false.
 */
403 check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
407 OldIndex = index_open(indexOid, lockmode);
410 * Check that index is in fact an index on the given relation
/* rd_index == NULL is treated the same as "belongs to some other table" */
412 if (OldIndex->rd_index == NULL ||
413 OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
415 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
416 errmsg("\"%s\" is not an index for table \"%s\"",
417 RelationGetRelationName(OldIndex),
418 RelationGetRelationName(OldHeap))));
420 /* Index AM must allow clustering */
421 if (!OldIndex->rd_am->amclusterable)
423 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
424 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
425 RelationGetRelationName(OldIndex))));
428 * Disallow clustering on incomplete indexes (those that might not index
429 * every row of the relation). We could relax this by making a separate
430 * seqscan pass over the table to copy the missing rows, but that seems
431 * expensive and tedious.
/* a non-null indpred attribute in the index's pg_index tuple is what identifies a partial index here */
433 if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred))
435 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
436 errmsg("cannot cluster on partial index \"%s\"",
437 RelationGetRelationName(OldIndex))));
440 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
441 * it might well not contain entries for every heap row, or might not even
442 * be internally consistent. (But note that we don't check indcheckxmin;
443 * the worst consequence of following broken HOT chains would be that we
444 * might put recently-dead tuples out-of-order in the new table, and there
445 * is little harm in that.)
447 if (!OldIndex->rd_index->indisvalid)
449 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
450 errmsg("cannot cluster on invalid index \"%s\"",
451 RelationGetRelationName(OldIndex))));
453 /* Drop relcache refcnt on OldIndex, but keep lock */
454 index_close(OldIndex, NoLock);
458 * mark_index_clustered: mark the specified index as the one clustered on
460 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
/*
 * mark_index_clustered: update pg_index so that indexOid is the only index
 * of rel with indisclustered set.
 *
 * rel      - table whose index list (RelationGetIndexList) is walked
 * indexOid - index to flag; with InvalidOid no index matches the "set"
 *            branch below, so flags are only ever cleared
 *
 * pg_index is opened and closed with RowExclusiveLock.
 */
463 mark_index_clustered(Relation rel, Oid indexOid)
465 HeapTuple indexTuple;
466 Form_pg_index indexForm;
471 * If the index is already marked clustered, no need to do anything.
473 if (OidIsValid(indexOid))
/* read-only peek at the cached tuple: released again on both paths below */
475 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
476 if (!HeapTupleIsValid(indexTuple))
477 elog(ERROR, "cache lookup failed for index %u", indexOid);
478 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
480 if (indexForm->indisclustered)
/* NOTE(review): presumably followed by an early return that is not shown here */
482 ReleaseSysCache(indexTuple);
486 ReleaseSysCache(indexTuple);
490 * Check each index of the relation and set/clear the bit as needed.
492 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
494 foreach(index, RelationGetIndexList(rel))
496 Oid thisIndexOid = lfirst_oid(index);
/* a modifiable copy is fetched here (SearchSysCacheCopy1); it is freed with heap_freetuple at the end of each iteration */
498 indexTuple = SearchSysCacheCopy1(INDEXRELID,
499 ObjectIdGetDatum(thisIndexOid));
500 if (!HeapTupleIsValid(indexTuple))
501 elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
502 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
505 * Unset the bit if set. We know it's wrong because we checked this
508 if (indexForm->indisclustered)
510 indexForm->indisclustered = false;
511 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
512 CatalogUpdateIndexes(pg_index, indexTuple);
/* only reached for an index whose bit is currently clear */
514 else if (thisIndexOid == indexOid)
516 indexForm->indisclustered = true;
517 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
518 CatalogUpdateIndexes(pg_index, indexTuple);
520 heap_freetuple(indexTuple);
523 heap_close(pg_index, RowExclusiveLock);
527 * rebuild_relation: rebuild an existing relation in index or physical order
529 * OldHeap: table to rebuild --- must be opened and exclusive-locked!
530 * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
532 * NB: this routine closes OldHeap at the right time; caller should not.
/*
 * rebuild_relation: drive the rewrite of OldHeap.
 *
 * Sequence: optionally mark indexOid as the clustered index, close OldHeap
 * (keeping its lock), create the transient heap with make_new_heap(), fill
 * it with copy_heap_data(), then call finish_heap_swap().
 *
 * freeze_min_age, freeze_table_age and verbose are passed straight through
 * to copy_heap_data().
 */
535 rebuild_relation(Relation OldHeap, Oid indexOid,
536 int freeze_min_age, int freeze_table_age, bool verbose)
/* everything needed from OldHeap is captured before the heap_close() below; later steps work from tableOid only */
538 Oid tableOid = RelationGetRelid(OldHeap);
539 Oid tableSpace = OldHeap->rd_rel->reltablespace;
541 bool is_system_catalog;
/* the next two are filled in by copy_heap_data() through its output pointers */
542 bool swap_toast_by_content;
543 TransactionId frozenXid;
545 /* Mark the correct index as clustered */
546 if (OidIsValid(indexOid))
547 mark_index_clustered(OldHeap, indexOid);
549 /* Remember if it's a system catalog */
550 is_system_catalog = IsSystemRelation(OldHeap);
552 /* Close relcache entry, but keep lock until transaction commit */
553 heap_close(OldHeap, NoLock);
555 /* Create the transient table that will receive the re-ordered data */
/* the new heap goes into the old heap's own tablespace */
556 OIDNewHeap = make_new_heap(tableOid, tableSpace);
558 /* Copy the heap data into the new table in the desired order */
559 copy_heap_data(OIDNewHeap, tableOid, indexOid,
560 freeze_min_age, freeze_table_age, verbose,
561 &swap_toast_by_content, &frozenXid);
564 * Swap the physical files of the target and transient tables, then
565 * rebuild the target's indexes and throw away the transient table.
/* NOTE(review): the meaning of the literal false (fifth argument) is defined by finish_heap_swap, which is not visible here */
567 finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
568 swap_toast_by_content, false, frozenXid);
573 * Create the transient table that will be filled with new data during
574 * CLUSTER, ALTER TABLE, and similar operations. The transient table
575 * duplicates the logical structure of the OldHeap, but is placed in
576 * NewTableSpace which might be different from OldHeap's.
578 * After this, the caller should load the new heap with transferred/modified
579 * data, then call finish_heap_swap to complete the operation.
/*
 * make_new_heap: create an empty transient heap modelled on OIDOldHeap.
 *
 * OIDOldHeap    - OID of the existing table; opened here with
 *                 AccessExclusiveLock and closed with NoLock at the end
 * NewTableSpace - tablespace for the new heap (NOTE(review): presumably
 *                 forwarded to heap_create_with_catalog; that argument line
 *                 is not shown here)
 *
 * The new heap is named "pg_temp_<OIDOldHeap>", lives in the old heap's
 * namespace and copies its owner, relkind, relpersistence, mapped-ness and
 * reloptions. The OID of the new heap ends up in OIDNewHeap, which
 * rebuild_relation() receives as this function's result.
 */
582 make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
584 TupleDesc OldHeapDesc,
586 char NewHeapName[NAMEDATALEN];
594 OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
595 OldHeapDesc = RelationGetDescr(OldHeap);
598 * Need to make a copy of the tuple descriptor, since
599 * heap_create_with_catalog modifies it. Note that the NewHeap will not
600 * receive any of the defaults or constraints associated with the OldHeap;
601 * we don't need 'em, and there's no reason to spend cycles inserting them
602 * into the catalogs only to delete them.
604 tupdesc = CreateTupleDescCopy(OldHeapDesc);
607 * But we do want to use reloptions of the old heap for new heap.
/*
 * The syscache tuple fetched here is not released until after
 * heap_create_with_catalog() has run (see the ReleaseSysCache call further
 * down).
 */
609 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
610 if (!HeapTupleIsValid(tuple))
611 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
612 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
/* NOTE(review): presumably guarded by the is-null flag returned by SysCacheGetAttr; the guard line is not shown */
615 reloptions = (Datum) 0;
618 * Create the new heap, using a temporary name in the same namespace as
619 * the existing table. NOTE: there is some risk of collision with user
620 * relnames. Working around this seems more trouble than it's worth; in
621 * particular, we can't create the new heap in a different namespace from
622 * the old, or we will have problems with the TEMP status of temp tables.
624 * Note: the new heap is not a shared relation, even if we are rebuilding
625 * a shared rel. However, we do make the new heap mapped if the source is
626 * mapped. This simplifies swap_relation_files, and is absolutely
627 * necessary for rebuilding pg_class, for reasons explained there.
/* snprintf is bounded by sizeof(NewHeapName), i.e. NAMEDATALEN bytes */
629 snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
631 OIDNewHeap = heap_create_with_catalog(NewHeapName,
632 RelationGetNamespace(OldHeap),
637 OldHeap->rd_rel->relowner,
640 OldHeap->rd_rel->relkind,
641 OldHeap->rd_rel->relpersistence,
643 RelationIsMapped(OldHeap),
650 Assert(OIDNewHeap != InvalidOid);
652 ReleaseSysCache(tuple);
655 * Advance command counter so that the newly-created relation's catalog
656 * tuples will be visible to heap_open.
658 CommandCounterIncrement();
661 * If necessary, create a TOAST table for the new relation.
663 * If the relation doesn't have a TOAST table already, we can't need one
664 * for the new relation. The other way around is possible though: if some
665 * wide columns have been dropped, AlterTableCreateToastTable can decide
666 * that no TOAST table is needed for the new table.
668 * Note that AlterTableCreateToastTable ends with CommandCounterIncrement,
669 * so that the TOAST table will be visible for insertion.
671 toastid = OldHeap->rd_rel->reltoastrelid;
672 if (OidIsValid(toastid))
674 /* keep the existing toast table's reloptions, if any */
/* same pattern as above: tuple and reloptions are reused, and the tuple is released only after AlterTableCreateToastTable() */
675 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
676 if (!HeapTupleIsValid(tuple))
677 elog(ERROR, "cache lookup failed for relation %u", toastid);
678 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
681 reloptions = (Datum) 0;
683 AlterTableCreateToastTable(OIDNewHeap, reloptions);
685 ReleaseSysCache(tuple);
/* NoLock: the lock mode taken by heap_open above is not handed back here */
688 heap_close(OldHeap, NoLock);
694 * Do the physical copying of heap data.
696 * There are two output parameters:
697 * *pSwapToastByContent is set true if toast tables must be swapped by content.
698 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
/*
 * copy_heap_data: copy the tuples of OIDOldHeap into OIDNewHeap through the
 * heap-rewrite module (begin_heap_rewrite ... end_heap_rewrite).
 *
 * OIDNewHeap, OIDOldHeap - target and source heaps; both are opened here
 * OIDOldIndex            - when valid, the index whose order is to be
 *                          reproduced: either by an index scan, or by a heap
 *                          scan feeding a tuplesort when
 *                          plan_cluster_use_sort() is consulted (btree
 *                          indexes only); when invalid, a plain heap scan
 * freeze_min_age,
 * freeze_table_age       - passed to vacuum_set_xid_limits()
 * verbose                - selects INFO instead of DEBUG2 for elevel
 * pSwapToastByContent    - output: true iff both heaps have a toast table
 * pFreezeXid             - output: the freeze cutoff actually used
 */
701 copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
702 int freeze_min_age, int freeze_table_age, bool verbose,
703 bool *pSwapToastByContent, TransactionId *pFreezeXid)
708 TupleDesc oldTupDesc;
709 TupleDesc newTupDesc;
713 IndexScanDesc indexScan;
714 HeapScanDesc heapScan;
716 bool is_system_catalog;
717 TransactionId OldestXmin;
718 TransactionId FreezeXid;
719 RewriteState rwstate;
721 Tuplesortstate *tuplesort;
722 double num_tuples = 0,
724 tups_recently_dead = 0;
725 int elevel = verbose ? INFO : DEBUG2;
728 pg_rusage_init(&ru0);
731 * Open the relations we need.
733 NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
734 OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
735 if (OidIsValid(OIDOldIndex))
736 OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
741 * Their tuple descriptors should be exactly alike, but here we only need
742 * assume that they have the same number of columns.
744 oldTupDesc = RelationGetDescr(OldHeap);
745 newTupDesc = RelationGetDescr(NewHeap);
746 Assert(newTupDesc->natts == oldTupDesc->natts);
748 /* Preallocate values/isnull arrays */
749 natts = newTupDesc->natts;
750 values = (Datum *) palloc(natts * sizeof(Datum));
751 isnull = (bool *) palloc(natts * sizeof(bool));
754 * We need to log the copied data in WAL iff WAL archiving/streaming is
755 * enabled AND it's not a WAL-logged rel.
757 use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
/*
 * NOTE(review): per the expression above, WAL is used when XLogIsNeeded()
 * holds AND the new heap DOES need WAL; the "it's not a WAL-logged rel"
 * wording in the text above looks inverted.
 */
759 /* use_wal off requires smgr_targblock be initially invalid */
760 Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
763 * If both tables have TOAST tables, perform toast swap by content. It is
764 * possible that the old table has a toast table but the new one doesn't,
765 * if toastable columns have been dropped. In that case we have to do
766 * swap by links. This is okay because swap by content is only essential
767 * for system catalogs, and we don't support schema changes for them.
769 if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
771 *pSwapToastByContent = true;
774 * When doing swap by content, any toast pointers written into NewHeap
775 * must use the old toast table's OID, because that's where the toast
776 * data will eventually be found. Set this up by setting rd_toastoid.
777 * Note that we must hold NewHeap open until we are done writing data,
778 * since the relcache will not guarantee to remember this setting once
779 * the relation is closed. Also, this technique depends on the fact
780 * that no one will try to read from the NewHeap until after we've
781 * finished writing it and swapping the rels --- otherwise they could
782 * follow the toast pointers to the wrong place.
784 NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
787 *pSwapToastByContent = false;
790 * compute xids used to freeze and weed out dead tuples. We use -1
791 * freeze_min_age to avoid having CLUSTER freeze tuples earlier than a
792 * plain VACUUM would.
794 vacuum_set_xid_limits(freeze_min_age, freeze_table_age,
795 OldHeap->rd_rel->relisshared,
796 &OldestXmin, &FreezeXid, NULL);
799 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
800 * backwards, so take the max.
802 if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
803 FreezeXid = OldHeap->rd_rel->relfrozenxid;
805 /* return selected value to caller */
806 *pFreezeXid = FreezeXid;
808 /* Remember if it's a system catalog */
809 is_system_catalog = IsSystemRelation(OldHeap);
811 /* Initialize the rewrite operation */
812 rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
815 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
816 * the OldHeap. We know how to use a sort to duplicate the ordering of a
817 * btree index, and will use seqscan-and-sort for that case if the planner
818 * tells us it's cheaper. Otherwise, always indexscan if an index is
819 * provided, else plain seqscan.
821 if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
822 use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
826 /* Set up sorting if wanted */
/* the sort is given maintenance_work_mem as its memory budget argument */
828 tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
829 maintenance_work_mem, false);
834 * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
835 * that still need to be copied, we scan with SnapshotAny and use
836 * HeapTupleSatisfiesVacuum for the visibility test.
838 if (OldIndex != NULL && !use_sort)
841 indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
842 index_rescan(indexScan, NULL, 0, NULL, 0);
846 heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
850 /* Log what we're doing */
/* which of indexScan / tuplesort is non-NULL doubles as the record of the scan strategy chosen above */
851 if (indexScan != NULL)
853 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
854 get_namespace_name(RelationGetNamespace(OldHeap)),
855 RelationGetRelationName(OldHeap),
856 RelationGetRelationName(OldIndex))));
857 else if (tuplesort != NULL)
859 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
860 get_namespace_name(RelationGetNamespace(OldHeap)),
861 RelationGetRelationName(OldHeap))));
864 (errmsg("vacuuming \"%s.%s\"",
865 get_namespace_name(RelationGetNamespace(OldHeap)),
866 RelationGetRelationName(OldHeap))));
869 * Scan through the OldHeap, either in OldIndex order or sequentially;
870 * copy each tuple into the NewHeap, or transiently to the tuplesort
871 * module. Note that we don't bother sorting dead tuples (they won't get
872 * to the new table anyway).
880 CHECK_FOR_INTERRUPTS();
882 if (indexScan != NULL)
884 tuple = index_getnext(indexScan, ForwardScanDirection);
888 /* Since we used no scan keys, should never need to recheck */
889 if (indexScan->xs_recheck)
890 elog(ERROR, "CLUSTER does not support lossy index conditions");
892 buf = indexScan->xs_cbuf;
896 tuple = heap_getnext(heapScan, ForwardScanDirection);
900 buf = heapScan->rs_cbuf;
/*
 * buf is the scan's current buffer (xs_cbuf or rs_cbuf). It is share-locked
 * only around the HeapTupleSatisfiesVacuum() call and unlocked right after
 * the switch.
 */
903 LockBuffer(buf, BUFFER_LOCK_SHARE);
905 switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin, buf))
908 /* Definitely dead */
911 case HEAPTUPLE_RECENTLY_DEAD:
912 tups_recently_dead += 1;
915 /* Live or recently dead, must copy it */
918 case HEAPTUPLE_INSERT_IN_PROGRESS:
921 * Since we hold exclusive lock on the relation, normally the
922 * only way to see this is if it was inserted earlier in our
923 * own transaction. However, it can happen in system
924 * catalogs, since we tend to release write lock before commit
925 * there. Give a warning if neither case applies; but in any
926 * case we had better copy it.
928 if (!is_system_catalog &&
929 !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
930 elog(WARNING, "concurrent insert in progress within table \"%s\"",
931 RelationGetRelationName(OldHeap));
935 case HEAPTUPLE_DELETE_IN_PROGRESS:
938 * Similar situation to INSERT_IN_PROGRESS case.
940 Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
941 if (!is_system_catalog &&
942 !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple->t_data)))
943 elog(WARNING, "concurrent delete in progress within table \"%s\"",
944 RelationGetRelationName(OldHeap));
945 /* treat as recently dead */
946 tups_recently_dead += 1;
950 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
951 isdead = false; /* keep compiler quiet */
955 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
960 /* heap rewrite module still needs to see it... */
961 if (rewrite_heap_dead_tuple(rwstate, tuple))
963 /* A previous recently-dead tuple is now known dead */
965 tups_recently_dead -= 1;
/* surviving tuples either go to the sort (when one is active) or are written to the new heap immediately */
971 if (tuplesort != NULL)
972 tuplesort_putheaptuple(tuplesort, tuple);
974 reform_and_rewrite_tuple(tuple,
975 oldTupDesc, newTupDesc,
977 NewHeap->rd_rel->relhasoids, rwstate);
980 if (indexScan != NULL)
981 index_endscan(indexScan);
982 if (heapScan != NULL)
983 heap_endscan(heapScan);
/*
 * NOTE(review): the tuples read back below come from the tuplesort (via
 * tuplesort_getheaptuple), although the text that follows says
 * "tuplestore".
 */
986 * In scan-and-sort mode, complete the sort, then read out all live tuples
987 * from the tuplestore and write them to the new relation.
989 if (tuplesort != NULL)
991 tuplesort_performsort(tuplesort);
998 CHECK_FOR_INTERRUPTS();
1000 tuple = tuplesort_getheaptuple(tuplesort, true, &shouldfree);
1004 reform_and_rewrite_tuple(tuple,
1005 oldTupDesc, newTupDesc,
1007 NewHeap->rd_rel->relhasoids, rwstate);
/* NOTE(review): presumably guarded by the shouldfree flag filled in by tuplesort_getheaptuple; the guard line is not shown */
1010 heap_freetuple(tuple);
1013 tuplesort_end(tuplesort);
1016 /* Write out any remaining tuples, and fsync if needed */
1017 end_heap_rewrite(rwstate);
1019 /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
1020 NewHeap->rd_toastoid = InvalidOid;
1022 /* Log what we did */
1024 (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1025 RelationGetRelationName(OldHeap),
1026 tups_vacuumed, num_tuples,
1027 RelationGetNumberOfBlocks(OldHeap)),
1028 errdetail("%.0f dead row versions cannot be removed yet.\n"
1031 pg_rusage_show(&ru0))));
/* all three relations are closed with NoLock, i.e. without handing back the lock modes taken at the top */
1037 if (OldIndex != NULL)
1038 index_close(OldIndex, NoLock);
1039 heap_close(OldHeap, NoLock);
1040 heap_close(NewHeap, NoLock);
1044 * Swap the physical files of two given relations.
1046 * We swap the physical identity (reltablespace and relfilenode) while
1047 * keeping the same logical identities of the two relations.
1049 * We can swap associated TOAST data in either of two ways: recursively swap
1050 * the physical content of the toast tables (and their indexes), or swap the
1051 * TOAST links in the given relations' pg_class entries. The former is needed
1052 * to manage rewrites of shared catalogs (where we cannot change the pg_class
1053 * links) while the latter is the only way to handle cases in which a toast
1054 * table is added or removed altogether.
1056 * Additionally, the first relation is marked with relfrozenxid set to
1057 * frozenXid. It seems a bit ugly to have this here, but the caller would
1058 * have to do it anyway, so having it here saves a heap_update. Note: in
1059 * the swap-toast-links case, we assume we don't need to change the toast
1060 * table's relfrozenxid: the new version of the toast table should already
1061 * have relfrozenxid set to RecentXmin, which is good enough.
1063 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1064 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1065 * having to look the information up again later in finish_heap_swap.
1068 swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
1069 bool swap_toast_by_content,
1070 TransactionId frozenXid,
1073 Relation relRelation;
1076 Form_pg_class relform1,
1081 CatalogIndexState indstate;
1083 /* We need writable copies of both pg_class tuples. */
1084 relRelation = heap_open(RelationRelationId, RowExclusiveLock);
1086 reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
1087 if (!HeapTupleIsValid(reltup1))
1088 elog(ERROR, "cache lookup failed for relation %u", r1);
1089 relform1 = (Form_pg_class) GETSTRUCT(reltup1);
1091 reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
1092 if (!HeapTupleIsValid(reltup2))
1093 elog(ERROR, "cache lookup failed for relation %u", r2);
1094 relform2 = (Form_pg_class) GETSTRUCT(reltup2);
1096 relfilenode1 = relform1->relfilenode;
1097 relfilenode2 = relform2->relfilenode;
1099 if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
1101 /* Normal non-mapped relations: swap relfilenodes and reltablespaces */
1102 Assert(!target_is_pg_class);
1104 swaptemp = relform1->relfilenode;
1105 relform1->relfilenode = relform2->relfilenode;
1106 relform2->relfilenode = swaptemp;
1108 swaptemp = relform1->reltablespace;
1109 relform1->reltablespace = relform2->reltablespace;
1110 relform2->reltablespace = swaptemp;
1112 /* Also swap toast links, if we're swapping by links */
1113 if (!swap_toast_by_content)
1115 swaptemp = relform1->reltoastrelid;
1116 relform1->reltoastrelid = relform2->reltoastrelid;
1117 relform2->reltoastrelid = swaptemp;
1119 /* we should NOT swap reltoastidxid */
1125 * Mapped-relation case. Here we have to swap the relation mappings
1126 * instead of modifying the pg_class columns. Both must be mapped.
1128 if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
1129 elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1130 NameStr(relform1->relname));
1133 * We can't change the tablespace of a mapped rel, and we can't handle
1134 * toast link swapping for one either, because we must not apply any
1135 * critical changes to its pg_class row. These cases should be
1136 * prevented by upstream permissions tests, so this check is a
1137 * non-user-facing emergency backstop.
1139 if (relform1->reltablespace != relform2->reltablespace)
1140 elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1141 NameStr(relform1->relname));
1142 if (!swap_toast_by_content &&
1143 (relform1->reltoastrelid || relform2->reltoastrelid))
1144 elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1145 NameStr(relform1->relname));
1148 * Fetch the mappings --- shouldn't fail, but be paranoid
1150 relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
1151 if (!OidIsValid(relfilenode1))
1152 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1153 NameStr(relform1->relname), r1);
1154 relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
1155 if (!OidIsValid(relfilenode2))
1156 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1157 NameStr(relform2->relname), r2);
1160 * Send replacement mappings to relmapper. Note these won't actually
1161 * take effect until CommandCounterIncrement.
1163 RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
1164 RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
1166 /* Pass OIDs of mapped r2 tables back to caller */
1167 *mapped_tables++ = r2;
1171 * In the case of a shared catalog, these next few steps will only affect
1172 * our own database's pg_class row; but that's okay, because they are all
1173 * noncritical updates. That's also an important fact for the case of a
1174 * mapped catalog, because it's possible that we'll commit the map change
1175 * and then fail to commit the pg_class update.
1178 /* set rel1's frozen Xid */
1179 if (relform1->relkind != RELKIND_INDEX)
1181 Assert(TransactionIdIsNormal(frozenXid));
1182 relform1->relfrozenxid = frozenXid;
1185 /* swap size statistics too, since new rel has freshly-updated stats */
1190 swap_pages = relform1->relpages;
1191 relform1->relpages = relform2->relpages;
1192 relform2->relpages = swap_pages;
1194 swap_tuples = relform1->reltuples;
1195 relform1->reltuples = relform2->reltuples;
1196 relform2->reltuples = swap_tuples;
1200 * Update the tuples in pg_class --- unless the target relation of the
1201 * swap is pg_class itself. In that case, there is zero point in making
1202 * changes because we'd be updating the old data that we're about to throw
1203 * away. Because the real work being done here for a mapped relation is
1204 * just to change the relation map settings, it's all right to not update
1205 * the pg_class rows in this case.
1207 if (!target_is_pg_class)
1209 simple_heap_update(relRelation, &reltup1->t_self, reltup1);
1210 simple_heap_update(relRelation, &reltup2->t_self, reltup2);
1212 /* Keep system catalogs current */
1213 indstate = CatalogOpenIndexes(relRelation);
1214 CatalogIndexInsert(indstate, reltup1);
1215 CatalogIndexInsert(indstate, reltup2);
1216 CatalogCloseIndexes(indstate);
1220 /* no update ... but we do still need relcache inval */
1221 CacheInvalidateRelcacheByTuple(reltup1);
1222 CacheInvalidateRelcacheByTuple(reltup2);
1226 * If we have toast tables associated with the relations being swapped,
1227 * deal with them too.
1229 if (relform1->reltoastrelid || relform2->reltoastrelid)
1231 if (swap_toast_by_content)
1233 if (relform1->reltoastrelid && relform2->reltoastrelid)
1235 /* Recursively swap the contents of the toast tables */
1236 swap_relation_files(relform1->reltoastrelid,
1237 relform2->reltoastrelid,
1239 swap_toast_by_content,
1245 /* caller messed up */
1246 elog(ERROR, "cannot swap toast files by content when there's only one");
1252 * We swapped the ownership links, so we need to change dependency
1255 * NOTE: it is possible that only one table has a toast table.
1257 * NOTE: at present, a TOAST table's only dependency is the one on
1258 * its owning table. If more are ever created, we'd need to use
1259 * something more selective than deleteDependencyRecordsFor() to
1260 * get rid of just the link we want.
1262 ObjectAddress baseobject,
1267 * We disallow this case for system catalogs, to avoid the
1268 * possibility that the catalog we're rebuilding is one of the
1269 * ones the dependency changes would change. It's too late to be
1270 * making any data changes to the target catalog.
1272 if (IsSystemClass(relform1))
1273 elog(ERROR, "cannot swap toast files by links for system catalogs");
1275 /* Delete old dependencies */
1276 if (relform1->reltoastrelid)
1278 count = deleteDependencyRecordsFor(RelationRelationId,
1279 relform1->reltoastrelid,
1282 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1285 if (relform2->reltoastrelid)
1287 count = deleteDependencyRecordsFor(RelationRelationId,
1288 relform2->reltoastrelid,
1291 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1295 /* Register new dependencies */
1296 baseobject.classId = RelationRelationId;
1297 baseobject.objectSubId = 0;
1298 toastobject.classId = RelationRelationId;
1299 toastobject.objectSubId = 0;
1301 if (relform1->reltoastrelid)
1303 baseobject.objectId = r1;
1304 toastobject.objectId = relform1->reltoastrelid;
1305 recordDependencyOn(&toastobject, &baseobject,
1306 DEPENDENCY_INTERNAL);
1309 if (relform2->reltoastrelid)
1311 baseobject.objectId = r2;
1312 toastobject.objectId = relform2->reltoastrelid;
1313 recordDependencyOn(&toastobject, &baseobject,
1314 DEPENDENCY_INTERNAL);
1320 * If we're swapping two toast tables by content, do the same for their
1323 if (swap_toast_by_content &&
1324 relform1->reltoastidxid && relform2->reltoastidxid)
1325 swap_relation_files(relform1->reltoastidxid,
1326 relform2->reltoastidxid,
1328 swap_toast_by_content,
1329 InvalidTransactionId,
1333 heap_freetuple(reltup1);
1334 heap_freetuple(reltup2);
1336 heap_close(relRelation, RowExclusiveLock);
1339 * Close both relcache entries' smgr links. We need this kluge because
1340 * both links will be invalidated during upcoming CommandCounterIncrement.
1341 * Whichever of the rels is the second to be cleared will have a dangling
1342 * reference to the other's smgr entry. Rather than trying to avoid this
1343 * by ordering operations just so, it's easiest to close the links first.
1344 * (Fortunately, since one of the entries is local in our transaction,
1345 * it's sufficient to clear out our own relcache this way; the problem
1346 * cannot arise for other backends when they see our update on the
1347 * non-transient relation.)
1349 * Caution: the placement of this step interacts with the decision to
1350 * handle toast rels by recursion. When we are trying to rebuild pg_class
1351 * itself, the smgr close on pg_class must happen after all accesses in
1354 RelationCloseSmgrByOid(r1);
1355 RelationCloseSmgrByOid(r2);
1359 * Remove the transient table that was built by make_new_heap, and finish
1360 * cleaning up (including rebuilding all indexes on the old heap).
1363 finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1364 bool is_system_catalog,
1365 bool swap_toast_by_content,
1366 bool check_constraints,
1367 TransactionId frozenXid)
1369 ObjectAddress object;
1370 Oid mapped_tables[4];
1374 /* Zero out possible results from swapped_relation_files */
1375 memset(mapped_tables, 0, sizeof(mapped_tables));
1378 * Swap the contents of the heap relations (including any toast tables).
1379 * Also set old heap's relfrozenxid to frozenXid.
1381 swap_relation_files(OIDOldHeap, OIDNewHeap,
1382 (OIDOldHeap == RelationRelationId),
1383 swap_toast_by_content, frozenXid, mapped_tables);
1386 * If it's a system catalog, queue an sinval message to flush all
1387 * catcaches on the catalog when we reach CommandCounterIncrement.
1389 if (is_system_catalog)
1390 CacheInvalidateCatalog(OIDOldHeap);
1393 * Rebuild each index on the relation (but not the toast table, which is
1394 * all-new at this point). It is important to do this before the DROP
1395 * step because if we are processing a system catalog that will be used
1396 * during DROP, we want to have its indexes available. There is no
1397 * advantage to the other order anyway because this is all transactional,
1398 * so no chance to reclaim disk space before commit. We do not need a
1399 * final CommandCounterIncrement() because reindex_relation does it.
1401 * Note: because index_build is called via reindex_relation, it will never
1402 * set indcheckxmin true for the indexes. This is OK even though in some
1403 * sense we are building new indexes rather than rebuilding existing ones,
1404 * because the new heap won't contain any HOT chains at all, let alone
1405 * broken ones, so it can't be necessary to set indcheckxmin.
1407 reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1408 if (check_constraints)
1409 reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1410 reindex_relation(OIDOldHeap, reindex_flags);
1412 /* Destroy new heap with old filenode */
1413 object.classId = RelationRelationId;
1414 object.objectId = OIDNewHeap;
1415 object.objectSubId = 0;
1418 * The new relation is local to our transaction and we know nothing
1419 * depends on it, so DROP_RESTRICT should be OK.
1421 performDeletion(&object, DROP_RESTRICT);
1423 /* performDeletion does CommandCounterIncrement at end */
1426 * Now we must remove any relation mapping entries that we set up for the
1427 * transient table, as well as its toast table and toast index if any. If
1428 * we fail to do this before commit, the relmapper will complain about new
1429 * permanent map entries being added post-bootstrap.
1431 for (i = 0; OidIsValid(mapped_tables[i]); i++)
1432 RelationMapRemoveMapping(mapped_tables[i]);
1435 * At this point, everything is kosher except that, if we did toast swap
1436 * by links, the toast table's name corresponds to the transient table.
1437 * The name is irrelevant to the backend because it's referenced by OID,
1438 * but users looking at the catalogs could be confused. Rename it to
1439 * prevent this problem.
1441 * Note no lock required on the relation, because we already hold an
1442 * exclusive lock on it.
1444 if (!swap_toast_by_content)
1448 newrel = heap_open(OIDOldHeap, NoLock);
1449 if (OidIsValid(newrel->rd_rel->reltoastrelid))
1454 char NewToastName[NAMEDATALEN];
1456 toastrel = relation_open(newrel->rd_rel->reltoastrelid,
1458 toastidx = toastrel->rd_rel->reltoastidxid;
1459 toastnamespace = toastrel->rd_rel->relnamespace;
1460 relation_close(toastrel, AccessShareLock);
1462 /* rename the toast table ... */
1463 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1465 RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1469 /* ... and its index too */
1470 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1472 RenameRelationInternal(toastidx,
1476 relation_close(newrel, NoLock);
1482 * Get a list of tables that the current user owns and
1483 * have indisclustered set. Return the list in a List * of rvsToCluster
1484 * with the tableOid and the indexOid on which the table is already
1488 get_tables_to_cluster(MemoryContext cluster_context)
1490 Relation indRelation;
1493 HeapTuple indexTuple;
1494 Form_pg_index index;
1495 MemoryContext old_context;
1500 * Get all indexes that have indisclustered set and are owned by
1501 * appropriate user. System relations or nailed-in relations cannot ever
1502 * have indisclustered set, because CLUSTER will refuse to set it when
1503 * called with one of them as argument.
1505 indRelation = heap_open(IndexRelationId, AccessShareLock);
1507 Anum_pg_index_indisclustered,
1508 BTEqualStrategyNumber, F_BOOLEQ,
1509 BoolGetDatum(true));
1510 scan = heap_beginscan(indRelation, SnapshotNow, 1, &entry);
1511 while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1513 index = (Form_pg_index) GETSTRUCT(indexTuple);
1515 if (!pg_class_ownercheck(index->indrelid, GetUserId()))
1519 * We have to build the list in a different memory context so it will
1520 * survive the cross-transaction processing
1522 old_context = MemoryContextSwitchTo(cluster_context);
1524 rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1525 rvtc->tableOid = index->indrelid;
1526 rvtc->indexOid = index->indexrelid;
1527 rvs = lcons(rvtc, rvs);
1529 MemoryContextSwitchTo(old_context);
1533 relation_close(indRelation, AccessShareLock);
1540 * Reconstruct and rewrite the given tuple
1542 * We cannot simply copy the tuple as-is, for several reasons:
1544 * 1. We'd like to squeeze out the values of any dropped columns, both
1545 * to save space and to ensure we have no corner-case failures. (It's
1546 * possible for example that the new table hasn't got a TOAST table
1547 * and so is unable to store any large values of dropped cols.)
1549 * 2. The tuple might not even be legal for the new table; this is
1550 * currently only known to happen as an after-effect of ALTER TABLE
1553 * So, we must reconstruct the tuple from component Datums.
1556 reform_and_rewrite_tuple(HeapTuple tuple,
1557 TupleDesc oldTupDesc, TupleDesc newTupDesc,
1558 Datum *values, bool *isnull,
1559 bool newRelHasOids, RewriteState rwstate)
1561 HeapTuple copiedTuple;
1564 heap_deform_tuple(tuple, oldTupDesc, values, isnull);
1566 /* Be sure to null out any dropped columns */
1567 for (i = 0; i < newTupDesc->natts; i++)
1569 if (newTupDesc->attrs[i]->attisdropped)
1573 copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
1575 /* Preserve OID, if any */
1577 HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple));
1579 /* The heap rewrite module does the rest */
1580 rewrite_heap_tuple(rwstate, tuple, copiedTuple);
1582 heap_freetuple(copiedTuple);