/*-------------------------------------------------------------------------
*
* cluster.c
- * Paul Brown's implementation of cluster index.
+ * CLUSTER a table on an index.
+ *
+ * There is hardly anything left of Paul Brown's original implementation...
*
- * I am going to use the rename function as a model for this in the
- * parser and executor, and the vacuum code as an example in this
- * file. As I go - in contrast to the rest of postgres - there will
- * be BUCKETS of comments. This is to allow reviewers to understand
- * my (probably bogus) assumptions about the way this works.
- * [pbrown '94]
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994-5, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.85 2002/08/10 21:00:34 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.86 2002/08/11 21:17:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
+#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
/*
* We need one of these structs for each index in the relation to be
* clustered. It's basically the data needed by index_create() so
- * we can recreate the indexes after destroying the old heap.
+ * we can rebuild the indexes on the new heap.
*/
typedef struct
{
+ Oid indexOID;
char *indexName;
IndexInfo *indexInfo;
Oid accessMethodOID;
Oid *classOID;
- Oid indexOID;
- bool isPrimary;
} IndexAttrs;
-static Oid copy_heap(Oid OIDOldHeap, const char *NewName);
-static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
-static List *get_indexattr_list (Oid OIDOldHeap);
+static Oid make_new_heap(Oid OIDOldHeap, const char *NewName);
+static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
+static List *get_indexattr_list(Relation OldHeap);
static void recreate_indexattr(Oid OIDOldHeap, List *indexes);
static void swap_relfilenodes(Oid r1, Oid r2);
-Relation RelationIdGetRelation(Oid relationId);
/*
* cluster
* swapping the relfilenodes of the new table and the old table, so
* the OID of the original table is preserved. Thus we do not lose
* GRANT, inheritance nor references to this table (this was a bug
- * in releases thru 7.3)
+ * in releases thru 7.3).
*
- * Also create new indexes and swap the filenodes with the old indexes
- * the same way we do for the relation.
+ * Also create new indexes and swap the filenodes with the old indexes the
+ * same way we do for the relation. Since we are effectively bulk-loading
+ * the new table, it's better to create the indexes afterwards than to fill
+ * them incrementally while we load the table.
*
- * TODO:
- * maybe we can get away with AccessShareLock for the table.
- * Concurrency would be much improved. Only acquire
- * AccessExclusiveLock right before swapping the filenodes.
- * This would allow users to CLUSTER on a regular basis,
- * practically eliminating the need for auto-clustered indexes.
+ * Permissions checks were done already.
*/
void
cluster(RangeVar *oldrelation, char *oldindexname)
RelationGetNamespace(OldHeap));
if (!OidIsValid(OIDOldIndex))
elog(ERROR, "CLUSTER: cannot find index \"%s\" for table \"%s\"",
- oldindexname, oldrelation->relname);
+ oldindexname, RelationGetRelationName(OldHeap));
OldIndex = index_open(OIDOldIndex);
LockRelation(OldIndex, AccessExclusiveLock);
/*
* Check that index is in fact an index on the given relation
*/
- if (OldIndex->rd_index->indrelid != OIDOldHeap)
+ if (OldIndex->rd_index == NULL ||
+ OldIndex->rd_index->indrelid != OIDOldHeap)
elog(ERROR, "CLUSTER: \"%s\" is not an index for table \"%s\"",
- oldindexname, oldrelation->relname);
+ RelationGetRelationName(OldIndex),
+ RelationGetRelationName(OldHeap));
- /* Drop relcache refcnts, but do NOT give up the locks */
- heap_close(OldHeap, NoLock);
- index_close(OldIndex);
+ /*
+ * Disallow clustering system relations. This will definitely NOT work
+ * for shared relations (we have no way to update pg_class rows in other
+ * databases), nor for nailed-in-cache relations (the relfilenode values
+ * for those are hardwired, see relcache.c). It might work for other
+ * system relations, but I ain't gonna risk it.
+ */
+ if (IsSystemRelation(OldHeap))
+ elog(ERROR, "CLUSTER: cannot cluster system relation \"%s\"",
+ RelationGetRelationName(OldHeap));
/* Save the information of all indexes on the relation. */
- indexes = get_indexattr_list(OIDOldHeap);
+ indexes = get_indexattr_list(OldHeap);
+
+ /* Drop relcache refcnts, but do NOT give up the locks */
+ index_close(OldIndex);
+ heap_close(OldHeap, NoLock);
/*
- * Create the new heap with a temporary name.
+ * Create the new heap, using a temporary name in the same namespace
+ * as the existing table. NOTE: there is some risk of collision with user
+ * relnames. Working around this seems more trouble than it's worth; in
+ * particular, we can't create the new heap in a different namespace from
+ * the old, or we will have problems with the TEMP status of temp tables.
*/
- snprintf(NewHeapName, NAMEDATALEN, "temp_%u", OIDOldHeap);
+ snprintf(NewHeapName, NAMEDATALEN, "pg_temp_%u", OIDOldHeap);
- OIDNewHeap = copy_heap(OIDOldHeap, NewHeapName);
+ OIDNewHeap = make_new_heap(OIDOldHeap, NewHeapName);
- /* We do not need CommandCounterIncrement() because copy_heap did it. */
+ /* We don't need CommandCounterIncrement() because make_new_heap did it. */
/*
* Copy the heap data into the new table in the desired order.
*/
- rebuildheap(OIDNewHeap, OIDOldHeap, OIDOldIndex);
+ copy_heap_data(OIDNewHeap, OIDOldHeap, OIDOldIndex);
- /* To make the new heap's data visible. */
+ /* To make the new heap's data visible (probably not needed?). */
CommandCounterIncrement();
/* Swap the relfilenodes of the old and new heaps. */
- swap_relfilenodes(OIDNewHeap, OIDOldHeap);
+ swap_relfilenodes(OIDOldHeap, OIDNewHeap);
CommandCounterIncrement();
object.objectId = OIDNewHeap;
object.objectSubId = 0;
- /* The relation is local to our transaction and we know nothin
+ /*
+ * The new relation is local to our transaction and we know nothing
* depends on it, so DROP_RESTRICT should be OK.
*/
performDeletion(&object, DROP_RESTRICT);
/* performDeletion does CommandCounterIncrement at end */
- /* Recreate the indexes on the relation. We do not need
- * CommandCounterIncrement() because recreate_indexattr does it.
- */
- recreate_indexattr(OIDOldHeap, indexes);
+ /*
+ * Recreate each index on the relation. We do not need
+ * CommandCounterIncrement() because recreate_indexattr does it.
+ */
+ recreate_indexattr(OIDOldHeap, indexes);
}
+/*
+ * Create the new table that we will fill with correctly-ordered data.
+ */
static Oid
-copy_heap(Oid OIDOldHeap, const char *NewName)
+make_new_heap(Oid OIDOldHeap, const char *NewName)
{
TupleDesc OldHeapDesc,
tupdesc;
return OIDNewHeap;
}
+/*
+ * Do the physical copying of heap data.
+ */
static void
-rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
+copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
{
- Relation LocalNewHeap,
- LocalOldHeap,
- LocalOldIndex;
- IndexScanDesc ScanDesc;
- HeapTuple LocalHeapTuple;
+ Relation NewHeap,
+ OldHeap,
+ OldIndex;
+ IndexScanDesc scan;
+ HeapTuple tuple;
/*
* Open the relations I need. Scan through the OldHeap on the OldIndex
* and insert each tuple into the NewHeap.
*/
- LocalNewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
- LocalOldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
- LocalOldIndex = index_open(OIDOldIndex);
+ NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
+ OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
+ OldIndex = index_open(OIDOldIndex);
- ScanDesc = index_beginscan(LocalOldHeap, LocalOldIndex,
- SnapshotNow, 0, (ScanKey) NULL);
+ scan = index_beginscan(OldHeap, OldIndex, SnapshotNow, 0, (ScanKey) NULL);
- while ((LocalHeapTuple = index_getnext(ScanDesc, ForwardScanDirection)) != NULL)
+ while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
{
/*
* We must copy the tuple because heap_insert() will overwrite
* retrieved tuple will actually be in a disk buffer! Thus,
* the source relation would get trashed, which is bad news if
* we abort later on. (This was a bug in releases thru 7.0)
+ *
+ * Note that the copied tuple will have the original OID, if any,
+ * so this does preserve OIDs.
*/
- HeapTuple copiedTuple = heap_copytuple(LocalHeapTuple);
+ HeapTuple copiedTuple = heap_copytuple(tuple);
+
+ simple_heap_insert(NewHeap, copiedTuple);
- simple_heap_insert(LocalNewHeap, copiedTuple);
heap_freetuple(copiedTuple);
CHECK_FOR_INTERRUPTS();
}
- index_endscan(ScanDesc);
+ index_endscan(scan);
- index_close(LocalOldIndex);
- heap_close(LocalOldHeap, NoLock);
- heap_close(LocalNewHeap, NoLock);
+ index_close(OldIndex);
+ heap_close(OldHeap, NoLock);
+ heap_close(NewHeap, NoLock);
}
-/* Get the necessary info about the indexes in the relation and
- * return a List of IndexAttrs.
+/*
+ * Get the necessary info about the indexes of the relation and
+ * return a list of IndexAttrs structures.
*/
-List *
-get_indexattr_list (Oid OIDOldHeap)
+static List *
+get_indexattr_list(Relation OldHeap)
{
- ScanKeyData entry;
- HeapScanDesc scan;
- Relation indexRelation;
- HeapTuple indexTuple;
List *indexes = NIL;
- IndexAttrs *attrs;
- HeapTuple tuple;
- Form_pg_index index;
-
- /* Grab the index tuples by looking into RelationRelationName
- * by the OID of the old heap.
- */
- indexRelation = heap_openr(IndexRelationName, AccessShareLock);
- ScanKeyEntryInitialize(&entry, 0, Anum_pg_index_indrelid,
- F_OIDEQ, ObjectIdGetDatum(OIDOldHeap));
- scan = heap_beginscan(indexRelation, SnapshotNow, 1, &entry);
- while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ List *indlist;
+
+ /* Ask the relcache to produce a list of the indexes of the old rel */
+ foreach(indlist, RelationGetIndexList(OldHeap))
{
- index = (Form_pg_index) GETSTRUCT(indexTuple);
+ Oid indexOID = (Oid) lfirsti(indlist);
+ HeapTuple indexTuple;
+ HeapTuple classTuple;
+ Form_pg_index indexForm;
+ Form_pg_class classForm;
+ IndexAttrs *attrs;
+
+ indexTuple = SearchSysCache(INDEXRELID,
+ ObjectIdGetDatum(indexOID),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "Cache lookup failed for index %u", indexOID);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+ Assert(indexForm->indexrelid == indexOID);
attrs = (IndexAttrs *) palloc(sizeof(IndexAttrs));
- attrs->indexInfo = BuildIndexInfo(index);
- attrs->isPrimary = index->indisprimary;
- attrs->indexOID = index->indexrelid;
-
- /* The opclasses are copied verbatim from the original indexes.
- */
- attrs->classOID = (Oid *)palloc(sizeof(Oid) *
- attrs->indexInfo->ii_NumIndexAttrs);
- memcpy(attrs->classOID, index->indclass,
- sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
-
- /* Name and access method of each index come from
- * RelationRelationName.
- */
- tuple = SearchSysCache(RELOID,
- ObjectIdGetDatum(attrs->indexOID),
- 0, 0, 0);
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "CLUSTER: cannot find index %u", attrs->indexOID);
- attrs->indexName = pstrdup(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname));
- attrs->accessMethodOID = ((Form_pg_class) GETSTRUCT(tuple))->relam;
- ReleaseSysCache(tuple);
+ attrs->indexOID = indexOID;
+ attrs->indexInfo = BuildIndexInfo(indexForm);
+ attrs->classOID = (Oid *)
+ palloc(sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
+ memcpy(attrs->classOID, indexForm->indclass,
+ sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
+
+ /* Name and access method of each index come from pg_class */
+ classTuple = SearchSysCache(RELOID,
+ ObjectIdGetDatum(indexOID),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(classTuple))
+ elog(ERROR, "Cache lookup failed for index %u", indexOID);
+ classForm = (Form_pg_class) GETSTRUCT(classTuple);
+
+ attrs->indexName = pstrdup(NameStr(classForm->relname));
+ attrs->accessMethodOID = classForm->relam;
+
+ ReleaseSysCache(classTuple);
+ ReleaseSysCache(indexTuple);
/* Cons the gathered data into the list. We do not care about
* ordering, and this is more efficient than append.
*/
- indexes=lcons((void *)attrs, indexes);
+ indexes = lcons(attrs, indexes);
}
- heap_endscan(scan);
- heap_close(indexRelation, AccessShareLock);
+
return indexes;
}
-/* Create new indexes and swap the filenodes with old indexes. Then drop
- * the new index (carrying the old heap along).
+/*
+ * Create new indexes and swap the filenodes with old indexes. Then drop
+ * the new index (carrying the old index filenode along).
*/
-void
+static void
recreate_indexattr(Oid OIDOldHeap, List *indexes)
{
- IndexAttrs *attrs;
List *elem;
- Oid newIndexOID;
- char newIndexName[NAMEDATALEN];
- ObjectAddress object;
- foreach (elem, indexes)
+ foreach(elem, indexes)
{
- attrs=(IndexAttrs *) lfirst(elem);
+ IndexAttrs *attrs = (IndexAttrs *) lfirst(elem);
+ Oid newIndexOID;
+ char newIndexName[NAMEDATALEN];
+ ObjectAddress object;
/* Create the new index under a temporary name */
- snprintf(newIndexName, NAMEDATALEN, "temp_%u", attrs->indexOID);
+ snprintf(newIndexName, NAMEDATALEN, "pg_temp_%u", attrs->indexOID);
- /* The new index will have constraint status set to false,
+ /*
+ * The new index will have primary and constraint status set to false,
* but since we will only use its filenode it doesn't matter:
* after the filenode swap the index will keep the constraint
* status of the old index.
*/
newIndexOID = index_create(OIDOldHeap, newIndexName,
attrs->indexInfo, attrs->accessMethodOID,
- attrs->classOID, attrs->isPrimary,
+ attrs->classOID, false,
false, allowSystemTableMods);
CommandCounterIncrement();
/* Swap the filenodes. */
- swap_relfilenodes(newIndexOID, attrs->indexOID);
- setRelhasindex(OIDOldHeap, true, attrs->isPrimary, InvalidOid);
+ swap_relfilenodes(attrs->indexOID, newIndexOID);
+
+ CommandCounterIncrement();
/* Destroy new index with old filenode */
object.classId = RelOid_pg_class;
object.objectId = newIndexOID;
object.objectSubId = 0;
- /* The relation is local to our transaction and we know
+ /*
+ * The relation is local to our transaction and we know
* nothing depends on it, so DROP_RESTRICT should be OK.
*/
performDeletion(&object, DROP_RESTRICT);
/* performDeletion does CommandCounterIncrement() at its end */
-
- pfree(attrs->classOID);
- pfree(attrs);
}
- freeList(indexes);
}
-/* Swap the relfilenodes for two given relations.
+/*
+ * Swap the relfilenodes for two given relations.
+ *
+ * Also swap any TOAST links, so that the toast data moves along with
+ * the main-table data.
*/
-void
+static void
swap_relfilenodes(Oid r1, Oid r2)
{
- /* I can probably keep RelationRelationName open in the main
- * function and pass the Relation around so I don't have to open
- * it every time.
- */
Relation relRelation,
rel;
- HeapTuple reltup[2];
- Oid tempRFNode;
+ HeapTuple reltup1,
+ reltup2;
+ Form_pg_class relform1,
+ relform2;
+ Oid swaptemp;
int i;
CatalogIndexState indstate;
- /* We need both RelationRelationName tuples. */
+ /* We need writable copies of both pg_class tuples. */
relRelation = heap_openr(RelationRelationName, RowExclusiveLock);
- reltup[0] = SearchSysCacheCopy(RELOID,
- ObjectIdGetDatum(r1),
- 0, 0, 0);
- if (!HeapTupleIsValid(reltup[0]))
+ reltup1 = SearchSysCacheCopy(RELOID,
+ ObjectIdGetDatum(r1),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(reltup1))
elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r1);
- reltup[1] = SearchSysCacheCopy(RELOID,
- ObjectIdGetDatum(r2),
- 0, 0, 0);
- if (!HeapTupleIsValid(reltup[1]))
+ relform1 = (Form_pg_class) GETSTRUCT(reltup1);
+
+ reltup2 = SearchSysCacheCopy(RELOID,
+ ObjectIdGetDatum(r2),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(reltup2))
elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2);
+ relform2 = (Form_pg_class) GETSTRUCT(reltup2);
- /* The buffer manager gets confused if we swap relfilenodes for
+ /*
+ * The buffer manager gets confused if we swap relfilenodes for
* relations that are not both local or non-local to this transaction.
* Flush the buffers on both relations so the buffer manager can
- * forget about'em.
+ * forget about'em. (XXX this might not be necessary anymore?)
*/
-
- rel = RelationIdGetRelation(r1);
+ rel = relation_open(r1, NoLock);
i = FlushRelationBuffers(rel, 0);
if (i < 0)
elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
- RelationClose(rel);
- rel = RelationIdGetRelation(r1);
+ relation_close(rel, NoLock);
+
+ rel = relation_open(r2, NoLock);
i = FlushRelationBuffers(rel, 0);
if (i < 0)
elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
- RelationClose(rel);
+ relation_close(rel, NoLock);
+
+ /*
+ * Actually swap the filenode and TOAST fields in the two tuples
+ */
+ swaptemp = relform1->relfilenode;
+ relform1->relfilenode = relform2->relfilenode;
+ relform2->relfilenode = swaptemp;
- /* Actually swap the filenodes */
+ swaptemp = relform1->reltoastrelid;
+ relform1->reltoastrelid = relform2->reltoastrelid;
+ relform2->reltoastrelid = swaptemp;
- tempRFNode = ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode;
- ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode =
- ((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode;
- ((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode = tempRFNode;
+ /* we should not swap reltoastidxid */
- /* Update the RelationRelationName tuples */
- simple_heap_update(relRelation, &reltup[1]->t_self, reltup[1]);
- simple_heap_update(relRelation, &reltup[0]->t_self, reltup[0]);
+ /* Update the tuples in pg_class */
+ simple_heap_update(relRelation, &reltup1->t_self, reltup1);
+ simple_heap_update(relRelation, &reltup2->t_self, reltup2);
- /* To keep system catalogs current. */
+ /* Keep system catalogs current */
indstate = CatalogOpenIndexes(relRelation);
- CatalogIndexInsert(indstate, reltup[1]);
- CatalogIndexInsert(indstate, reltup[0]);
+ CatalogIndexInsert(indstate, reltup1);
+ CatalogIndexInsert(indstate, reltup2);
CatalogCloseIndexes(indstate);
- heap_close(relRelation, NoLock);
- heap_freetuple(reltup[0]);
- heap_freetuple(reltup[1]);
+ /*
+ * If we have toast tables associated with the relations being swapped,
+ * change their dependency links to re-associate them with their new
+ * owning relations. Otherwise the wrong one will get dropped ...
+ *
+ * NOTE: for now, we can assume the new table will have a TOAST table
+ * if and only if the old one does. This logic might need work if we
+ * get smarter about dropped columns.
+ *
+ * NOTE: at present, a TOAST table's only dependency is the one on
+ * its owning table. If more are ever created, we'd need to use something
+ * more selective than deleteDependencyRecordsFor() to get rid of only
+ * the link we want.
+ */
+ if (relform1->reltoastrelid || relform2->reltoastrelid)
+ {
+ ObjectAddress baseobject,
+ toastobject;
+ long count;
+
+ if (!(relform1->reltoastrelid && relform2->reltoastrelid))
+ elog(ERROR, "CLUSTER: expected both swapped tables to have TOAST tables");
+
+ /* Delete old dependencies */
+ count = deleteDependencyRecordsFor(RelOid_pg_class,
+ relform1->reltoastrelid);
+ if (count != 1)
+ elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
+ count);
+ count = deleteDependencyRecordsFor(RelOid_pg_class,
+ relform2->reltoastrelid);
+ if (count != 1)
+ elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
+ count);
+
+ /* Register new dependencies */
+ baseobject.classId = RelOid_pg_class;
+ baseobject.objectId = r1;
+ baseobject.objectSubId = 0;
+ toastobject.classId = RelOid_pg_class;
+ toastobject.objectId = relform1->reltoastrelid;
+ toastobject.objectSubId = 0;
+
+ recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+
+ baseobject.objectId = r2;
+ toastobject.objectId = relform2->reltoastrelid;
+
+ recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+ }
+
+ /*
+ * Blow away the old relcache entries now. We need this kluge because
+ * relcache.c indexes relcache entries by rd_node as well as OID.
+ * It will get confused if it is asked to (re)build an entry with a new
+ * rd_node value when there is still another entry laying about with
+ * that same rd_node value. (Fortunately, since one of the entries
+ * is local in our transaction, it's sufficient to clear out our own
+ * relcache this way; the problem cannot arise for other backends when
+ * they see our update on the non-local relation.)
+ */
+ RelationForgetRelation(r1);
+ RelationForgetRelation(r2);
+
+ /* Clean up. */
+ heap_freetuple(reltup1);
+ heap_freetuple(reltup2);
+
+ heap_close(relRelation, RowExclusiveLock);
}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.171 2002/08/06 02:36:35 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.172 2002/08/11 21:17:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* normal relations are not nailed into the cache; nor can a pre-existing
- * relation be new or temp.
+ * relation be new. It could be temp though. (Actually, it could be new
+ * too, but it's okay to forget that fact if forced to flush the entry.)
*/
relation->rd_isnailed = false;
relation->rd_isnew = false;
- relation->rd_istemp = false;
+ relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
/*
* initialize the tuple descriptor (relation->rd_att).
relation->rd_fd = -1;
/*
- * insert newly created relation into proper relcaches, restore memory
- * context and return the new reldesc.
+ * Insert newly created relation into relcache hash tables.
*/
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
RelationCacheInsert(relation);
MemoryContextSwitchTo(oldcxt);
+ /*
+ * If it's a temp rel, RelationGetNumberOfBlocks will assume that
+ * rd_nblocks is correct. Must forcibly update the block count when
+ * creating the relcache entry. But if we are doing a rebuild, don't
+ * do this yet; leave it to RelationClearRelation to do at the end.
+ * (Otherwise, an elog in RelationUpdateNumberOfBlocks would leave us
+ * with inconsistent relcache state.)
+ */
+ if (relation->rd_istemp && oldrelation == NULL)
+ RelationUpdateNumberOfBlocks(relation);
+
return relation;
}
#ifdef RELCACHE_FORCE_RELEASE
if (RelationHasReferenceCountZero(relation) &&
- !relation->rd_isnew &&
- !relation->rd_istemp)
+ !relation->rd_isnew)
RelationClearRelation(relation, false);
#endif
}
{
/*
* When rebuilding an open relcache entry, must preserve ref count
- * and new/temp flags. Also attempt to preserve the tupledesc,
- * rewrite rules, and trigger substructures in place. Furthermore
- * we save/restore rd_nblocks (in case it is a new/temp relation)
- * *and* call RelationGetNumberOfBlocks (in case it isn't).
+ * and rd_isnew flag. Also attempt to preserve the tupledesc,
+ * rewrite rules, and trigger substructures in place.
*/
int old_refcnt = relation->rd_refcnt;
bool old_isnew = relation->rd_isnew;
- bool old_istemp = relation->rd_istemp;
TupleDesc old_att = relation->rd_att;
RuleLock *old_rules = relation->rd_rules;
MemoryContext old_rulescxt = relation->rd_rulescxt;
TriggerDesc *old_trigdesc = relation->trigdesc;
- BlockNumber old_nblocks = relation->rd_nblocks;
RelationBuildDescInfo buildinfo;
buildinfo.infotype = INFO_RELID;
}
RelationSetReferenceCount(relation, old_refcnt);
relation->rd_isnew = old_isnew;
- relation->rd_istemp = old_istemp;
if (equalTupleDescs(old_att, relation->rd_att))
{
FreeTupleDesc(relation->rd_att);
}
else
FreeTriggerDesc(old_trigdesc);
- relation->rd_nblocks = old_nblocks;
/*
- * this is kind of expensive, but I think we must do it in case
- * relation has been truncated...
+ * Update rd_nblocks. This is kind of expensive, but I think we must
+ * do it in case relation has been truncated... we definitely must
+ * do it if the rel is new or temp, since RelationGetNumberOfBlocks
+ * will subsequently assume that the block count is correct.
*/
- relation->rd_nblocks = RelationGetNumberOfBlocks(relation);
+ RelationUpdateNumberOfBlocks(relation);
}
}
{
bool rebuild;
- if (relation->rd_isnew || relation->rd_istemp)
+ if (relation->rd_isnew)
{
/*
- * New and temp relcache entries must always be rebuilt, not
- * flushed; else we'd forget those two important status bits.
+ * New relcache entries are always rebuilt, not flushed; else we'd
+ * forget the "new" status of the relation, which is a useful
+ * optimization to have.
*/
rebuild = true;
}
else
{
/*
- * Nonlocal rels can be dropped from the relcache if not open.
+ * Pre-existing rels can be dropped from the relcache if not open.
*/
rebuild = !RelationHasReferenceCountZero(relation);
}
relcacheInvalsReceived++;
- if (RelationHasReferenceCountZero(relation) && !relation->rd_istemp)
+ if (RelationHasReferenceCountZero(relation))
{
/* Delete this entry immediately */
RelationClearRelation(relation, false);
*
* During commit, reset the flag to false, since we are now out of the
* creating transaction. During abort, simply delete the relcache
- * entry --- it isn't interesting any longer.
+ * entry --- it isn't interesting any longer. (NOTE: if we have
+ * forgotten the isnew state of a new relation due to a forced cache
+ * flush, the entry will get deleted anyway by shared-cache-inval
+ * processing of the aborted pg_class insertion.)
*/
if (relation->rd_isnew)
{
CREATE TABLE clstr_tst (a SERIAL PRIMARY KEY,
b INT,
c TEXT,
+ d TEXT,
CONSTRAINT clstr_tst_con FOREIGN KEY (b) REFERENCES clstr_tst_s);
NOTICE: CREATE TABLE will create implicit sequence 'clstr_tst_a_seq' for SERIAL column 'clstr_tst.a'
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'clstr_tst_pkey' for table 'clstr_tst'
INSERT INTO clstr_tst (b, c) VALUES (7, 'siete');
INSERT INTO clstr_tst (b, c) VALUES (16, 'dieciseis');
INSERT INTO clstr_tst (b, c) VALUES (8, 'ocho');
-INSERT INTO clstr_tst (b, c) VALUES (6, 'seis');
+-- This entry is needed to test that TOASTED values are copied correctly.
+INSERT INTO clstr_tst (b, c, d) VALUES (6, 'seis', repeat('xyzzy', 100000));
CLUSTER clstr_tst_c ON clstr_tst;
-SELECT * from clstr_tst;
- a | b | c
-----+----+---------------
- 10 | 14 | catorce
- 18 | 5 | cinco
- 9 | 4 | cuatro
- 26 | 19 | diecinueve
- 12 | 18 | dieciocho
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
- 2 | 10 | diez
- 23 | 12 | doce
- 11 | 2 | dos
- 25 | 9 | nueve
- 31 | 8 | ocho
- 1 | 11 | once
- 28 | 15 | quince
- 32 | 6 | seis
- 29 | 7 | siete
- 15 | 13 | trece
- 22 | 30 | treinta
- 17 | 32 | treinta y dos
- 3 | 31 | treinta y uno
- 5 | 3 | tres
- 20 | 1 | uno
- 6 | 20 | veinte
- 14 | 25 | veinticinco
- 21 | 24 | veinticuatro
- 4 | 22 | veintidos
- 19 | 29 | veintinueve
- 16 | 28 | veintiocho
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
- 7 | 23 | veintitres
- 8 | 21 | veintiuno
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst;
+ a | b | c | substring | length
+----+----+---------------+--------------------------------+--------
+ 10 | 14 | catorce | |
+ 18 | 5 | cinco | |
+ 9 | 4 | cuatro | |
+ 26 | 19 | diecinueve | |
+ 12 | 18 | dieciocho | |
+ 30 | 16 | dieciseis | |
+ 24 | 17 | diecisiete | |
+ 2 | 10 | diez | |
+ 23 | 12 | doce | |
+ 11 | 2 | dos | |
+ 25 | 9 | nueve | |
+ 31 | 8 | ocho | |
+ 1 | 11 | once | |
+ 28 | 15 | quince | |
+ 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 | 7 | siete | |
+ 15 | 13 | trece | |
+ 22 | 30 | treinta | |
+ 17 | 32 | treinta y dos | |
+ 3 | 31 | treinta y uno | |
+ 5 | 3 | tres | |
+ 20 | 1 | uno | |
+ 6 | 20 | veinte | |
+ 14 | 25 | veinticinco | |
+ 21 | 24 | veinticuatro | |
+ 4 | 22 | veintidos | |
+ 19 | 29 | veintinueve | |
+ 16 | 28 | veintiocho | |
+ 27 | 26 | veintiseis | |
+ 13 | 27 | veintisiete | |
+ 7 | 23 | veintitres | |
+ 8 | 21 | veintiuno | |
(32 rows)
-SELECT * from clstr_tst ORDER BY a;
- a | b | c
-----+----+---------------
- 1 | 11 | once
- 2 | 10 | diez
- 3 | 31 | treinta y uno
- 4 | 22 | veintidos
- 5 | 3 | tres
- 6 | 20 | veinte
- 7 | 23 | veintitres
- 8 | 21 | veintiuno
- 9 | 4 | cuatro
- 10 | 14 | catorce
- 11 | 2 | dos
- 12 | 18 | dieciocho
- 13 | 27 | veintisiete
- 14 | 25 | veinticinco
- 15 | 13 | trece
- 16 | 28 | veintiocho
- 17 | 32 | treinta y dos
- 18 | 5 | cinco
- 19 | 29 | veintinueve
- 20 | 1 | uno
- 21 | 24 | veinticuatro
- 22 | 30 | treinta
- 23 | 12 | doce
- 24 | 17 | diecisiete
- 25 | 9 | nueve
- 26 | 19 | diecinueve
- 27 | 26 | veintiseis
- 28 | 15 | quince
- 29 | 7 | siete
- 30 | 16 | dieciseis
- 31 | 8 | ocho
- 32 | 6 | seis
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a;
+ a | b | c | substring | length
+----+----+---------------+--------------------------------+--------
+ 1 | 11 | once | |
+ 2 | 10 | diez | |
+ 3 | 31 | treinta y uno | |
+ 4 | 22 | veintidos | |
+ 5 | 3 | tres | |
+ 6 | 20 | veinte | |
+ 7 | 23 | veintitres | |
+ 8 | 21 | veintiuno | |
+ 9 | 4 | cuatro | |
+ 10 | 14 | catorce | |
+ 11 | 2 | dos | |
+ 12 | 18 | dieciocho | |
+ 13 | 27 | veintisiete | |
+ 14 | 25 | veinticinco | |
+ 15 | 13 | trece | |
+ 16 | 28 | veintiocho | |
+ 17 | 32 | treinta y dos | |
+ 18 | 5 | cinco | |
+ 19 | 29 | veintinueve | |
+ 20 | 1 | uno | |
+ 21 | 24 | veinticuatro | |
+ 22 | 30 | treinta | |
+ 23 | 12 | doce | |
+ 24 | 17 | diecisiete | |
+ 25 | 9 | nueve | |
+ 26 | 19 | diecinueve | |
+ 27 | 26 | veintiseis | |
+ 28 | 15 | quince | |
+ 29 | 7 | siete | |
+ 30 | 16 | dieciseis | |
+ 31 | 8 | ocho | |
+ 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
(32 rows)
-SELECT * from clstr_tst ORDER BY b;
- a | b | c
-----+----+---------------
- 20 | 1 | uno
- 11 | 2 | dos
- 5 | 3 | tres
- 9 | 4 | cuatro
- 18 | 5 | cinco
- 32 | 6 | seis
- 29 | 7 | siete
- 31 | 8 | ocho
- 25 | 9 | nueve
- 2 | 10 | diez
- 1 | 11 | once
- 23 | 12 | doce
- 15 | 13 | trece
- 10 | 14 | catorce
- 28 | 15 | quince
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
- 12 | 18 | dieciocho
- 26 | 19 | diecinueve
- 6 | 20 | veinte
- 8 | 21 | veintiuno
- 4 | 22 | veintidos
- 7 | 23 | veintitres
- 21 | 24 | veinticuatro
- 14 | 25 | veinticinco
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
- 16 | 28 | veintiocho
- 19 | 29 | veintinueve
- 22 | 30 | treinta
- 3 | 31 | treinta y uno
- 17 | 32 | treinta y dos
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY b;
+ a | b | c | substring | length
+----+----+---------------+--------------------------------+--------
+ 20 | 1 | uno | |
+ 11 | 2 | dos | |
+ 5 | 3 | tres | |
+ 9 | 4 | cuatro | |
+ 18 | 5 | cinco | |
+ 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 | 7 | siete | |
+ 31 | 8 | ocho | |
+ 25 | 9 | nueve | |
+ 2 | 10 | diez | |
+ 1 | 11 | once | |
+ 23 | 12 | doce | |
+ 15 | 13 | trece | |
+ 10 | 14 | catorce | |
+ 28 | 15 | quince | |
+ 30 | 16 | dieciseis | |
+ 24 | 17 | diecisiete | |
+ 12 | 18 | dieciocho | |
+ 26 | 19 | diecinueve | |
+ 6 | 20 | veinte | |
+ 8 | 21 | veintiuno | |
+ 4 | 22 | veintidos | |
+ 7 | 23 | veintitres | |
+ 21 | 24 | veinticuatro | |
+ 14 | 25 | veinticinco | |
+ 27 | 26 | veintiseis | |
+ 13 | 27 | veintisiete | |
+ 16 | 28 | veintiocho | |
+ 19 | 29 | veintinueve | |
+ 22 | 30 | treinta | |
+ 3 | 31 | treinta y uno | |
+ 17 | 32 | treinta y dos | |
(32 rows)
-SELECT * from clstr_tst ORDER BY c;
- a | b | c
-----+----+---------------
- 10 | 14 | catorce
- 18 | 5 | cinco
- 9 | 4 | cuatro
- 26 | 19 | diecinueve
- 12 | 18 | dieciocho
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
- 2 | 10 | diez
- 23 | 12 | doce
- 11 | 2 | dos
- 25 | 9 | nueve
- 31 | 8 | ocho
- 1 | 11 | once
- 28 | 15 | quince
- 32 | 6 | seis
- 29 | 7 | siete
- 15 | 13 | trece
- 22 | 30 | treinta
- 17 | 32 | treinta y dos
- 3 | 31 | treinta y uno
- 5 | 3 | tres
- 20 | 1 | uno
- 6 | 20 | veinte
- 14 | 25 | veinticinco
- 21 | 24 | veinticuatro
- 4 | 22 | veintidos
- 19 | 29 | veintinueve
- 16 | 28 | veintiocho
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
- 7 | 23 | veintitres
- 8 | 21 | veintiuno
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY c;
+ a | b | c | substring | length
+----+----+---------------+--------------------------------+--------
+ 10 | 14 | catorce | |
+ 18 | 5 | cinco | |
+ 9 | 4 | cuatro | |
+ 26 | 19 | diecinueve | |
+ 12 | 18 | dieciocho | |
+ 30 | 16 | dieciseis | |
+ 24 | 17 | diecisiete | |
+ 2 | 10 | diez | |
+ 23 | 12 | doce | |
+ 11 | 2 | dos | |
+ 25 | 9 | nueve | |
+ 31 | 8 | ocho | |
+ 1 | 11 | once | |
+ 28 | 15 | quince | |
+ 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 | 7 | siete | |
+ 15 | 13 | trece | |
+ 22 | 30 | treinta | |
+ 17 | 32 | treinta y dos | |
+ 3 | 31 | treinta y uno | |
+ 5 | 3 | tres | |
+ 20 | 1 | uno | |
+ 6 | 20 | veinte | |
+ 14 | 25 | veinticinco | |
+ 21 | 24 | veinticuatro | |
+ 4 | 22 | veintidos | |
+ 19 | 29 | veintinueve | |
+ 16 | 28 | veintiocho | |
+ 27 | 26 | veintiseis | |
+ 13 | 27 | veintisiete | |
+ 7 | 23 | veintitres | |
+ 8 | 21 | veintiuno | |
(32 rows)
-- Verify that inheritance link still works
INSERT INTO clstr_tst_inh VALUES (0, 100, 'in child table');
-SELECT * from clstr_tst;
- a | b | c
-----+-----+----------------
- 10 | 14 | catorce
- 18 | 5 | cinco
- 9 | 4 | cuatro
- 26 | 19 | diecinueve
- 12 | 18 | dieciocho
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
- 2 | 10 | diez
- 23 | 12 | doce
- 11 | 2 | dos
- 25 | 9 | nueve
- 31 | 8 | ocho
- 1 | 11 | once
- 28 | 15 | quince
- 32 | 6 | seis
- 29 | 7 | siete
- 15 | 13 | trece
- 22 | 30 | treinta
- 17 | 32 | treinta y dos
- 3 | 31 | treinta y uno
- 5 | 3 | tres
- 20 | 1 | uno
- 6 | 20 | veinte
- 14 | 25 | veinticinco
- 21 | 24 | veinticuatro
- 4 | 22 | veintidos
- 19 | 29 | veintinueve
- 16 | 28 | veintiocho
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
- 7 | 23 | veintitres
- 8 | 21 | veintiuno
- 0 | 100 | in child table
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst;
+ a | b | c | substring | length
+----+-----+----------------+--------------------------------+--------
+ 10 | 14 | catorce | |
+ 18 | 5 | cinco | |
+ 9 | 4 | cuatro | |
+ 26 | 19 | diecinueve | |
+ 12 | 18 | dieciocho | |
+ 30 | 16 | dieciseis | |
+ 24 | 17 | diecisiete | |
+ 2 | 10 | diez | |
+ 23 | 12 | doce | |
+ 11 | 2 | dos | |
+ 25 | 9 | nueve | |
+ 31 | 8 | ocho | |
+ 1 | 11 | once | |
+ 28 | 15 | quince | |
+ 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 | 7 | siete | |
+ 15 | 13 | trece | |
+ 22 | 30 | treinta | |
+ 17 | 32 | treinta y dos | |
+ 3 | 31 | treinta y uno | |
+ 5 | 3 | tres | |
+ 20 | 1 | uno | |
+ 6 | 20 | veinte | |
+ 14 | 25 | veinticinco | |
+ 21 | 24 | veinticuatro | |
+ 4 | 22 | veintidos | |
+ 19 | 29 | veintinueve | |
+ 16 | 28 | veintiocho | |
+ 27 | 26 | veintiseis | |
+ 13 | 27 | veintisiete | |
+ 7 | 23 | veintitres | |
+ 8 | 21 | veintiuno | |
+ 0 | 100 | in child table | |
(33 rows)
-- Verify that foreign key link still works
INSERT INTO clstr_tst (b, c) VALUES (1111, 'this should fail');
ERROR: clstr_tst_con referential integrity violation - key referenced from clstr_tst not found in clstr_tst_s
-SELECT conname FROM pg_constraint WHERE conrelid=(SELECT oid FROM pg_class
- WHERE relname='clstr_tst');
+SELECT conname FROM pg_constraint WHERE conrelid = 'clstr_tst'::regclass;
conname
----------------
clstr_tst_pkey
clstr_tst_con
(2 rows)
-SELECT relname FROM pg_class WHERE relname LIKE 'clstr_tst%' ORDER BY relname;
- relname
-----------------------
- clstr_tst
- clstr_tst_a_seq
- clstr_tst_b
- clstr_tst_b_c
- clstr_tst_c
- clstr_tst_c_b
- clstr_tst_inh
- clstr_tst_pkey
- clstr_tst_s
- clstr_tst_s_pkey
- clstr_tst_s_rf_a_seq
+SELECT relname, relkind,
+ EXISTS(SELECT 1 FROM pg_class WHERE oid = c.reltoastrelid) AS hastoast
+FROM pg_class c WHERE relname LIKE 'clstr_tst%' ORDER BY relname;
+ relname | relkind | hastoast
+----------------------+---------+----------
+ clstr_tst | r | t
+ clstr_tst_a_seq | S | f
+ clstr_tst_b | i | f
+ clstr_tst_b_c | i | f
+ clstr_tst_c | i | f
+ clstr_tst_c_b | i | f
+ clstr_tst_inh | r | t
+ clstr_tst_pkey | i | f
+ clstr_tst_s | r | f
+ clstr_tst_s_pkey | i | f
+ clstr_tst_s_rf_a_seq | S | f
(11 rows)