OSDN Git Service

Code review of CLUSTER patch. Clean up problems with relcache getting
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 11 Aug 2002 21:17:35 +0000 (21:17 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 11 Aug 2002 21:17:35 +0000 (21:17 +0000)
confused, toasted data getting lost, etc.

12 files changed:
doc/src/sgml/release.sgml
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/catalog/pg_depend.c
src/backend/commands/cluster.c
src/backend/storage/buffer/bufmgr.c
src/backend/utils/cache/relcache.c
src/include/catalog/dependency.h
src/include/storage/bufmgr.h
src/include/utils/rel.h
src/test/regress/expected/cluster.out
src/test/regress/sql/cluster.sql

index 0da3d9e..8e06e8a 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.145 2002/08/02 18:15:04 tgl Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.146 2002/08/11 21:17:34 tgl Exp $
 -->
 
 <appendix id="release">
@@ -24,6 +24,7 @@ CDATA means the content is "SGML-free", so you can write without
 worries about funny characters.
 -->
 <literallayout><![CDATA[
+CLUSTER is no longer hazardous to your schema
 COPY accepts a list of columns to copy
 ALTER TABLE DROP COLUMN
 CREATE OPERATOR CLASS/DROP OPERATOR CLASS
index 6bf905c..747dcb9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.219 2002/08/06 02:36:33 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.220 2002/08/11 21:17:34 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -198,7 +198,8 @@ SystemAttributeByName(const char *attname, bool relhasoids)
  *             Remove the system relation specific code to elsewhere eventually.
  *
  * If storage_create is TRUE then heap_storage_create is called here,
- * else caller must call heap_storage_create later.
+ * else caller must call heap_storage_create later (or not at all,
+ * if the relation doesn't need physical storage).
  * ----------------------------------------------------------------
  */
 Relation
@@ -291,7 +292,7 @@ heap_create(const char *relname,
                                                                         nailme);
 
        /*
-        * have the storage manager create the relation.
+        * have the storage manager create the relation's disk file, if wanted.
         */
        if (storage_create)
                heap_storage_create(rel);
@@ -684,20 +685,21 @@ heap_create_with_catalog(const char *relname,
        tupdesc->tdhasoid = BoolToHasOid(relhasoids);
        
        /*
-        * Tell heap_create not to create a physical file; we'll do that below
-        * after all our catalog updates are done.      (This isn't really
-        * necessary anymore, but we may as well avoid the cycles of creating
-        * and deleting the file in case we fail.)
+        * Create the relcache entry (mostly dummy at this point) and the
+        * physical disk file.  (If we fail further down, it's the smgr's
+        * responsibility to remove the disk file again.)
+        *
+        * NB: create a physical file only if it's not a view.
         */
        new_rel_desc = heap_create(relname,
                                                           relnamespace,
                                                           tupdesc,
                                                           shared_relation,
-                                                          false,
+                                                          (relkind != RELKIND_VIEW),
                                                           allow_system_table_mods);
 
        /* Fetch the relation OID assigned by heap_create */
-       new_rel_oid = new_rel_desc->rd_att->attrs[0]->attrelid;
+       new_rel_oid = RelationGetRelid(new_rel_desc);
 
        /* Assign an OID for the relation's tuple type */
        new_type_oid = newoid();
@@ -762,12 +764,6 @@ heap_create_with_catalog(const char *relname,
        StoreConstraints(new_rel_desc, tupdesc);
 
        /*
-        * We create the disk file for this relation here
-        */
-       if (relkind != RELKIND_VIEW)
-               heap_storage_create(new_rel_desc);
-
-       /*
         * ok, the relation has been cataloged, so close our relations and
         * return the oid of the newly created relation.
         */
index e75df63..e47a9b4 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.188 2002/08/05 03:29:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.189 2002/08/11 21:17:34 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -578,14 +578,18 @@ index_create(Oid heapRelationId,
 
        indexTupDesc->tdhasoid = WITHOUTOID;
        /*
-        * create the index relation (but don't create storage yet)
+        * create the index relation's relcache entry and physical disk file.
+        * (If we fail further down, it's the smgr's responsibility to remove
+        * the disk file again.)
         */
        indexRelation = heap_create(indexRelationName,
                                                                namespaceId,
                                                                indexTupDesc,
                                                                shared_relation,
-                                                               false,
+                                                               true,
                                                                allow_system_table_mods);
+
+       /* Fetch the relation OID assigned by heap_create */
        indexoid = RelationGetRelid(indexRelation);
 
        /*
@@ -612,11 +616,6 @@ index_create(Oid heapRelationId,
        UpdateRelationRelation(indexRelation);
 
        /*
-        * We create the disk file for this relation here
-        */
-       heap_storage_create(indexRelation);
-
-       /*
         * now update the object id's of all the attribute tuple forms in the
         * index relation's tuple descriptor
         */
index 44a0842..0427b37 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/pg_depend.c,v 1.4 2002/08/05 03:29:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/pg_depend.c,v 1.5 2002/08/11 21:17:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -121,15 +121,16 @@ recordMultipleDependencies(const ObjectAddress *depender,
 
 /*
  * deleteDependencyRecordsFor -- delete all records with given depender
- * classId/objectId.
+ * classId/objectId.  Returns the number of records deleted.
  *
  * This is used when redefining an existing object.  Links leading to the
  * object do not change, and links leading from it will be recreated
  * (possibly with some differences from before).
  */
-void
+long
 deleteDependencyRecordsFor(Oid classId, Oid objectId)
 {
+       long                    count = 0;
        Relation                depRel;
        ScanKeyData             key[2];
        SysScanDesc             scan;
@@ -150,11 +151,14 @@ deleteDependencyRecordsFor(Oid classId, Oid objectId)
        while (HeapTupleIsValid(tup = systable_getnext(scan)))
        {
                simple_heap_delete(depRel, &tup->t_self);
+               count++;
        }
 
        systable_endscan(scan);
 
        heap_close(depRel, RowExclusiveLock);
+
+       return count;
 }
 
 /*
index 9362c8f..7ca8e1d 100644 (file)
@@ -1,29 +1,25 @@
 /*-------------------------------------------------------------------------
  *
  * cluster.c
- *       Paul Brown's implementation of cluster index.
+ *       CLUSTER a table on an index.
+ *
+ * There is hardly anything left of Paul Brown's original implementation...
  *
- *       I am going to use the rename function as a model for this in the
- *       parser and executor, and the vacuum code as an example in this
- *       file. As I go - in contrast to the rest of postgres - there will
- *       be BUCKETS of comments. This is to allow reviewers to understand
- *       my (probably bogus) assumptions about the way this works.
- *                                                                                                             [pbrown '94]
  *
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.85 2002/08/10 21:00:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.86 2002/08/11 21:17:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/heap.h"
 #include "catalog/index.h"
 /*
  * We need one of these structs for each index in the relation to be
  * clustered.  It's basically the data needed by index_create() so
- * we can recreate the indexes after destroying the old heap.
+ * we can rebuild the indexes on the new heap.
  */
 typedef struct
 {
+       Oid                     indexOID;
        char       *indexName;
        IndexInfo  *indexInfo;
        Oid                     accessMethodOID;
        Oid                *classOID;
-       Oid                     indexOID;
-       bool            isPrimary;
 } IndexAttrs;
 
-static Oid     copy_heap(Oid OIDOldHeap, const char *NewName);
-static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
-static List *get_indexattr_list (Oid OIDOldHeap);
+static Oid     make_new_heap(Oid OIDOldHeap, const char *NewName);
+static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
+static List *get_indexattr_list(Relation OldHeap);
 static void recreate_indexattr(Oid OIDOldHeap, List *indexes);
 static void swap_relfilenodes(Oid r1, Oid r2);
 
-Relation RelationIdGetRelation(Oid relationId);
 
 /*
  * cluster
@@ -67,17 +61,14 @@ Relation RelationIdGetRelation(Oid relationId);
  * swapping the relfilenodes of the new table and the old table, so
  * the OID of the original table is preserved.  Thus we do not lose
  * GRANT, inheritance nor references to this table (this was a bug
- * in releases thru 7.3)
+ * in releases thru 7.3).
  *
- * Also create new indexes and swap the filenodes with the old indexes
- * the same way we do for the relation.
+ * Also create new indexes and swap the filenodes with the old indexes the
+ * same way we do for the relation.  Since we are effectively bulk-loading
+ * the new table, it's better to create the indexes afterwards than to fill
+ * them incrementally while we load the table.
  *
- * TODO:
- *             maybe we can get away with AccessShareLock for the table.
- *             Concurrency would be much improved.  Only acquire
- *             AccessExclusiveLock right before swapping the filenodes.
- *             This would allow users to CLUSTER on a regular basis,
- *             practically eliminating the need for auto-clustered indexes.
+ * Permissions checks were done already.
  */
 void
 cluster(RangeVar *oldrelation, char *oldindexname)
@@ -105,43 +96,60 @@ cluster(RangeVar *oldrelation, char *oldindexname)
                                                                        RelationGetNamespace(OldHeap));
        if (!OidIsValid(OIDOldIndex))
                elog(ERROR, "CLUSTER: cannot find index \"%s\" for table \"%s\"",
-                        oldindexname, oldrelation->relname);
+                        oldindexname, RelationGetRelationName(OldHeap));
        OldIndex = index_open(OIDOldIndex);
        LockRelation(OldIndex, AccessExclusiveLock);
 
        /*
         * Check that index is in fact an index on the given relation
         */
-       if (OldIndex->rd_index->indrelid != OIDOldHeap)
+       if (OldIndex->rd_index == NULL ||
+               OldIndex->rd_index->indrelid != OIDOldHeap)
                elog(ERROR, "CLUSTER: \"%s\" is not an index for table \"%s\"",
-                        oldindexname, oldrelation->relname);
+                        RelationGetRelationName(OldIndex),
+                        RelationGetRelationName(OldHeap));
 
-       /* Drop relcache refcnts, but do NOT give up the locks */
-       heap_close(OldHeap, NoLock);
-       index_close(OldIndex);
+       /*
+        * Disallow clustering system relations.  This will definitely NOT work
+        * for shared relations (we have no way to update pg_class rows in other
+        * databases), nor for nailed-in-cache relations (the relfilenode values
+        * for those are hardwired, see relcache.c).  It might work for other
+        * system relations, but I ain't gonna risk it.
+        */
+       if (IsSystemRelation(OldHeap))
+               elog(ERROR, "CLUSTER: cannot cluster system relation \"%s\"",
+                        RelationGetRelationName(OldHeap));
 
        /* Save the information of all indexes on the relation. */
-       indexes = get_indexattr_list(OIDOldHeap);
+       indexes = get_indexattr_list(OldHeap);
+
+       /* Drop relcache refcnts, but do NOT give up the locks */
+       index_close(OldIndex);
+       heap_close(OldHeap, NoLock);
 
        /*
-        * Create the new heap with a temporary name.
+        * Create the new heap, using a temporary name in the same namespace
+        * as the existing table.  NOTE: there is some risk of collision with user
+        * relnames.  Working around this seems more trouble than it's worth; in
+        * particular, we can't create the new heap in a different namespace from
+        * the old, or we will have problems with the TEMP status of temp tables.
         */
-       snprintf(NewHeapName, NAMEDATALEN, "temp_%u", OIDOldHeap);
+       snprintf(NewHeapName, NAMEDATALEN, "pg_temp_%u", OIDOldHeap);
 
-       OIDNewHeap = copy_heap(OIDOldHeap, NewHeapName);
+       OIDNewHeap = make_new_heap(OIDOldHeap, NewHeapName);
 
-       /* We do not need CommandCounterIncrement() because copy_heap did it. */
+       /* We don't need CommandCounterIncrement() because make_new_heap did it. */
 
        /*
         * Copy the heap data into the new table in the desired order.
         */
-       rebuildheap(OIDNewHeap, OIDOldHeap, OIDOldIndex);
+       copy_heap_data(OIDNewHeap, OIDOldHeap, OIDOldIndex);
 
-       /* To make the new heap's data visible. */
+       /* To make the new heap's data visible (probably not needed?). */
        CommandCounterIncrement();
 
        /* Swap the relfilenodes of the old and new heaps. */
-       swap_relfilenodes(OIDNewHeap, OIDOldHeap);
+       swap_relfilenodes(OIDOldHeap, OIDNewHeap);
 
        CommandCounterIncrement();
 
@@ -150,21 +158,26 @@ cluster(RangeVar *oldrelation, char *oldindexname)
        object.objectId = OIDNewHeap;
        object.objectSubId = 0;
 
-       /* The relation is local to our transaction and we know nothin
+       /*
+        * The new relation is local to our transaction and we know nothing
         * depends on it, so DROP_RESTRICT should be OK.
         */
        performDeletion(&object, DROP_RESTRICT);
 
        /* performDeletion does CommandCounterIncrement at end */
 
-       /* Recreate the indexes on the relation.  We do not need
-        * CommandCounterIncrement() because recreate_indexattr does it.
-        */
-       recreate_indexattr(OIDOldHeap, indexes);
+       /*
+        * Recreate each index on the relation.  We do not need
+        * CommandCounterIncrement() because recreate_indexattr does it.
+        */
+       recreate_indexattr(OIDOldHeap, indexes);
 }
 
+/*
+ * Create the new table that we will fill with correctly-ordered data.
+ */
 static Oid
-copy_heap(Oid OIDOldHeap, const char *NewName)
+make_new_heap(Oid OIDOldHeap, const char *NewName)
 {
        TupleDesc       OldHeapDesc,
                                tupdesc;
@@ -206,27 +219,29 @@ copy_heap(Oid OIDOldHeap, const char *NewName)
        return OIDNewHeap;
 }
 
+/*
+ * Do the physical copying of heap data.
+ */
 static void
-rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
+copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
 {
-       Relation        LocalNewHeap,
-                               LocalOldHeap,
-                               LocalOldIndex;
-       IndexScanDesc ScanDesc;
-       HeapTuple       LocalHeapTuple;
+       Relation        NewHeap,
+                               OldHeap,
+                               OldIndex;
+       IndexScanDesc scan;
+       HeapTuple       tuple;
 
        /*
         * Open the relations I need. Scan through the OldHeap on the OldIndex
         * and insert each tuple into the NewHeap.
         */
-       LocalNewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
-       LocalOldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
-       LocalOldIndex = index_open(OIDOldIndex);
+       NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
+       OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
+       OldIndex = index_open(OIDOldIndex);
 
-       ScanDesc = index_beginscan(LocalOldHeap, LocalOldIndex,
-                                                          SnapshotNow, 0, (ScanKey) NULL);
+       scan = index_beginscan(OldHeap, OldIndex, SnapshotNow, 0, (ScanKey) NULL);
 
-       while ((LocalHeapTuple = index_getnext(ScanDesc, ForwardScanDirection)) != NULL)
+       while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
        {
                /*
                 * We must copy the tuple because heap_insert() will overwrite
@@ -234,199 +249,280 @@ rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
                 * retrieved tuple will actually be in a disk buffer!  Thus,
                 * the source relation would get trashed, which is bad news if
                 * we abort later on.  (This was a bug in releases thru 7.0)
+                *
+                * Note that the copied tuple will have the original OID, if any,
+                * so this does preserve OIDs.
                 */
-               HeapTuple       copiedTuple = heap_copytuple(LocalHeapTuple);
+               HeapTuple       copiedTuple = heap_copytuple(tuple);
+
+               simple_heap_insert(NewHeap, copiedTuple);
 
-               simple_heap_insert(LocalNewHeap, copiedTuple);
                heap_freetuple(copiedTuple);
 
                CHECK_FOR_INTERRUPTS();
        }
 
-       index_endscan(ScanDesc);
+       index_endscan(scan);
 
-       index_close(LocalOldIndex);
-       heap_close(LocalOldHeap, NoLock);
-       heap_close(LocalNewHeap, NoLock);
+       index_close(OldIndex);
+       heap_close(OldHeap, NoLock);
+       heap_close(NewHeap, NoLock);
 }
 
-/* Get the necessary info about the indexes in the relation and
- * return a List of IndexAttrs.
+/*
+ * Get the necessary info about the indexes of the relation and
+ * return a list of IndexAttrs structures.
  */
-List *
-get_indexattr_list (Oid OIDOldHeap)
+static List *
+get_indexattr_list(Relation OldHeap)
 {
-       ScanKeyData     entry;
-       HeapScanDesc scan;
-       Relation indexRelation;
-       HeapTuple indexTuple;
        List *indexes = NIL;
-       IndexAttrs *attrs;
-       HeapTuple tuple;
-       Form_pg_index index;
-       
-       /* Grab the index tuples by looking into RelationRelationName
-        * by the OID of the old heap.
-        */
-       indexRelation = heap_openr(IndexRelationName, AccessShareLock);
-       ScanKeyEntryInitialize(&entry, 0, Anum_pg_index_indrelid,
-                       F_OIDEQ, ObjectIdGetDatum(OIDOldHeap));
-       scan = heap_beginscan(indexRelation, SnapshotNow, 1, &entry);
-       while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       List       *indlist;
+
+       /* Ask the relcache to produce a list of the indexes of the old rel */
+       foreach(indlist, RelationGetIndexList(OldHeap))
        {
-               index = (Form_pg_index) GETSTRUCT(indexTuple);
+               Oid             indexOID = (Oid) lfirsti(indlist);
+               HeapTuple indexTuple;
+               HeapTuple classTuple;
+               Form_pg_index indexForm;
+               Form_pg_class classForm;
+               IndexAttrs *attrs;
+
+               indexTuple = SearchSysCache(INDEXRELID,
+                                                                       ObjectIdGetDatum(indexOID),
+                                                                       0, 0, 0);
+               if (!HeapTupleIsValid(indexTuple))
+                       elog(ERROR, "Cache lookup failed for index %u", indexOID);
+               indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+               Assert(indexForm->indexrelid == indexOID);
 
                attrs = (IndexAttrs *) palloc(sizeof(IndexAttrs));
-               attrs->indexInfo = BuildIndexInfo(index);
-               attrs->isPrimary = index->indisprimary;
-               attrs->indexOID = index->indexrelid;
-
-               /* The opclasses are copied verbatim from the original indexes.
-               */
-               attrs->classOID = (Oid *)palloc(sizeof(Oid) *
-                               attrs->indexInfo->ii_NumIndexAttrs);
-               memcpy(attrs->classOID, index->indclass,
-                               sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
-
-               /* Name and access method of each index come from
-                * RelationRelationName.
-                */
-               tuple = SearchSysCache(RELOID,
-                               ObjectIdGetDatum(attrs->indexOID),
-                               0, 0, 0);
-               if (!HeapTupleIsValid(tuple))
-                       elog(ERROR, "CLUSTER: cannot find index %u", attrs->indexOID);
-               attrs->indexName = pstrdup(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname));
-               attrs->accessMethodOID = ((Form_pg_class) GETSTRUCT(tuple))->relam;
-               ReleaseSysCache(tuple);
+               attrs->indexOID = indexOID;
+               attrs->indexInfo = BuildIndexInfo(indexForm);
+               attrs->classOID = (Oid *)
+                       palloc(sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
+               memcpy(attrs->classOID, indexForm->indclass,
+                          sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
+
+               /* Name and access method of each index come from pg_class */
+               classTuple = SearchSysCache(RELOID,
+                                                                       ObjectIdGetDatum(indexOID),
+                                                                       0, 0, 0);
+               if (!HeapTupleIsValid(classTuple))
+                       elog(ERROR, "Cache lookup failed for index %u", indexOID);
+               classForm = (Form_pg_class) GETSTRUCT(classTuple);
+
+               attrs->indexName = pstrdup(NameStr(classForm->relname));
+               attrs->accessMethodOID = classForm->relam;
+
+               ReleaseSysCache(classTuple);
+               ReleaseSysCache(indexTuple);
 
                /* Cons the gathered data into the list.  We do not care about
                 * ordering, and this is more efficient than append.
                 */
-               indexes=lcons((void *)attrs, indexes);
+               indexes = lcons(attrs, indexes);
        }
-       heap_endscan(scan);
-       heap_close(indexRelation, AccessShareLock);
+
        return indexes;
 }
 
-/* Create new indexes and swap the filenodes with old indexes.  Then drop
- * the new index (carrying the old heap along).
+/*
+ * Create new indexes and swap the filenodes with old indexes.  Then drop
+ * the new index (carrying the old index filenode along).
  */
-void
+static void
 recreate_indexattr(Oid OIDOldHeap, List *indexes)
 {
-       IndexAttrs *attrs;
        List       *elem;
-       Oid                     newIndexOID;
-       char            newIndexName[NAMEDATALEN];
-       ObjectAddress object;
 
-       foreach (elem, indexes)
+       foreach(elem, indexes)
        {
-               attrs=(IndexAttrs *) lfirst(elem);
+               IndexAttrs *attrs = (IndexAttrs *) lfirst(elem);
+               Oid                     newIndexOID;
+               char            newIndexName[NAMEDATALEN];
+               ObjectAddress object;
 
                /* Create the new index under a temporary name */
-               snprintf(newIndexName, NAMEDATALEN, "temp_%u", attrs->indexOID);
+               snprintf(newIndexName, NAMEDATALEN, "pg_temp_%u", attrs->indexOID);
 
-               /* The new index will have constraint status set to false,
+               /*
+                * The new index will have primary and constraint status set to false,
                 * but since we will only use its filenode it doesn't matter:
                 * after the filenode swap the index will keep the constraint
                 * status of the old index.
                 */
                newIndexOID = index_create(OIDOldHeap, newIndexName,
                                                                   attrs->indexInfo, attrs->accessMethodOID,
-                                                                  attrs->classOID, attrs->isPrimary,
+                                                                  attrs->classOID, false,
                                                                   false, allowSystemTableMods);
                CommandCounterIncrement();
 
                /* Swap the filenodes. */
-               swap_relfilenodes(newIndexOID, attrs->indexOID);
-               setRelhasindex(OIDOldHeap, true, attrs->isPrimary, InvalidOid);
+               swap_relfilenodes(attrs->indexOID, newIndexOID);
+
+               CommandCounterIncrement();
 
                /* Destroy new index with old filenode */
                object.classId = RelOid_pg_class;
                object.objectId = newIndexOID;
                object.objectSubId = 0;
                
-               /* The relation is local to our transaction and we know
+               /*
+                * The relation is local to our transaction and we know
                 * nothing depends on it, so DROP_RESTRICT should be OK.
                 */
                performDeletion(&object, DROP_RESTRICT);
                
                /* performDeletion does CommandCounterIncrement() at its end */
-               
-               pfree(attrs->classOID);
-               pfree(attrs);
        }
-       freeList(indexes);
 }
 
-/* Swap the relfilenodes for two given relations.
+/*
+ * Swap the relfilenodes for two given relations.
+ *
+ * Also swap any TOAST links, so that the toast data moves along with
+ * the main-table data.
  */
-void
+static void
 swap_relfilenodes(Oid r1, Oid r2)
 {
-       /* I can probably keep RelationRelationName open in the main
-        * function and pass the Relation around so I don't have to open
-        * it every time.
-        */
        Relation        relRelation,
                                rel;
-       HeapTuple       reltup[2];
-       Oid                     tempRFNode;
+       HeapTuple       reltup1,
+                               reltup2;
+       Form_pg_class relform1,
+                               relform2;
+       Oid                     swaptemp;
        int                     i;
        CatalogIndexState indstate;
 
-       /* We need both RelationRelationName tuples.  */
+       /* We need writable copies of both pg_class tuples. */
        relRelation = heap_openr(RelationRelationName, RowExclusiveLock);
 
-       reltup[0] = SearchSysCacheCopy(RELOID,
-                                                                  ObjectIdGetDatum(r1),
-                                                                  0, 0, 0);
-       if (!HeapTupleIsValid(reltup[0]))
+       reltup1 = SearchSysCacheCopy(RELOID,
+                                                                ObjectIdGetDatum(r1),
+                                                                0, 0, 0);
+       if (!HeapTupleIsValid(reltup1))
                elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r1);
-       reltup[1] = SearchSysCacheCopy(RELOID,
-                                                                  ObjectIdGetDatum(r2),
-                                                                  0, 0, 0);
-       if (!HeapTupleIsValid(reltup[1]))
+       relform1 = (Form_pg_class) GETSTRUCT(reltup1);
+
+       reltup2 = SearchSysCacheCopy(RELOID,
+                                                                ObjectIdGetDatum(r2),
+                                                                0, 0, 0);
+       if (!HeapTupleIsValid(reltup2))
                elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2);
+       relform2 = (Form_pg_class) GETSTRUCT(reltup2);
 
-       /* The buffer manager gets confused if we swap relfilenodes for
+       /*
+        * The buffer manager gets confused if we swap relfilenodes for
         * relations that are not both local or non-local to this transaction.
         * Flush the buffers on both relations so the buffer manager can
-        * forget about'em.
+        * forget about'em.  (XXX this might not be necessary anymore?)
         */
-
-       rel = RelationIdGetRelation(r1);
+       rel = relation_open(r1, NoLock);
        i = FlushRelationBuffers(rel, 0);
        if (i < 0)
                elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
-       RelationClose(rel);
-       rel = RelationIdGetRelation(r1);
+       relation_close(rel, NoLock);
+
+       rel = relation_open(r2, NoLock);
        i = FlushRelationBuffers(rel, 0);
        if (i < 0)
                elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
-       RelationClose(rel);
+       relation_close(rel, NoLock);
+
+       /*
+        * Actually swap the filenode and TOAST fields in the two tuples
+        */
+       swaptemp = relform1->relfilenode;
+       relform1->relfilenode = relform2->relfilenode;
+       relform2->relfilenode = swaptemp;
 
-       /* Actually swap the filenodes */
+       swaptemp = relform1->reltoastrelid;
+       relform1->reltoastrelid = relform2->reltoastrelid;
+       relform2->reltoastrelid = swaptemp;
 
-       tempRFNode = ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode;
-       ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode =
-               ((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode;
-       ((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode = tempRFNode;
+       /* we should not swap reltoastidxid */
 
-       /* Update the RelationRelationName tuples */
-       simple_heap_update(relRelation, &reltup[1]->t_self, reltup[1]);
-       simple_heap_update(relRelation, &reltup[0]->t_self, reltup[0]);
+       /* Update the tuples in pg_class */
+       simple_heap_update(relRelation, &reltup1->t_self, reltup1);
+       simple_heap_update(relRelation, &reltup2->t_self, reltup2);
        
-       /* To keep system catalogs current. */
+       /* Keep system catalogs current */
        indstate = CatalogOpenIndexes(relRelation);
-       CatalogIndexInsert(indstate, reltup[1]);
-       CatalogIndexInsert(indstate, reltup[0]);
+       CatalogIndexInsert(indstate, reltup1);
+       CatalogIndexInsert(indstate, reltup2);
        CatalogCloseIndexes(indstate);
 
-       heap_close(relRelation, NoLock);
-       heap_freetuple(reltup[0]);
-       heap_freetuple(reltup[1]);
+       /*
+        * If we have toast tables associated with the relations being swapped,
+        * change their dependency links to re-associate them with their new
+        * owning relations.  Otherwise the wrong one will get dropped ...
+        *
+        * NOTE: for now, we can assume the new table will have a TOAST table
+        * if and only if the old one does.  This logic might need work if we
+        * get smarter about dropped columns.
+        *
+        * NOTE: at present, a TOAST table's only dependency is the one on
+        * its owning table.  If more are ever created, we'd need to use something
+        * more selective than deleteDependencyRecordsFor() to get rid of only
+        * the link we want.
+        */
+       if (relform1->reltoastrelid || relform2->reltoastrelid)
+       {
+               ObjectAddress baseobject,
+                                       toastobject;
+               long            count;
+
+               if (!(relform1->reltoastrelid && relform2->reltoastrelid))
+                       elog(ERROR, "CLUSTER: expected both swapped tables to have TOAST tables");
+
+               /* Delete old dependencies */
+               count = deleteDependencyRecordsFor(RelOid_pg_class,
+                                                                                  relform1->reltoastrelid);
+               if (count != 1)
+                       elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
+                                count);
+               count = deleteDependencyRecordsFor(RelOid_pg_class,
+                                                                                  relform2->reltoastrelid);
+               if (count != 1)
+                       elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
+                                count);
+
+               /* Register new dependencies */
+               baseobject.classId = RelOid_pg_class;
+               baseobject.objectId = r1;
+               baseobject.objectSubId = 0;
+               toastobject.classId = RelOid_pg_class;
+               toastobject.objectId = relform1->reltoastrelid;
+               toastobject.objectSubId = 0;
+
+               recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+
+               baseobject.objectId = r2;
+               toastobject.objectId = relform2->reltoastrelid;
+
+               recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+       }
+
+       /*
+        * Blow away the old relcache entries now.  We need this kluge because
+        * relcache.c indexes relcache entries by rd_node as well as OID.
+        * It will get confused if it is asked to (re)build an entry with a new
+        * rd_node value when there is still another entry laying about with
+        * that same rd_node value.  (Fortunately, since one of the entries
+        * is local in our transaction, it's sufficient to clear out our own
+        * relcache this way; the problem cannot arise for other backends when
+        * they see our update on the non-local relation.)
+        */
+       RelationForgetRelation(r1);
+       RelationForgetRelation(r2);
+
+       /* Clean up. */
+       heap_freetuple(reltup1);
+       heap_freetuple(reltup2);
+
+       heap_close(relRelation, RowExclusiveLock);
 }
index 1ca7af3..11df91c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.128 2002/08/06 02:36:34 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.129 2002/08/11 21:17:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1038,11 +1038,6 @@ BufferReplace(BufferDesc *bufHdr)
  * RelationGetNumberOfBlocks
  *             Determines the current number of pages in the relation.
  *             Side effect: relation->rd_nblocks is updated.
- *
- * Note:
- *             XXX may fail for huge relations.
- *             XXX should be elsewhere.
- *             XXX maybe should be hidden
  */
 BlockNumber
 RelationGetNumberOfBlocks(Relation relation)
@@ -1061,6 +1056,23 @@ RelationGetNumberOfBlocks(Relation relation)
        return relation->rd_nblocks;
 }
 
+/*
+ * RelationUpdateNumberOfBlocks
+ *             Forcibly update relation->rd_nblocks.
+ *
+ * If the relcache drops an entry for a temp relation, it must call this
+ * routine after recreating the relcache entry, so that rd_nblocks is
+ * re-sync'd with reality.  See RelationGetNumberOfBlocks.
+ */
+void
+RelationUpdateNumberOfBlocks(Relation relation)
+{
+       if (relation->rd_rel->relkind == RELKIND_VIEW)
+               relation->rd_nblocks = 0;
+       else
+               relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);
+}
+
 /* ---------------------------------------------------------------------
  *             DropRelationBuffers
  *
index f6c1120..1b57170 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.171 2002/08/06 02:36:35 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.172 2002/08/11 21:17:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -860,11 +860,12 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
 
        /*
         * normal relations are not nailed into the cache; nor can a pre-existing
-        * relation be new or temp.
+        * relation be new.  It could be temp though.  (Actually, it could be new
+        * too, but it's okay to forget that fact if forced to flush the entry.)
         */
        relation->rd_isnailed = false;
        relation->rd_isnew = false;
-       relation->rd_istemp = false;
+       relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
 
        /*
         * initialize the tuple descriptor (relation->rd_att).
@@ -909,13 +910,23 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
        relation->rd_fd = -1;
 
        /*
-        * insert newly created relation into proper relcaches, restore memory
-        * context and return the new reldesc.
+        * Insert newly created relation into relcache hash tables.
         */
        oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
        RelationCacheInsert(relation);
        MemoryContextSwitchTo(oldcxt);
 
+       /*
+        * If it's a temp rel, RelationGetNumberOfBlocks will assume that
+        * rd_nblocks is correct.  Must forcibly update the block count when
+        * creating the relcache entry.  But if we are doing a rebuild, don't
+        * do this yet; leave it to RelationClearRelation to do at the end.
+        * (Otherwise, an elog in RelationUpdateNumberOfBlocks would leave us
+        * with inconsistent relcache state.)
+        */
+       if (relation->rd_istemp && oldrelation == NULL)
+               RelationUpdateNumberOfBlocks(relation);
+
        return relation;
 }
 
@@ -1601,8 +1612,7 @@ RelationClose(Relation relation)
 
 #ifdef RELCACHE_FORCE_RELEASE
        if (RelationHasReferenceCountZero(relation) &&
-               !relation->rd_isnew &&
-               !relation->rd_istemp)
+               !relation->rd_isnew)
                RelationClearRelation(relation, false);
 #endif
 }
@@ -1733,19 +1743,15 @@ RelationClearRelation(Relation relation, bool rebuild)
        {
                /*
                 * When rebuilding an open relcache entry, must preserve ref count
-                * and new/temp flags.  Also attempt to preserve the tupledesc,
-                * rewrite rules, and trigger substructures in place. Furthermore
-                * we save/restore rd_nblocks (in case it is a new/temp relation)
-                * *and* call RelationGetNumberOfBlocks (in case it isn't).
+                * and rd_isnew flag.  Also attempt to preserve the tupledesc,
+                * rewrite rules, and trigger substructures in place.
                 */
                int                     old_refcnt = relation->rd_refcnt;
                bool            old_isnew = relation->rd_isnew;
-               bool            old_istemp = relation->rd_istemp;
                TupleDesc       old_att = relation->rd_att;
                RuleLock   *old_rules = relation->rd_rules;
                MemoryContext old_rulescxt = relation->rd_rulescxt;
                TriggerDesc *old_trigdesc = relation->trigdesc;
-               BlockNumber old_nblocks = relation->rd_nblocks;
                RelationBuildDescInfo buildinfo;
 
                buildinfo.infotype = INFO_RELID;
@@ -1764,7 +1770,6 @@ RelationClearRelation(Relation relation, bool rebuild)
                }
                RelationSetReferenceCount(relation, old_refcnt);
                relation->rd_isnew = old_isnew;
-               relation->rd_istemp = old_istemp;
                if (equalTupleDescs(old_att, relation->rd_att))
                {
                        FreeTupleDesc(relation->rd_att);
@@ -1791,13 +1796,14 @@ RelationClearRelation(Relation relation, bool rebuild)
                }
                else
                        FreeTriggerDesc(old_trigdesc);
-               relation->rd_nblocks = old_nblocks;
 
                /*
-                * this is kind of expensive, but I think we must do it in case
-                * relation has been truncated...
+                * Update rd_nblocks.  This is kind of expensive, but I think we must
+                * do it in case relation has been truncated... we definitely must
+                * do it if the rel is new or temp, since RelationGetNumberOfBlocks
+                * will subsequently assume that the block count is correct.
                 */
-               relation->rd_nblocks = RelationGetNumberOfBlocks(relation);
+               RelationUpdateNumberOfBlocks(relation);
        }
 }
 
@@ -1811,18 +1817,19 @@ RelationFlushRelation(Relation relation)
 {
        bool            rebuild;
 
-       if (relation->rd_isnew || relation->rd_istemp)
+       if (relation->rd_isnew)
        {
                /*
-                * New and temp relcache entries must always be rebuilt, not
-                * flushed; else we'd forget those two important status bits.
+                * New relcache entries are always rebuilt, not flushed; else we'd
+                * forget the "new" status of the relation, which is a useful
+                * optimization to have.
                 */
                rebuild = true;
        }
        else
        {
                /*
-                * Nonlocal rels can be dropped from the relcache if not open.
+                * Pre-existing rels can be dropped from the relcache if not open.
                 */
                rebuild = !RelationHasReferenceCountZero(relation);
        }
@@ -1921,7 +1928,7 @@ RelationCacheInvalidate(void)
 
                relcacheInvalsReceived++;
 
-               if (RelationHasReferenceCountZero(relation) && !relation->rd_istemp)
+               if (RelationHasReferenceCountZero(relation))
                {
                        /* Delete this entry immediately */
                        RelationClearRelation(relation, false);
@@ -1965,7 +1972,10 @@ AtEOXact_RelationCache(bool commit)
                 *
                 * During commit, reset the flag to false, since we are now out of the
                 * creating transaction.  During abort, simply delete the relcache
-                * entry --- it isn't interesting any longer.
+                * entry --- it isn't interesting any longer.  (NOTE: if we have
+                * forgotten the isnew state of a new relation due to a forced cache
+                * flush, the entry will get deleted anyway by shared-cache-inval
+                * processing of the aborted pg_class insertion.)
                 */
                if (relation->rd_isnew)
                {
index b278007..86d621b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: dependency.h,v 1.3 2002/07/16 22:12:20 tgl Exp $
+ * $Id: dependency.h,v 1.4 2002/08/11 21:17:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -98,6 +98,6 @@ extern void recordMultipleDependencies(const ObjectAddress *depender,
                                                                           int nreferenced,
                                                                           DependencyType behavior);
 
-extern void deleteDependencyRecordsFor(Oid classId, Oid objectId);
+extern long deleteDependencyRecordsFor(Oid classId, Oid objectId);
 
 #endif   /* DEPENDENCY_H */
index 7aebaa7..e92cc65 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: bufmgr.h,v 1.62 2002/08/06 02:36:35 tgl Exp $
+ * $Id: bufmgr.h,v 1.63 2002/08/11 21:17:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -161,6 +161,7 @@ extern void AtEOXact_Buffers(bool isCommit);
 extern void FlushBufferPool(void);
 extern BlockNumber BufferGetBlockNumber(Buffer buffer);
 extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
+extern void RelationUpdateNumberOfBlocks(Relation relation);
 extern int     FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock);
 extern void DropRelationBuffers(Relation rel);
 extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp);
index d913f28..a0bcd22 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: rel.h,v 1.61 2002/08/06 02:36:35 tgl Exp $
+ * $Id: rel.h,v 1.62 2002/08/11 21:17:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -113,6 +113,10 @@ typedef struct RelationData
                                                                 * InvalidBlockNumber */
        int                     rd_refcnt;              /* reference count */
        bool            rd_isnew;               /* rel was created in current xact */
+       /*
+        * NOTE: rd_isnew should be relied on only for optimization purposes;
+        * it is possible for new-ness to be "forgotten" (eg, after CLUSTER).
+        */
        bool            rd_istemp;              /* rel uses the local buffer mgr */
        bool            rd_isnailed;    /* rel is nailed in cache */
        bool            rd_indexfound;  /* true if rd_indexlist is valid */
index 12ae4a2..a76536a 100644 (file)
@@ -8,6 +8,7 @@ NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'clstr_tst_s_pkey
 CREATE TABLE clstr_tst (a SERIAL PRIMARY KEY,
        b INT,
        c TEXT,
+       d TEXT,
        CONSTRAINT clstr_tst_con FOREIGN KEY (b) REFERENCES clstr_tst_s);
 NOTICE:  CREATE TABLE will create implicit sequence 'clstr_tst_a_seq' for SERIAL column 'clstr_tst.a'
 NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index 'clstr_tst_pkey' for table 'clstr_tst'
@@ -54,220 +55,222 @@ INSERT INTO clstr_tst (b, c) VALUES (15, 'quince');
 INSERT INTO clstr_tst (b, c) VALUES (7, 'siete');
 INSERT INTO clstr_tst (b, c) VALUES (16, 'dieciseis');
 INSERT INTO clstr_tst (b, c) VALUES (8, 'ocho');
-INSERT INTO clstr_tst (b, c) VALUES (6, 'seis');
+-- This entry is needed to test that TOASTED values are copied correctly.
+INSERT INTO clstr_tst (b, c, d) VALUES (6, 'seis', repeat('xyzzy', 100000));
 CLUSTER clstr_tst_c ON clstr_tst;
-SELECT * from clstr_tst;
- a  | b  |       c       
-----+----+---------------
- 10 | 14 | catorce
- 18 |  5 | cinco
-  9 |  4 | cuatro
- 26 | 19 | diecinueve
- 12 | 18 | dieciocho
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
-  2 | 10 | diez
- 23 | 12 | doce
- 11 |  2 | dos
- 25 |  9 | nueve
- 31 |  8 | ocho
-  1 | 11 | once
- 28 | 15 | quince
- 32 |  6 | seis
- 29 |  7 | siete
- 15 | 13 | trece
- 22 | 30 | treinta
- 17 | 32 | treinta y dos
-  3 | 31 | treinta y uno
-  5 |  3 | tres
- 20 |  1 | uno
-  6 | 20 | veinte
- 14 | 25 | veinticinco
- 21 | 24 | veinticuatro
-  4 | 22 | veintidos
- 19 | 29 | veintinueve
- 16 | 28 | veintiocho
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
-  7 | 23 | veintitres
-  8 | 21 | veintiuno
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst;
+ a  | b  |       c       |           substring            | length 
+----+----+---------------+--------------------------------+--------
+ 10 | 14 | catorce       |                                |       
+ 18 |  5 | cinco         |                                |       
+  9 |  4 | cuatro        |                                |       
+ 26 | 19 | diecinueve    |                                |       
+ 12 | 18 | dieciocho     |                                |       
+ 30 | 16 | dieciseis     |                                |       
+ 24 | 17 | diecisiete    |                                |       
+  2 | 10 | diez          |                                |       
+ 23 | 12 | doce          |                                |       
+ 11 |  2 | dos           |                                |       
+ 25 |  9 | nueve         |                                |       
+ 31 |  8 | ocho          |                                |       
+  1 | 11 | once          |                                |       
+ 28 | 15 | quince        |                                |       
+ 32 |  6 | seis          | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 |  7 | siete         |                                |       
+ 15 | 13 | trece         |                                |       
+ 22 | 30 | treinta       |                                |       
+ 17 | 32 | treinta y dos |                                |       
+  3 | 31 | treinta y uno |                                |       
+  5 |  3 | tres          |                                |       
+ 20 |  1 | uno           |                                |       
+  6 | 20 | veinte        |                                |       
+ 14 | 25 | veinticinco   |                                |       
+ 21 | 24 | veinticuatro  |                                |       
+  4 | 22 | veintidos     |                                |       
+ 19 | 29 | veintinueve   |                                |       
+ 16 | 28 | veintiocho    |                                |       
+ 27 | 26 | veintiseis    |                                |       
+ 13 | 27 | veintisiete   |                                |       
+  7 | 23 | veintitres    |                                |       
+  8 | 21 | veintiuno     |                                |       
 (32 rows)
 
-SELECT * from clstr_tst ORDER BY a;
- a  | b  |       c       
-----+----+---------------
-  1 | 11 | once
-  2 | 10 | diez
-  3 | 31 | treinta y uno
-  4 | 22 | veintidos
-  5 |  3 | tres
-  6 | 20 | veinte
-  7 | 23 | veintitres
-  8 | 21 | veintiuno
-  9 |  4 | cuatro
- 10 | 14 | catorce
- 11 |  2 | dos
- 12 | 18 | dieciocho
- 13 | 27 | veintisiete
- 14 | 25 | veinticinco
- 15 | 13 | trece
- 16 | 28 | veintiocho
- 17 | 32 | treinta y dos
- 18 |  5 | cinco
- 19 | 29 | veintinueve
- 20 |  1 | uno
- 21 | 24 | veinticuatro
- 22 | 30 | treinta
- 23 | 12 | doce
- 24 | 17 | diecisiete
- 25 |  9 | nueve
- 26 | 19 | diecinueve
- 27 | 26 | veintiseis
- 28 | 15 | quince
- 29 |  7 | siete
- 30 | 16 | dieciseis
- 31 |  8 | ocho
- 32 |  6 | seis
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a;
+ a  | b  |       c       |           substring            | length 
+----+----+---------------+--------------------------------+--------
+  1 | 11 | once          |                                |       
+  2 | 10 | diez          |                                |       
+  3 | 31 | treinta y uno |                                |       
+  4 | 22 | veintidos     |                                |       
+  5 |  3 | tres          |                                |       
+  6 | 20 | veinte        |                                |       
+  7 | 23 | veintitres    |                                |       
+  8 | 21 | veintiuno     |                                |       
+  9 |  4 | cuatro        |                                |       
+ 10 | 14 | catorce       |                                |       
+ 11 |  2 | dos           |                                |       
+ 12 | 18 | dieciocho     |                                |       
+ 13 | 27 | veintisiete   |                                |       
+ 14 | 25 | veinticinco   |                                |       
+ 15 | 13 | trece         |                                |       
+ 16 | 28 | veintiocho    |                                |       
+ 17 | 32 | treinta y dos |                                |       
+ 18 |  5 | cinco         |                                |       
+ 19 | 29 | veintinueve   |                                |       
+ 20 |  1 | uno           |                                |       
+ 21 | 24 | veinticuatro  |                                |       
+ 22 | 30 | treinta       |                                |       
+ 23 | 12 | doce          |                                |       
+ 24 | 17 | diecisiete    |                                |       
+ 25 |  9 | nueve         |                                |       
+ 26 | 19 | diecinueve    |                                |       
+ 27 | 26 | veintiseis    |                                |       
+ 28 | 15 | quince        |                                |       
+ 29 |  7 | siete         |                                |       
+ 30 | 16 | dieciseis     |                                |       
+ 31 |  8 | ocho          |                                |       
+ 32 |  6 | seis          | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
 (32 rows)
 
-SELECT * from clstr_tst ORDER BY b;
- a  | b  |       c       
-----+----+---------------
- 20 |  1 | uno
- 11 |  2 | dos
-  5 |  3 | tres
-  9 |  4 | cuatro
- 18 |  5 | cinco
- 32 |  6 | seis
- 29 |  7 | siete
- 31 |  8 | ocho
- 25 |  9 | nueve
-  2 | 10 | diez
-  1 | 11 | once
- 23 | 12 | doce
- 15 | 13 | trece
- 10 | 14 | catorce
- 28 | 15 | quince
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
- 12 | 18 | dieciocho
- 26 | 19 | diecinueve
-  6 | 20 | veinte
-  8 | 21 | veintiuno
-  4 | 22 | veintidos
-  7 | 23 | veintitres
- 21 | 24 | veinticuatro
- 14 | 25 | veinticinco
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
- 16 | 28 | veintiocho
- 19 | 29 | veintinueve
- 22 | 30 | treinta
-  3 | 31 | treinta y uno
- 17 | 32 | treinta y dos
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY b;
+ a  | b  |       c       |           substring            | length 
+----+----+---------------+--------------------------------+--------
+ 20 |  1 | uno           |                                |       
+ 11 |  2 | dos           |                                |       
+  5 |  3 | tres          |                                |       
+  9 |  4 | cuatro        |                                |       
+ 18 |  5 | cinco         |                                |       
+ 32 |  6 | seis          | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 |  7 | siete         |                                |       
+ 31 |  8 | ocho          |                                |       
+ 25 |  9 | nueve         |                                |       
+  2 | 10 | diez          |                                |       
+  1 | 11 | once          |                                |       
+ 23 | 12 | doce          |                                |       
+ 15 | 13 | trece         |                                |       
+ 10 | 14 | catorce       |                                |       
+ 28 | 15 | quince        |                                |       
+ 30 | 16 | dieciseis     |                                |       
+ 24 | 17 | diecisiete    |                                |       
+ 12 | 18 | dieciocho     |                                |       
+ 26 | 19 | diecinueve    |                                |       
+  6 | 20 | veinte        |                                |       
+  8 | 21 | veintiuno     |                                |       
+  4 | 22 | veintidos     |                                |       
+  7 | 23 | veintitres    |                                |       
+ 21 | 24 | veinticuatro  |                                |       
+ 14 | 25 | veinticinco   |                                |       
+ 27 | 26 | veintiseis    |                                |       
+ 13 | 27 | veintisiete   |                                |       
+ 16 | 28 | veintiocho    |                                |       
+ 19 | 29 | veintinueve   |                                |       
+ 22 | 30 | treinta       |                                |       
+  3 | 31 | treinta y uno |                                |       
+ 17 | 32 | treinta y dos |                                |       
 (32 rows)
 
-SELECT * from clstr_tst ORDER BY c;
- a  | b  |       c       
-----+----+---------------
- 10 | 14 | catorce
- 18 |  5 | cinco
-  9 |  4 | cuatro
- 26 | 19 | diecinueve
- 12 | 18 | dieciocho
- 30 | 16 | dieciseis
- 24 | 17 | diecisiete
-  2 | 10 | diez
- 23 | 12 | doce
- 11 |  2 | dos
- 25 |  9 | nueve
- 31 |  8 | ocho
-  1 | 11 | once
- 28 | 15 | quince
- 32 |  6 | seis
- 29 |  7 | siete
- 15 | 13 | trece
- 22 | 30 | treinta
- 17 | 32 | treinta y dos
-  3 | 31 | treinta y uno
-  5 |  3 | tres
- 20 |  1 | uno
-  6 | 20 | veinte
- 14 | 25 | veinticinco
- 21 | 24 | veinticuatro
-  4 | 22 | veintidos
- 19 | 29 | veintinueve
- 16 | 28 | veintiocho
- 27 | 26 | veintiseis
- 13 | 27 | veintisiete
-  7 | 23 | veintitres
-  8 | 21 | veintiuno
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY c;
+ a  | b  |       c       |           substring            | length 
+----+----+---------------+--------------------------------+--------
+ 10 | 14 | catorce       |                                |       
+ 18 |  5 | cinco         |                                |       
+  9 |  4 | cuatro        |                                |       
+ 26 | 19 | diecinueve    |                                |       
+ 12 | 18 | dieciocho     |                                |       
+ 30 | 16 | dieciseis     |                                |       
+ 24 | 17 | diecisiete    |                                |       
+  2 | 10 | diez          |                                |       
+ 23 | 12 | doce          |                                |       
+ 11 |  2 | dos           |                                |       
+ 25 |  9 | nueve         |                                |       
+ 31 |  8 | ocho          |                                |       
+  1 | 11 | once          |                                |       
+ 28 | 15 | quince        |                                |       
+ 32 |  6 | seis          | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 |  7 | siete         |                                |       
+ 15 | 13 | trece         |                                |       
+ 22 | 30 | treinta       |                                |       
+ 17 | 32 | treinta y dos |                                |       
+  3 | 31 | treinta y uno |                                |       
+  5 |  3 | tres          |                                |       
+ 20 |  1 | uno           |                                |       
+  6 | 20 | veinte        |                                |       
+ 14 | 25 | veinticinco   |                                |       
+ 21 | 24 | veinticuatro  |                                |       
+  4 | 22 | veintidos     |                                |       
+ 19 | 29 | veintinueve   |                                |       
+ 16 | 28 | veintiocho    |                                |       
+ 27 | 26 | veintiseis    |                                |       
+ 13 | 27 | veintisiete   |                                |       
+  7 | 23 | veintitres    |                                |       
+  8 | 21 | veintiuno     |                                |       
 (32 rows)
 
 -- Verify that inheritance link still works
 INSERT INTO clstr_tst_inh VALUES (0, 100, 'in child table');
-SELECT * from clstr_tst;
- a  |  b  |       c        
-----+-----+----------------
- 10 |  14 | catorce
- 18 |   5 | cinco
-  9 |   4 | cuatro
- 26 |  19 | diecinueve
- 12 |  18 | dieciocho
- 30 |  16 | dieciseis
- 24 |  17 | diecisiete
-  2 |  10 | diez
- 23 |  12 | doce
- 11 |   2 | dos
- 25 |   9 | nueve
- 31 |   8 | ocho
-  1 |  11 | once
- 28 |  15 | quince
- 32 |   6 | seis
- 29 |   7 | siete
- 15 |  13 | trece
- 22 |  30 | treinta
- 17 |  32 | treinta y dos
-  3 |  31 | treinta y uno
-  5 |   3 | tres
- 20 |   1 | uno
-  6 |  20 | veinte
- 14 |  25 | veinticinco
- 21 |  24 | veinticuatro
-  4 |  22 | veintidos
- 19 |  29 | veintinueve
- 16 |  28 | veintiocho
- 27 |  26 | veintiseis
- 13 |  27 | veintisiete
-  7 |  23 | veintitres
-  8 |  21 | veintiuno
-  0 | 100 | in child table
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst;
+ a  |  b  |       c        |           substring            | length 
+----+-----+----------------+--------------------------------+--------
+ 10 |  14 | catorce        |                                |       
+ 18 |   5 | cinco          |                                |       
+  9 |   4 | cuatro         |                                |       
+ 26 |  19 | diecinueve     |                                |       
+ 12 |  18 | dieciocho      |                                |       
+ 30 |  16 | dieciseis      |                                |       
+ 24 |  17 | diecisiete     |                                |       
+  2 |  10 | diez           |                                |       
+ 23 |  12 | doce           |                                |       
+ 11 |   2 | dos            |                                |       
+ 25 |   9 | nueve          |                                |       
+ 31 |   8 | ocho           |                                |       
+  1 |  11 | once           |                                |       
+ 28 |  15 | quince         |                                |       
+ 32 |   6 | seis           | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000
+ 29 |   7 | siete          |                                |       
+ 15 |  13 | trece          |                                |       
+ 22 |  30 | treinta        |                                |       
+ 17 |  32 | treinta y dos  |                                |       
+  3 |  31 | treinta y uno  |                                |       
+  5 |   3 | tres           |                                |       
+ 20 |   1 | uno            |                                |       
+  6 |  20 | veinte         |                                |       
+ 14 |  25 | veinticinco    |                                |       
+ 21 |  24 | veinticuatro   |                                |       
+  4 |  22 | veintidos      |                                |       
+ 19 |  29 | veintinueve    |                                |       
+ 16 |  28 | veintiocho     |                                |       
+ 27 |  26 | veintiseis     |                                |       
+ 13 |  27 | veintisiete    |                                |       
+  7 |  23 | veintitres     |                                |       
+  8 |  21 | veintiuno      |                                |       
+  0 | 100 | in child table |                                |       
 (33 rows)
 
 -- Verify that foreign key link still works
 INSERT INTO clstr_tst (b, c) VALUES (1111, 'this should fail');
 ERROR:  clstr_tst_con referential integrity violation - key referenced from clstr_tst not found in clstr_tst_s
-SELECT conname FROM pg_constraint WHERE conrelid=(SELECT oid FROM pg_class
-       WHERE relname='clstr_tst');
+SELECT conname FROM pg_constraint WHERE conrelid = 'clstr_tst'::regclass;
     conname     
 ----------------
  clstr_tst_pkey
  clstr_tst_con
 (2 rows)
 
-SELECT relname FROM pg_class WHERE relname LIKE 'clstr_tst%' ORDER BY relname;
-       relname        
-----------------------
- clstr_tst
- clstr_tst_a_seq
- clstr_tst_b
- clstr_tst_b_c
- clstr_tst_c
- clstr_tst_c_b
- clstr_tst_inh
- clstr_tst_pkey
- clstr_tst_s
- clstr_tst_s_pkey
- clstr_tst_s_rf_a_seq
+SELECT relname, relkind,
+    EXISTS(SELECT 1 FROM pg_class WHERE oid = c.reltoastrelid) AS hastoast
+FROM pg_class c WHERE relname LIKE 'clstr_tst%' ORDER BY relname;
+       relname        | relkind | hastoast 
+----------------------+---------+----------
+ clstr_tst            | r       | t
+ clstr_tst_a_seq      | S       | f
+ clstr_tst_b          | i       | f
+ clstr_tst_b_c        | i       | f
+ clstr_tst_c          | i       | f
+ clstr_tst_c_b        | i       | f
+ clstr_tst_inh        | r       | t
+ clstr_tst_pkey       | i       | f
+ clstr_tst_s          | r       | f
+ clstr_tst_s_pkey     | i       | f
+ clstr_tst_s_rf_a_seq | S       | f
 (11 rows)
 
index 32041c7..599f6eb 100644 (file)
@@ -8,6 +8,7 @@ CREATE TABLE clstr_tst_s (rf_a SERIAL PRIMARY KEY,
 CREATE TABLE clstr_tst (a SERIAL PRIMARY KEY,
        b INT,
        c TEXT,
+       d TEXT,
        CONSTRAINT clstr_tst_con FOREIGN KEY (b) REFERENCES clstr_tst_s);
 
 CREATE INDEX clstr_tst_b ON clstr_tst (b);
@@ -55,24 +56,26 @@ INSERT INTO clstr_tst (b, c) VALUES (15, 'quince');
 INSERT INTO clstr_tst (b, c) VALUES (7, 'siete');
 INSERT INTO clstr_tst (b, c) VALUES (16, 'dieciseis');
 INSERT INTO clstr_tst (b, c) VALUES (8, 'ocho');
-INSERT INTO clstr_tst (b, c) VALUES (6, 'seis');
+-- This entry is needed to test that TOASTED values are copied correctly.
+INSERT INTO clstr_tst (b, c, d) VALUES (6, 'seis', repeat('xyzzy', 100000));
 
 CLUSTER clstr_tst_c ON clstr_tst;
 
-SELECT * from clstr_tst;
-SELECT * from clstr_tst ORDER BY a;
-SELECT * from clstr_tst ORDER BY b;
-SELECT * from clstr_tst ORDER BY c;
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst;
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a;
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY b;
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY c;
 
 -- Verify that inheritance link still works
 INSERT INTO clstr_tst_inh VALUES (0, 100, 'in child table');
-SELECT * from clstr_tst;
+SELECT a,b,c,substring(d for 30), length(d) from clstr_tst;
 
 -- Verify that foreign key link still works
 INSERT INTO clstr_tst (b, c) VALUES (1111, 'this should fail');
 
-SELECT conname FROM pg_constraint WHERE conrelid=(SELECT oid FROM pg_class
-       WHERE relname='clstr_tst');
+SELECT conname FROM pg_constraint WHERE conrelid = 'clstr_tst'::regclass;
 
 
-SELECT relname FROM pg_class WHERE relname LIKE 'clstr_tst%' ORDER BY relname;
+SELECT relname, relkind,
+    EXISTS(SELECT 1 FROM pg_class WHERE oid = c.reltoastrelid) AS hastoast
+FROM pg_class c WHERE relname LIKE 'clstr_tst%' ORDER BY relname;