1 /*-------------------------------------------------------------------------
4 * Paul Brown's implementation of cluster index.
6 * I am going to use the rename function as a model for this in the
7 * parser and executor, and the vacuum code as an example in this
8 * file. As I go - in contrast to the rest of postgres - there will
9 * be BUCKETS of comments. This is to allow reviewers to understand
10 * my (probably bogus) assumptions about the way this works.
13 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
14 * Portions Copyright (c) 1994-5, Regents of the University of California
18 * $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.58 2000/07/14 22:17:42 tgl Exp $
20 *-------------------------------------------------------------------------
25 #include "access/genam.h"
26 #include "access/heapam.h"
27 #include "catalog/heap.h"
28 #include "catalog/index.h"
29 #include "catalog/pg_index.h"
30 #include "catalog/pg_proc.h"
31 #include "commands/cluster.h"
32 #include "commands/rename.h"
33 #include "miscadmin.h"
34 #include "utils/builtins.h"
35 #include "utils/syscache.h"
/*
 * File-local helpers used by cluster():
 *   copy_heap   - creates a new heap named "temp_<oid in hex>" with a copy
 *                 of the old heap's tuple descriptor.
 *   copy_index  - creates an index named "temp_<oid in hex>" on the new
 *                 heap, using the old index's catalog information.
 *   rebuildheap - scans the old heap through the old index and inserts a
 *                 copy of each fetched tuple into the new heap.
 */
37 static Relation copy_heap(Oid OIDOldHeap);
38 static void copy_index(Oid OIDOldIndex, Oid OIDNewHeap);
39 static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
44 * Check that the relation is a relation in the appropriate user
45 * ACL. I will use the same security that limits users on the
46 * renamerel() function.
48 * Check that the index specified is appropriate for the task
49 * (i.e., it's an index over this relation). This is trickier.
51 * Create a list of all the other indices on this relation. Because
52 * the cluster will wreck all the tids, I'll need to destroy bogus
53 * indices. The user will have to re-create them. Not nice, but
54 * I'm not a nice guy. The alternative is to try some kind of post
55 * destroy re-build. This may be possible. I'll check out what the
56 * index create functions want in the way of parameters. On the other
57 * hand, re-creating n indices may blow out the space.
59 * Create new (temporary) relations for the base heap and the new
62 * Exclusively lock the relations.
64 * Create new clustered index and base heap relation.
/*
 * cluster
 *
 * Rebuild the relation named oldrelname so that its tuples are stored in
 * the order produced by scanning the index named oldindexname.
 *
 * Parameters:
 *   oldrelname   - name of the heap relation to rebuild.
 *   oldindexname - name of the index that supplies the new tuple order.
 *
 * Steps, in the order the statements below perform them:
 *   1. Both names are copied into local NAMEDATALEN-sized buffers with
 *      strcpy().
 *      NOTE(review): no length check is visible here; a name of
 *      NAMEDATALEN bytes or more would overrun the buffer. Confirm that
 *      every caller guarantees the bound.
 *   2. The heap is opened by name with AccessExclusiveLock; the index is
 *      opened by name and then locked with AccessExclusiveLock. Their OIDs
 *      are recorded, then both are closed. The heap is closed with NoLock.
 *      NOTE(review): nothing here verifies that the index belongs to the
 *      named relation (see the XXX remark in the body).
 *   3. copy_heap() creates the new heap; its OID and name are saved.
 *      NOTE(review): copy_heap() passes the new relation to heap_close()
 *      before it finishes, so NewHeap is presumably an already-closed
 *      handle when RelationGetRelid() and RelationGetRelationName() are
 *      applied to it. TODO confirm this is safe.
 *   4. CommandCounterIncrement(), then rebuildheap() fills the new heap,
 *      then CommandCounterIncrement() again.
 *   5. copy_index() creates the new index. NewIndexName is then formatted
 *      with "temp_%x" from OIDOldIndex, the same format string that
 *      copy_index() uses for the index it creates; the two must stay in
 *      sync because the name is used for the final rename.
 *   6. The transaction is committed and a new one started; the old heap is
 *      dropped by name with heap_drop_with_catalog(); commit and restart
 *      again; finally the new heap and the new index are renamed to the
 *      saved original names with renamerel().
 *
 * NOTE(review): the return type, braces and some local declarations
 * (OldHeap, OldIndex, NewHeap and the OID variables) are not visible in
 * this listing.
 */
68 cluster(char *oldrelname, char *oldindexname)
78 char NewIndexName[NAMEDATALEN];
79 char NewHeapName[NAMEDATALEN];
80 char saveoldrelname[NAMEDATALEN];
81 char saveoldindexname[NAMEDATALEN];
84 * Copy the arguments into local storage, because they are probably
85 * in palloc'd storage that will go away when we commit a transaction.
87 strcpy(saveoldrelname, oldrelname);
88 strcpy(saveoldindexname, oldindexname);
91 * Like vacuum, cluster spans transactions, so I'm going to handle it
92 * in the same way: commit and restart transactions where needed.
94 * We grab exclusive access to the target rel and index for the duration
95 * of the initial transaction.
98 OldHeap = heap_openr(saveoldrelname, AccessExclusiveLock);
99 OIDOldHeap = RelationGetRelid(OldHeap);
101 OldIndex = index_openr(saveoldindexname); /* Open old index relation */
102 LockRelation(OldIndex, AccessExclusiveLock);
103 OIDOldIndex = RelationGetRelid(OldIndex);
106 * XXX Should check that index is in fact an index on this relation?
109 heap_close(OldHeap, NoLock);/* do NOT give up the locks */
110 index_close(OldIndex);
113 * I need to build the copies of the heap and the index. The Commit()
114 * between here is *very* bogus. If someone is appending stuff, they
115 * will get the lock after being blocked and add rows which won't be
116 * present in the new table. Bleagh! I'd be best to try and ensure
117 * that no-one's in the tables for the entire duration of this process
118 * with a pg_vlock. XXX Isn't the above comment now invalid?
120 NewHeap = copy_heap(OIDOldHeap);
121 OIDNewHeap = RelationGetRelid(NewHeap);
122 strcpy(NewHeapName, RelationGetRelationName(NewHeap));
124 /* To make the new heap visible (which is until now empty). */
125 CommandCounterIncrement();
127 rebuildheap(OIDNewHeap, OIDOldHeap, OIDOldIndex);
129 /* To flush the filled new heap (and the statistics about it). */
130 CommandCounterIncrement();
132 /* Create new index over the tuples of the new heap. */
133 copy_index(OIDOldIndex, OIDNewHeap);
134 snprintf(NewIndexName, NAMEDATALEN, "temp_%x", OIDOldIndex);
137 * make this really happen. Flush all the buffers. (Believe me, it is
138 * necessary ... ended up in a mess without it.)
140 CommitTransactionCommand();
141 StartTransactionCommand();
143 /* Destroy old heap (along with its index) and rename new. */
144 heap_drop_with_catalog(saveoldrelname, allowSystemTableMods);
146 CommitTransactionCommand();
147 StartTransactionCommand();
149 renamerel(NewHeapName, saveoldrelname);
150 renamerel(NewIndexName, saveoldindexname);
/*
 * copy_heap
 *
 * Create a new, empty heap relation whose tuple descriptor is a copy of
 * the descriptor of the heap identified by OIDOldHeap. The new relation
 * is named "temp_" followed by OIDOldHeap formatted in hexadecimal.
 *
 * Parameters:
 *   OIDOldHeap - OID of the existing heap; it is opened here with
 *                AccessExclusiveLock.
 *
 * Errors:
 *   elog(ERROR) when heap_create_with_catalog() does not hand back a
 *   valid OID.
 *
 * NOTE(review): the return statement and several local declarations are
 * not visible in this listing. The prototype declares a Relation result
 * and the caller uses it as the new heap. Both relations are passed to
 * heap_close() below, so the result is presumably a handle that has
 * already been closed. TODO confirm the caller may still dereference it.
 */
154 copy_heap(Oid OIDOldHeap)
156 char NewName[NAMEDATALEN];
157 TupleDesc OldHeapDesc,
164 * Create a new heap relation with a temporary name, which has the
165 * same tuple description as the old one.
/* The temporary name is derived only from the OID of the old heap. */
167 snprintf(NewName, NAMEDATALEN, "temp_%x", OIDOldHeap);
169 OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
170 OldHeapDesc = RelationGetDescr(OldHeap);
173 * Need to make a copy of the tuple descriptor,
174 * heap_create_with_catalog modifies it.
177 tupdesc = CreateTupleDescCopy(OldHeapDesc);
179 OIDNewHeap = heap_create_with_catalog(NewName, tupdesc,
180 RELKIND_RELATION, false,
181 allowSystemTableMods);
/* An invalid OID is treated as a creation failure and aborts via elog. */
183 if (!OidIsValid(OIDNewHeap))
184 elog(ERROR, "clusterheap: cannot create temporary heap relation\n");
186 /* XXX why are we bothering to do this: */
187 NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
/* Both relations are closed here, each with AccessExclusiveLock passed. */
189 heap_close(NewHeap, AccessExclusiveLock);
190 heap_close(OldHeap, AccessExclusiveLock);
/*
 * copy_index
 *
 * Create a new index on the heap identified by OIDNewHeap, modelled on
 * the existing index identified by OIDOldIndex. The new index is named
 * "temp_" followed by OIDOldIndex formatted in hexadecimal.
 *
 * Parameters:
 *   OIDOldIndex - OID of the index to use as the template.
 *   OIDNewHeap  - OID of the heap the new index is created on; it is
 *                 opened here with AccessExclusiveLock.
 *
 * What the body does:
 *   - Fetches a copy of the old index's row from the INDEXRELID cache and
 *     builds an IndexInfo from it with BuildIndexInfo().
 *   - Fetches a copy of the old index's row from the RELOID cache; its
 *     relam field is passed on to index_create().
 *   - Calls index_create() with the new heap's name, the old index's
 *     relam, indclass, indislossy and indisprimary values, and
 *     allowSystemTableMods.
 *   - Calls setRelhasindexInplace(OIDNewHeap, true, false).
 *   - Closes the old index and the new heap.
 *
 * NOTE(review): both cache lookups are guarded only by Assert(); a failed
 * lookup would presumably go undetected when assertions are compiled out.
 * TODO confirm.
 * NOTE(review): NewIndexName is palloc'd (marked XXX in the body) and no
 * matching pfree is visible in this listing.
 * NOTE(review): some call arguments and local declarations are not
 * visible in this listing.
 */
196 copy_index(Oid OIDOldIndex, Oid OIDNewHeap)
200 HeapTuple Old_pg_index_Tuple,
201 Old_pg_index_relation_Tuple;
202 Form_pg_index Old_pg_index_Form;
203 Form_pg_class Old_pg_index_relation_Form;
204 IndexInfo *indexInfo;
/* Open the target heap and the template index. */
207 NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
208 OldIndex = index_open(OIDOldIndex);
211 * OK. Create a new (temporary) index for the one that's already here.
212 * To do this I get the info from pg_index, and add a new index with
215 Old_pg_index_Tuple = SearchSysCacheTupleCopy(INDEXRELID,
216 ObjectIdGetDatum(RelationGetRelid(OldIndex)),
218 Assert(Old_pg_index_Tuple);
219 Old_pg_index_Form = (Form_pg_index) GETSTRUCT(Old_pg_index_Tuple);
221 indexInfo = BuildIndexInfo(Old_pg_index_Tuple);
223 Old_pg_index_relation_Tuple = SearchSysCacheTupleCopy(RELOID,
224 ObjectIdGetDatum(RelationGetRelid(OldIndex)),
226 Assert(Old_pg_index_relation_Tuple);
227 Old_pg_index_relation_Form = (Form_pg_class) GETSTRUCT(Old_pg_index_relation_Tuple);
230 NewIndexName = palloc(NAMEDATALEN); /* XXX */
231 snprintf(NewIndexName, NAMEDATALEN, "temp_%x", OIDOldIndex);
233 index_create(RelationGetRelationName(NewHeap),
236 Old_pg_index_relation_Form->relam,
237 Old_pg_index_Form->indclass,
238 Old_pg_index_Form->indislossy,
239 Old_pg_index_Form->indisprimary,
240 allowSystemTableMods);
242 setRelhasindexInplace(OIDNewHeap, true, false);
244 index_close(OldIndex);
245 heap_close(NewHeap, AccessExclusiveLock);
/*
 * rebuildheap
 *
 * Fill the new heap by walking the old index in forward direction and
 * inserting a copy of every old-heap tuple that the scan yields.
 *
 * Parameters:
 *   OIDNewHeap  - OID of the heap that receives the tuples.
 *   OIDOldHeap  - OID of the heap the tuples are fetched from.
 *   OIDOldIndex - OID of the index on the old heap that drives the scan.
 *
 * Both heaps are opened with AccessExclusiveLock and closed again, along
 * with the index, after the scan has been ended.
 *
 * Per index entry, the loop:
 *   - points a stack-local HeapTupleData at the item pointer reported by
 *     the scan and fetches it from the old heap under SnapshotNow;
 *   - skips the entry when the fetch leaves t_data NULL;
 *   - otherwise copies the tuple, releases the buffer, inserts the copy
 *     into the new heap and frees the copy. The copy is required for the
 *     reason explained in the remark inside the loop.
 *
 * NOTE(review): some local declarations and any per-iteration cleanup of
 * ScanResult are not visible in this listing.
 */
250 rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
252 Relation LocalNewHeap,
255 IndexScanDesc ScanDesc;
256 RetrieveIndexResult ScanResult;
259 * Open the relations I need. Scan through the OldHeap on the OldIndex
260 * and insert each tuple into the NewHeap.
262 LocalNewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
263 LocalOldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
264 LocalOldIndex = index_open(OIDOldIndex);
/*
 * The scan is started with 0 and a NULL ScanKey, presumably meaning no
 * scan keys, so every index entry is visited. TODO confirm against
 * index_beginscan.
 */
266 ScanDesc = index_beginscan(LocalOldIndex, false, 0, (ScanKey) NULL);
268 while ((ScanResult = index_getnext(ScanDesc, ForwardScanDirection)) != NULL)
270 HeapTupleData LocalHeapTuple;
/* Only t_self is meaningful before the fetch; the rest is cleared. */
273 LocalHeapTuple.t_self = ScanResult->heap_iptr;
274 LocalHeapTuple.t_datamcxt = NULL;
275 LocalHeapTuple.t_data = NULL;
276 heap_fetch(LocalOldHeap, SnapshotNow, &LocalHeapTuple, &LocalBuffer);
277 if (LocalHeapTuple.t_data != NULL) {
279 * We must copy the tuple because heap_insert() will overwrite
280 * the commit-status fields of the tuple it's handed, and the
281 * retrieved tuple will actually be in a disk buffer! Thus,
282 * the source relation would get trashed, which is bad news
283 * if we abort later on. (This was a bug in releases thru 7.0)
285 HeapTuple copiedTuple = heap_copytuple(&LocalHeapTuple);
287 ReleaseBuffer(LocalBuffer);
288 heap_insert(LocalNewHeap, copiedTuple);
289 heap_freetuple(copiedTuple);
294 index_endscan(ScanDesc);
296 index_close(LocalOldIndex);
297 heap_close(LocalOldHeap, AccessExclusiveLock);
298 heap_close(LocalNewHeap, AccessExclusiveLock);