OSDN Git Service

b059f9d784bd42e574eeb39228440a5292d84bdb
[pg-rex/syncrep.git] / src / backend / catalog / toasting.c
1 /*-------------------------------------------------------------------------
2  *
3  * toasting.c
4  *        This file contains routines to support creation of toast tables
5  *
6  *
7  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *        src/backend/catalog/toasting.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/heapam.h"
18 #include "access/tuptoaster.h"
19 #include "access/xact.h"
20 #include "catalog/dependency.h"
21 #include "catalog/heap.h"
22 #include "catalog/index.h"
23 #include "catalog/indexing.h"
24 #include "catalog/namespace.h"
25 #include "catalog/pg_namespace.h"
26 #include "catalog/pg_opclass.h"
27 #include "catalog/pg_type.h"
28 #include "catalog/toasting.h"
29 #include "miscadmin.h"
30 #include "nodes/makefuncs.h"
31 #include "utils/builtins.h"
32 #include "utils/syscache.h"
33
34 /* Potentially set by contrib/pg_upgrade_support functions */
35 extern Oid      binary_upgrade_next_toast_pg_class_oid;
36
37 Oid                     binary_upgrade_next_toast_pg_type_oid = InvalidOid;
38
39 static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
40                                    Datum reloptions);
41 static bool needs_toast_table(Relation rel);
42
43
44 /*
45  * AlterTableCreateToastTable
46  *              If the table needs a toast table, and doesn't already have one,
47  *              then create a toast table for it.
48  *
49  * reloptions for the toast table can be passed, too.  Pass (Datum) 0
50  * for default reloptions.
51  *
52  * We expect the caller to have verified that the relation is a table and have
53  * already done any necessary permission checks.  Callers expect this function
54  * to end with CommandCounterIncrement if it makes any changes.
55  */
56 void
57 AlterTableCreateToastTable(Oid relOid, Datum reloptions)
58 {
59         Relation        rel;
60
61         /*
62          * Grab a DDL-exclusive lock on the target table, since we'll update the
63          * pg_class tuple.      This is redundant for all present users.  Tuple
64          * toasting behaves safely in the face of a concurrent TOAST table add.
65          */
66         rel = heap_open(relOid, ShareUpdateExclusiveLock);
67
68         /* create_toast_table does all the work */
69         (void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions);
70
71         heap_close(rel, NoLock);
72 }
73
74 /*
75  * Create a toast table during bootstrap
76  *
77  * Here we need to prespecify the OIDs of the toast table and its index
78  */
79 void
80 BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid)
81 {
82         Relation        rel;
83
84         rel = heap_openrv(makeRangeVar(NULL, relName, -1), AccessExclusiveLock);
85
86         /* Note: during bootstrap may see uncataloged relation */
87         if (rel->rd_rel->relkind != RELKIND_RELATION &&
88                 rel->rd_rel->relkind != RELKIND_UNCATALOGED)
89                 ereport(ERROR,
90                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
91                                  errmsg("\"%s\" is not a table",
92                                                 relName)));
93
94         /* create_toast_table does all the work */
95         if (!create_toast_table(rel, toastOid, toastIndexOid, (Datum) 0))
96                 elog(ERROR, "\"%s\" does not require a toast table",
97                          relName);
98
99         heap_close(rel, NoLock);
100 }
101
102
103 /*
104  * create_toast_table --- internal workhorse
105  *
106  * rel is already opened and locked
107  * toastOid and toastIndexOid are normally InvalidOid, but during
108  * bootstrap they can be nonzero to specify hand-assigned OIDs
109  */
110 static bool
111 create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptions)
112 {
113         Oid                     relOid = RelationGetRelid(rel);
114         HeapTuple       reltup;
115         TupleDesc       tupdesc;
116         bool            shared_relation;
117         bool            mapped_relation;
118         Relation        toast_rel;
119         Relation        class_rel;
120         Oid                     toast_relid;
121         Oid                     toast_typid = InvalidOid;
122         Oid                     namespaceid;
123         char            toast_relname[NAMEDATALEN];
124         char            toast_idxname[NAMEDATALEN];
125         IndexInfo  *indexInfo;
126         Oid                     collationObjectId[2];
127         Oid                     classObjectId[2];
128         int16           coloptions[2];
129         ObjectAddress baseobject,
130                                 toastobject;
131
132         /*
133          * Toast table is shared if and only if its parent is.
134          *
135          * We cannot allow toasting a shared relation after initdb (because
136          * there's no way to mark it toasted in other databases' pg_class).
137          */
138         shared_relation = rel->rd_rel->relisshared;
139         if (shared_relation && !IsBootstrapProcessingMode())
140                 ereport(ERROR,
141                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
142                                  errmsg("shared tables cannot be toasted after initdb")));
143
144         /* It's mapped if and only if its parent is, too */
145         mapped_relation = RelationIsMapped(rel);
146
147         /*
148          * Is it already toasted?
149          */
150         if (rel->rd_rel->reltoastrelid != InvalidOid)
151                 return false;
152
153         /*
154          * Check to see whether the table actually needs a TOAST table.
155          *
156          * If an update-in-place toast relfilenode is specified, force toast file
157          * creation even if it seems not to need one.
158          */
159         if (!needs_toast_table(rel) &&
160                 (!IsBinaryUpgrade ||
161                  !OidIsValid(binary_upgrade_next_toast_pg_class_oid)))
162                 return false;
163
164         /*
165          * Create the toast table and its index
166          */
167         snprintf(toast_relname, sizeof(toast_relname),
168                          "pg_toast_%u", relOid);
169         snprintf(toast_idxname, sizeof(toast_idxname),
170                          "pg_toast_%u_index", relOid);
171
172         /* this is pretty painful...  need a tuple descriptor */
173         tupdesc = CreateTemplateTupleDesc(3, false);
174         TupleDescInitEntry(tupdesc, (AttrNumber) 1,
175                                            "chunk_id",
176                                            OIDOID,
177                                            -1, 0);
178         TupleDescInitEntry(tupdesc, (AttrNumber) 2,
179                                            "chunk_seq",
180                                            INT4OID,
181                                            -1, 0);
182         TupleDescInitEntry(tupdesc, (AttrNumber) 3,
183                                            "chunk_data",
184                                            BYTEAOID,
185                                            -1, 0);
186
187         /*
188          * Ensure that the toast table doesn't itself get toasted, or we'll be
189          * toast :-(.  This is essential for chunk_data because type bytea is
190          * toastable; hit the other two just to be sure.
191          */
192         tupdesc->attrs[0]->attstorage = 'p';
193         tupdesc->attrs[1]->attstorage = 'p';
194         tupdesc->attrs[2]->attstorage = 'p';
195
196         /*
197          * Toast tables for regular relations go in pg_toast; those for temp
198          * relations go into the per-backend temp-toast-table namespace.
199          */
200         if (RelationUsesTempNamespace(rel))
201                 namespaceid = GetTempToastNamespace();
202         else
203                 namespaceid = PG_TOAST_NAMESPACE;
204
205         /* Use binary-upgrade override for pg_type.oid, if supplied. */
206         if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid))
207         {
208                 toast_typid = binary_upgrade_next_toast_pg_type_oid;
209                 binary_upgrade_next_toast_pg_type_oid = InvalidOid;
210         }
211
212         toast_relid = heap_create_with_catalog(toast_relname,
213                                                                                    namespaceid,
214                                                                                    rel->rd_rel->reltablespace,
215                                                                                    toastOid,
216                                                                                    toast_typid,
217                                                                                    InvalidOid,
218                                                                                    rel->rd_rel->relowner,
219                                                                                    tupdesc,
220                                                                                    NIL,
221                                                                                    RELKIND_TOASTVALUE,
222                                                                                    rel->rd_rel->relpersistence,
223                                                                                    shared_relation,
224                                                                                    mapped_relation,
225                                                                                    true,
226                                                                                    0,
227                                                                                    ONCOMMIT_NOOP,
228                                                                                    reloptions,
229                                                                                    false,
230                                                                                    true);
231         Assert(toast_relid != InvalidOid);
232
233         /* make the toast relation visible, else heap_open will fail */
234         CommandCounterIncrement();
235
236         /* ShareLock is not really needed here, but take it anyway */
237         toast_rel = heap_open(toast_relid, ShareLock);
238
239         /*
240          * Create unique index on chunk_id, chunk_seq.
241          *
242          * NOTE: the normal TOAST access routines could actually function with a
243          * single-column index on chunk_id only. However, the slice access
244          * routines use both columns for faster access to an individual chunk. In
245          * addition, we want it to be unique as a check against the possibility of
246          * duplicate TOAST chunk OIDs. The index might also be a little more
247          * efficient this way, since btree isn't all that happy with large numbers
248          * of equal keys.
249          */
250
251         indexInfo = makeNode(IndexInfo);
252         indexInfo->ii_NumIndexAttrs = 2;
253         indexInfo->ii_KeyAttrNumbers[0] = 1;
254         indexInfo->ii_KeyAttrNumbers[1] = 2;
255         indexInfo->ii_Expressions = NIL;
256         indexInfo->ii_ExpressionsState = NIL;
257         indexInfo->ii_Predicate = NIL;
258         indexInfo->ii_PredicateState = NIL;
259         indexInfo->ii_ExclusionOps = NULL;
260         indexInfo->ii_ExclusionProcs = NULL;
261         indexInfo->ii_ExclusionStrats = NULL;
262         indexInfo->ii_Unique = true;
263         indexInfo->ii_ReadyForInserts = true;
264         indexInfo->ii_Concurrent = false;
265         indexInfo->ii_BrokenHotChain = false;
266
267         collationObjectId[0] = InvalidOid;
268         collationObjectId[1] = InvalidOid;
269
270         classObjectId[0] = OID_BTREE_OPS_OID;
271         classObjectId[1] = INT4_BTREE_OPS_OID;
272
273         coloptions[0] = 0;
274         coloptions[1] = 0;
275
276         index_create(toast_rel, toast_idxname, toastIndexOid,
277                                  indexInfo,
278                                  list_make2("chunk_id", "chunk_seq"),
279                                  BTREE_AM_OID,
280                                  rel->rd_rel->reltablespace,
281                                  collationObjectId, classObjectId, coloptions, (Datum) 0,
282                                  true, false, false, false,
283                                  true, false, false);
284
285         heap_close(toast_rel, NoLock);
286
287         /*
288          * Store the toast table's OID in the parent relation's pg_class row
289          */
290         class_rel = heap_open(RelationRelationId, RowExclusiveLock);
291
292         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
293         if (!HeapTupleIsValid(reltup))
294                 elog(ERROR, "cache lookup failed for relation %u", relOid);
295
296         ((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;
297
298         if (!IsBootstrapProcessingMode())
299         {
300                 /* normal case, use a transactional update */
301                 simple_heap_update(class_rel, &reltup->t_self, reltup);
302
303                 /* Keep catalog indexes current */
304                 CatalogUpdateIndexes(class_rel, reltup);
305         }
306         else
307         {
308                 /* While bootstrapping, we cannot UPDATE, so overwrite in-place */
309                 heap_inplace_update(class_rel, reltup);
310         }
311
312         heap_freetuple(reltup);
313
314         heap_close(class_rel, RowExclusiveLock);
315
316         /*
317          * Register dependency from the toast table to the master, so that the
318          * toast table will be deleted if the master is.  Skip this in bootstrap
319          * mode.
320          */
321         if (!IsBootstrapProcessingMode())
322         {
323                 baseobject.classId = RelationRelationId;
324                 baseobject.objectId = relOid;
325                 baseobject.objectSubId = 0;
326                 toastobject.classId = RelationRelationId;
327                 toastobject.objectId = toast_relid;
328                 toastobject.objectSubId = 0;
329
330                 recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
331         }
332
333         /*
334          * Make changes visible
335          */
336         CommandCounterIncrement();
337
338         return true;
339 }
340
341 /*
342  * Check to see whether the table needs a TOAST table.  It does only if
343  * (1) there are any toastable attributes, and (2) the maximum length
344  * of a tuple could exceed TOAST_TUPLE_THRESHOLD.  (We don't want to
345  * create a toast table for something like "f1 varchar(20)".)
346  */
347 static bool
348 needs_toast_table(Relation rel)
349 {
350         int32           data_length = 0;
351         bool            maxlength_unknown = false;
352         bool            has_toastable_attrs = false;
353         TupleDesc       tupdesc;
354         Form_pg_attribute *att;
355         int32           tuple_length;
356         int                     i;
357
358         tupdesc = rel->rd_att;
359         att = tupdesc->attrs;
360
361         for (i = 0; i < tupdesc->natts; i++)
362         {
363                 if (att[i]->attisdropped)
364                         continue;
365                 data_length = att_align_nominal(data_length, att[i]->attalign);
366                 if (att[i]->attlen > 0)
367                 {
368                         /* Fixed-length types are never toastable */
369                         data_length += att[i]->attlen;
370                 }
371                 else
372                 {
373                         int32           maxlen = type_maximum_size(att[i]->atttypid,
374                                                                                                    att[i]->atttypmod);
375
376                         if (maxlen < 0)
377                                 maxlength_unknown = true;
378                         else
379                                 data_length += maxlen;
380                         if (att[i]->attstorage != 'p')
381                                 has_toastable_attrs = true;
382                 }
383         }
384         if (!has_toastable_attrs)
385                 return false;                   /* nothing to toast? */
386         if (maxlength_unknown)
387                 return true;                    /* any unlimited-length attrs? */
388         tuple_length = MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
389                                                         BITMAPLEN(tupdesc->natts)) +
390                 MAXALIGN(data_length);
391         return (tuple_length > TOAST_TUPLE_THRESHOLD);
392 }