OSDN Git Service

Remove cvs keywords from all files.
[pg-rex/syncrep.git] / src / backend / utils / cache / relcache.c
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  *        POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              RelationCacheInitialize                 - initialize relcache (to empty)
18  *              RelationCacheInitializePhase2   - initialize shared-catalog entries
19  *              RelationCacheInitializePhase3   - finish initializing relcache
20  *              RelationIdGetRelation                   - get a reldesc by relation id
21  *              RelationClose                                   - close an open relation
22  *
23  * NOTES
24  *              The following code contains many undocumented hacks.  Please be
25  *              careful....
26  */
27 #include "postgres.h"
28
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32
33 #include "access/genam.h"
34 #include "access/reloptions.h"
35 #include "access/sysattr.h"
36 #include "access/transam.h"
37 #include "access/xact.h"
38 #include "catalog/catalog.h"
39 #include "catalog/index.h"
40 #include "catalog/indexing.h"
41 #include "catalog/namespace.h"
42 #include "catalog/pg_amop.h"
43 #include "catalog/pg_amproc.h"
44 #include "catalog/pg_attrdef.h"
45 #include "catalog/pg_authid.h"
46 #include "catalog/pg_auth_members.h"
47 #include "catalog/pg_constraint.h"
48 #include "catalog/pg_database.h"
49 #include "catalog/pg_namespace.h"
50 #include "catalog/pg_opclass.h"
51 #include "catalog/pg_operator.h"
52 #include "catalog/pg_proc.h"
53 #include "catalog/pg_rewrite.h"
54 #include "catalog/pg_tablespace.h"
55 #include "catalog/pg_trigger.h"
56 #include "catalog/pg_type.h"
57 #include "catalog/schemapg.h"
58 #include "catalog/storage.h"
59 #include "commands/trigger.h"
60 #include "miscadmin.h"
61 #include "optimizer/clauses.h"
62 #include "optimizer/planmain.h"
63 #include "optimizer/prep.h"
64 #include "optimizer/var.h"
65 #include "rewrite/rewriteDefine.h"
66 #include "storage/fd.h"
67 #include "storage/lmgr.h"
68 #include "storage/smgr.h"
69 #include "utils/array.h"
70 #include "utils/builtins.h"
71 #include "utils/fmgroids.h"
72 #include "utils/inval.h"
73 #include "utils/lsyscache.h"
74 #include "utils/memutils.h"
75 #include "utils/relcache.h"
76 #include "utils/relmapper.h"
77 #include "utils/resowner.h"
78 #include "utils/syscache.h"
79 #include "utils/tqual.h"
80
81
82 /*
83  *              name of relcache init file(s), used to speed up backend startup
 *
 *              load_relcache_init_file() and write_relcache_init_file() (declared
 *              below) each take a "shared" flag, and comments further down refer to
 *              both a shared init file and a per-database local one.
 *              NOTE(review): presumably both use this same file name in different
 *              directories -- confirm in the init-file code.
84  */
85 #define RELCACHE_INIT_FILENAME  "pg_internal.init"
86
/*
 * NOTE(review): presumably written into the init file so that a file with a
 * different layout can be recognized and ignored -- confirm in
 * load_relcache_init_file().
 */
87 #define RELCACHE_INIT_FILEMAGIC         0x573265        /* version ID value */
88
89 /*
90  *              hardcoded tuple descriptors, generated by genbki.pl
 *
 *              Each array has one FormData_pg_attribute element per column of the
 *              named catalog (Natts_pg_xxx elements), initialized from the
 *              corresponding Schema_pg_xxx macro.
 *              NOTE(review): the Schema_pg_xxx macros presumably come from
 *              catalog/schemapg.h, which this file includes -- confirm.
91  */
92 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
93 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
94 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
95 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
96 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
97 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
98 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
99 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
100
101 /*
102  *              Hash tables that index the relation cache
103  *
104  *              We used to index the cache by both name and OID, but now there
105  *              is only an index by OID.
 *
 *              Entries are manipulated only through the RelationCacheInsert,
 *              RelationIdCacheLookup and RelationCacheDelete macros defined below.
106  */
107 typedef struct relidcacheent
108 {
109         Oid                     reloid;         /* lookup key; the macros below pass a relation's rd_id */
110         Relation        reldesc;        /* relation descriptor cached for that OID */
111 } RelIdCacheEnt;
112
/* The OID-indexed hash table itself; it is not created in this part of the file. */
113 static HTAB *RelationIdCache;
114
115 /*
116  * This flag is false until we have prepared the critical relcache entries
117  * that are needed to do indexscans on the tables read by relcache building.
 *
 * While it is false, ScanPgRelation() and RelationBuildTupleDesc() ask
 * systable_beginscan() for a heap scan of pg_class / pg_attribute rather
 * than an index scan.  Not static, so it is visible outside this file.
118  */
119 bool            criticalRelcachesBuilt = false;
120
121 /*
122  * This flag is false until we have prepared the critical relcache entries
123  * for shared catalogs (which are the tables needed for login).
 *
 * Not static, so it is visible outside this file.
124  */
125 bool            criticalSharedRelcachesBuilt = false;
126
127 /*
128  * This counter counts relcache inval events received since backend startup
129  * (but only for rels that are actually in cache).  Presently, we use it only
130  * to detect whether data about to be written by write_relcache_init_file()
131  * might already be obsolete.
132  */
133 static long relcacheInvalsReceived = 0L;
134
135 /*
136  * This list remembers the OIDs of the non-shared relations cached in the
137  * database's local relcache init file.  Note that there is no corresponding
138  * list for the shared relcache init file, for reasons explained in the
139  * comments for RelationCacheInitFileRemove.
140  */
141 static List *initFileRelationIds = NIL;
142
143 /*
144  * This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache().
 *
 * NOTE(review): presumably set whenever some relcache entry will need
 * end-of-transaction processing -- confirm where it is assigned.
145  */
146 static bool need_eoxact_work = false;
147
148
149 /*
150  *              macros to manipulate the lookup hashtables
151  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache, keyed by its rd_id.
 *
 * If an entry for that OID already exists, its reldesc pointer is simply
 * overwritten; no notice is given.  RELATION is evaluated more than once,
 * so it must not have side effects.
 */
#define RelationCacheInsert(RELATION)	\
do { \
	RelIdCacheEnt *idhentry; \
	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
											 (void *) &((RELATION)->rd_id), \
											 HASH_ENTER, NULL); \
	/* used to give notice if already present -- now just keep quiet */ \
	idhentry->reldesc = (RELATION); \
} while(0)
161
/*
 * RelationIdCacheLookup
 *		Look up the relation OID held in the lvalue ID in RelationIdCache.
 *
 * RELATION is assigned the cached descriptor, or NULL when the OID has no
 * entry.  ID must be addressable, since its address is passed as the key.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry; \
	hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
										   (void *) &(ID), \
										   HASH_FIND, NULL); \
	(RELATION) = (hentry != NULL) ? hentry->reldesc : NULL; \
} while(0)
173
/*
 * RelationCacheDelete
 *		Remove the RelationIdCache entry keyed by RELATION's rd_id.
 *
 * Emits a WARNING (and otherwise does nothing) if no such entry exists.
 * The relation descriptor itself is not freed here.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *idhentry; \
	idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
											 (void *) &((RELATION)->rd_id), \
											 HASH_REMOVE, NULL); \
	if (idhentry == NULL) \
		elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
} while(0)
183
184
185 /*
186  * Special cache for opclass-related information
187  *
188  * Note: only default operators and support procs get cached, ie, those with
189  * lefttype = righttype = opcintype.
 *
 * Entries are returned by LookupOpclassInfo() (declared below), which takes
 * the opclass OID together with the strategy and support-proc counts.
 * NOTE(review): operatorOids / supportProcs presumably point to arrays of
 * numStrats / numSupport elements -- confirm in LookupOpclassInfo().
190  */
191 typedef struct opclasscacheent
192 {
193         Oid                     opclassoid;             /* lookup key: OID of opclass */
194         bool            valid;                  /* set TRUE after successful fill-in */
195         StrategyNumber numStrats;       /* max # of strategies (from pg_am) */
196         StrategyNumber numSupport;      /* max # of support procs (from pg_am) */
197         Oid                     opcfamily;              /* OID of opclass's family */
198         Oid                     opcintype;              /* OID of opclass's declared input type */
199         Oid                *operatorOids;       /* strategy operators' OIDs */
200         RegProcedure *supportProcs; /* support procs */
201 } OpClassCacheEnt;
202
/* Starts out NULL; the hash table is not created in this part of the file. */
203 static HTAB *OpClassCache = NULL;
204
205
206 /* non-export function prototypes */
/*
 * All functions declared here are static (file-local).  Note that
 * RelationBuildRuleLock() and equalRuleLocks() are file-local too but are
 * not listed; their definitions further down appear without a prototype.
 */
207
/* relcache entry teardown / rebuild */
208 static void RelationDestroyRelation(Relation relation);
209 static void RelationClearRelation(Relation relation, bool rebuild);
210
/* entry refresh and relcache init file handling */
211 static void RelationReloadIndexInfo(Relation relation);
212 static void RelationFlushRelation(Relation relation);
213 static bool load_relcache_init_file(bool shared);
214 static void write_relcache_init_file(bool shared);
215 static void write_item(const void *data, Size len, FILE *fp);
216
/* takes a hardcoded attribute array such as the Desc_pg_xxx tables above */
217 static void formrdesc(const char *relationName, Oid relationReltype,
218                   bool isshared, bool hasoids,
219                   int natts, const FormData_pg_attribute *attrs);
220
/* building a relcache entry from the system catalogs, and related helpers */
221 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
222 static Relation AllocateRelationDesc(Form_pg_class relp);
223 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
224 static void RelationBuildTupleDesc(Relation relation);
225 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
226 static void RelationInitPhysicalAddr(Relation relation);
227 static void load_critical_index(Oid indexoid, Oid heapoid);
228 static TupleDesc GetPgClassDescriptor(void);
229 static TupleDesc GetPgIndexDescriptor(void);
230 static void AttrDefaultFetch(Relation relation);
231 static void CheckConstraintFetch(Relation relation);
232 static List *insert_ordered_oid(List *list, Oid datum);
233 static void IndexSupportInitialize(oidvector *indclass,
234                                            Oid *indexOperator,
235                                            RegProcedure *indexSupport,
236                                            Oid *opFamily,
237                                            Oid *opcInType,
238                                            StrategyNumber maxStrategyNumber,
239                                            StrategyNumber maxSupportNumber,
240                                            AttrNumber maxAttributeNumber);
241 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
242                                   StrategyNumber numStrats,
243                                   StrategyNumber numSupport);
244 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
245 static void unlink_initfile(const char *initfilename);
246
247
248 /*
249  *              ScanPgRelation
250  *
251  *              This is used by RelationBuildDesc to find a pg_class
252  *              tuple matching targetRelId.  The caller must hold at least
253  *              AccessShareLock on the target relid to prevent concurrent-update
254  *              scenarios --- else our SnapshotNow scan might fail to find any
255  *              version that it thinks is live.
256  *
257  *              NB: the returned tuple has been copied into palloc'd storage
258  *              and must eventually be freed with heap_freetuple.
259  */
260 static HeapTuple
261 ScanPgRelation(Oid targetRelId, bool indexOK)
262 {
263         HeapTuple       pg_class_tuple;
264         Relation        pg_class_desc;
265         SysScanDesc pg_class_scan;
266         ScanKeyData key[1];
267
268         /*
269          * If something goes wrong during backend startup, we might find ourselves
270          * trying to read pg_class before we've selected a database.  That ain't
271          * gonna work, so bail out with a useful error message.  If this happens,
272          * it probably means a relcache entry that needs to be nailed isn't.
273          */
274         if (!OidIsValid(MyDatabaseId))
275                 elog(FATAL, "cannot read pg_class without having selected a database");
276
277         /*
278          * form a scan key
279          */
280         ScanKeyInit(&key[0],
281                                 ObjectIdAttributeNumber,
282                                 BTEqualStrategyNumber, F_OIDEQ,
283                                 ObjectIdGetDatum(targetRelId));
284
285         /*
286          * Open pg_class and fetch a tuple.  Force heap scan if we haven't yet
287          * built the critical relcache entries (this includes initdb and startup
288          * without a pg_internal.init file).  The caller can also force a heap
289          * scan by setting indexOK == false.
290          */
291         pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
292         pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
293                                                                            indexOK && criticalRelcachesBuilt,
294                                                                            SnapshotNow,
295                                                                            1, key);
296
297         pg_class_tuple = systable_getnext(pg_class_scan);
298
299         /*
300          * Must copy tuple before releasing buffer.
301          */
302         if (HeapTupleIsValid(pg_class_tuple))
303                 pg_class_tuple = heap_copytuple(pg_class_tuple);
304
305         /* all done */
306         systable_endscan(pg_class_scan);
307         heap_close(pg_class_desc, AccessShareLock);
308
309         return pg_class_tuple;
310 }
311
312 /*
313  *              AllocateRelationDesc
314  *
315  *              This is used to allocate memory for a new relation descriptor
316  *              and initialize the rd_rel field from the given pg_class tuple.
317  */
318 static Relation
319 AllocateRelationDesc(Form_pg_class relp)
320 {
321         Relation        relation;
322         MemoryContext oldcxt;
323         Form_pg_class relationForm;
324
325         /* Relcache entries must live in CacheMemoryContext */
326         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
327
328         /*
329          * allocate and zero space for new relation descriptor
330          */
331         relation = (Relation) palloc0(sizeof(RelationData));
332
333         /* make sure relation is marked as having no open file yet */
334         relation->rd_smgr = NULL;
335
336         /*
337          * Copy the relation tuple form
338          *
339          * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
340          * variable-length fields (relacl, reloptions) are NOT stored in the
341          * relcache --- there'd be little point in it, since we don't copy the
342          * tuple's nulls bitmap and hence wouldn't know if the values are valid.
343          * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
344          * it from the syscache if you need it.  The same goes for the original
345          * form of reloptions (however, we do store the parsed form of reloptions
346          * in rd_options).
347          */
348         relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
349
350         memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
351
352         /* initialize relation tuple form */
353         relation->rd_rel = relationForm;
354
355         /* and allocate attribute tuple form storage */
356         relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
357                                                                                            relationForm->relhasoids);
358         /* which we mark as a reference-counted tupdesc */
359         relation->rd_att->tdrefcount = 1;
360
361         MemoryContextSwitchTo(oldcxt);
362
363         return relation;
364 }
365
366 /*
367  * RelationParseRelOptions
368  *              Convert pg_class.reloptions into pre-parsed rd_options
369  *
370  * tuple is the real pg_class tuple (not rd_rel!) for relation
371  *
372  * Note: rd_rel and (if an index) rd_am must be valid already
373  */
374 static void
375 RelationParseRelOptions(Relation relation, HeapTuple tuple)
376 {
377         bytea      *options;
378
379         relation->rd_options = NULL;
380
381         /* Fall out if relkind should not have options */
382         switch (relation->rd_rel->relkind)
383         {
384                 case RELKIND_RELATION:
385                 case RELKIND_TOASTVALUE:
386                 case RELKIND_INDEX:
387                         break;
388                 default:
389                         return;
390         }
391
392         /*
393          * Fetch reloptions from tuple; have to use a hardwired descriptor because
394          * we might not have any other for pg_class yet (consider executing this
395          * code for pg_class itself)
396          */
397         options = extractRelOptions(tuple,
398                                                                 GetPgClassDescriptor(),
399                                                                 relation->rd_rel->relkind == RELKIND_INDEX ?
400                                                                 relation->rd_am->amoptions : InvalidOid);
401
402         /*
403          * Copy parsed data into CacheMemoryContext.  To guard against the
404          * possibility of leaks in the reloptions code, we want to do the actual
405          * parsing in the caller's memory context and copy the results into
406          * CacheMemoryContext after the fact.
407          */
408         if (options)
409         {
410                 relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
411                                                                                                   VARSIZE(options));
412                 memcpy(relation->rd_options, options, VARSIZE(options));
413                 pfree(options);
414         }
415 }
416
417 /*
418  *              RelationBuildTupleDesc
419  *
420  *              Form the relation's tuple descriptor from information in
421  *              the pg_attribute, pg_attrdef & pg_constraint system catalogs.
422  */
423 static void
424 RelationBuildTupleDesc(Relation relation)
425 {
426         HeapTuple       pg_attribute_tuple;
427         Relation        pg_attribute_desc;
428         SysScanDesc pg_attribute_scan;
429         ScanKeyData skey[2];
430         int                     need;
431         TupleConstr *constr;
432         AttrDefault *attrdef = NULL;
433         int                     ndef = 0;
434
435         /* copy some fields from pg_class row to rd_att */
436         relation->rd_att->tdtypeid = relation->rd_rel->reltype;
437         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
438         relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
439
440         constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
441                                                                                                 sizeof(TupleConstr));
442         constr->has_not_null = false;
443
444         /*
445          * Form a scan key that selects only user attributes (attnum > 0).
446          * (Eliminating system attribute rows at the index level is lots faster
447          * than fetching them.)
448          */
449         ScanKeyInit(&skey[0],
450                                 Anum_pg_attribute_attrelid,
451                                 BTEqualStrategyNumber, F_OIDEQ,
452                                 ObjectIdGetDatum(RelationGetRelid(relation)));
453         ScanKeyInit(&skey[1],
454                                 Anum_pg_attribute_attnum,
455                                 BTGreaterStrategyNumber, F_INT2GT,
456                                 Int16GetDatum(0));
457
458         /*
459          * Open pg_attribute and begin a scan.  Force heap scan if we haven't yet
460          * built the critical relcache entries (this includes initdb and startup
461          * without a pg_internal.init file).
462          */
463         pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
464         pg_attribute_scan = systable_beginscan(pg_attribute_desc,
465                                                                                    AttributeRelidNumIndexId,
466                                                                                    criticalRelcachesBuilt,
467                                                                                    SnapshotNow,
468                                                                                    2, skey);
469
470         /*
471          * add attribute data to relation->rd_att
472          */
473         need = relation->rd_rel->relnatts;
474
475         while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
476         {
477                 Form_pg_attribute attp;
478
479                 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
480
481                 if (attp->attnum <= 0 ||
482                         attp->attnum > relation->rd_rel->relnatts)
483                         elog(ERROR, "invalid attribute number %d for %s",
484                                  attp->attnum, RelationGetRelationName(relation));
485
486                 memcpy(relation->rd_att->attrs[attp->attnum - 1],
487                            attp,
488                            ATTRIBUTE_FIXED_PART_SIZE);
489
490                 /* Update constraint/default info */
491                 if (attp->attnotnull)
492                         constr->has_not_null = true;
493
494                 if (attp->atthasdef)
495                 {
496                         if (attrdef == NULL)
497                                 attrdef = (AttrDefault *)
498                                         MemoryContextAllocZero(CacheMemoryContext,
499                                                                                    relation->rd_rel->relnatts *
500                                                                                    sizeof(AttrDefault));
501                         attrdef[ndef].adnum = attp->attnum;
502                         attrdef[ndef].adbin = NULL;
503                         ndef++;
504                 }
505                 need--;
506                 if (need == 0)
507                         break;
508         }
509
510         /*
511          * end the scan and close the attribute relation
512          */
513         systable_endscan(pg_attribute_scan);
514         heap_close(pg_attribute_desc, AccessShareLock);
515
516         if (need != 0)
517                 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
518                          need, RelationGetRelid(relation));
519
520         /*
521          * The attcacheoff values we read from pg_attribute should all be -1
522          * ("unknown").  Verify this if assert checking is on.  They will be
523          * computed when and if needed during tuple access.
524          */
525 #ifdef USE_ASSERT_CHECKING
526         {
527                 int                     i;
528
529                 for (i = 0; i < relation->rd_rel->relnatts; i++)
530                         Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
531         }
532 #endif
533
534         /*
535          * However, we can easily set the attcacheoff value for the first
536          * attribute: it must be zero.  This eliminates the need for special cases
537          * for attnum=1 that used to exist in fastgetattr() and index_getattr().
538          */
539         if (relation->rd_rel->relnatts > 0)
540                 relation->rd_att->attrs[0]->attcacheoff = 0;
541
542         /*
543          * Set up constraint/default info
544          */
545         if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
546         {
547                 relation->rd_att->constr = constr;
548
549                 if (ndef > 0)                   /* DEFAULTs */
550                 {
551                         if (ndef < relation->rd_rel->relnatts)
552                                 constr->defval = (AttrDefault *)
553                                         repalloc(attrdef, ndef * sizeof(AttrDefault));
554                         else
555                                 constr->defval = attrdef;
556                         constr->num_defval = ndef;
557                         AttrDefaultFetch(relation);
558                 }
559                 else
560                         constr->num_defval = 0;
561
562                 if (relation->rd_rel->relchecks > 0)    /* CHECKs */
563                 {
564                         constr->num_check = relation->rd_rel->relchecks;
565                         constr->check = (ConstrCheck *)
566                                 MemoryContextAllocZero(CacheMemoryContext,
567                                                                         constr->num_check * sizeof(ConstrCheck));
568                         CheckConstraintFetch(relation);
569                 }
570                 else
571                         constr->num_check = 0;
572         }
573         else
574         {
575                 pfree(constr);
576                 relation->rd_att->constr = NULL;
577         }
578 }
579
580 /*
581  *              RelationBuildRuleLock
582  *
583  *              Form the relation's rewrite rules from information in
584  *              the pg_rewrite system catalog.
585  *
586  * Note: The rule parsetrees are potentially very complex node structures.
587  * To allow these trees to be freed when the relcache entry is flushed,
588  * we make a private memory context to hold the RuleLock information for
589  * each relcache entry that has associated rules.  The context is used
590  * just for rule info, not for any other subsidiary data of the relcache
591  * entry, because that keeps the update logic in RelationClearRelation()
592  * manageable.  The other subsidiary data structures are simple enough
593  * to be easy to free explicitly, anyway.
594  */
595 static void
596 RelationBuildRuleLock(Relation relation)
597 {
598         MemoryContext rulescxt;
599         MemoryContext oldcxt;
600         HeapTuple       rewrite_tuple;
601         Relation        rewrite_desc;
602         TupleDesc       rewrite_tupdesc;
603         SysScanDesc rewrite_scan;
604         ScanKeyData key;
605         RuleLock   *rulelock;
606         int                     numlocks;
607         RewriteRule **rules;
608         int                     maxlocks;
609
610         /*
611          * Make the private context.  Parameters are set on the assumption that
612          * it'll probably not contain much data.
613          */
614         rulescxt = AllocSetContextCreate(CacheMemoryContext,
615                                                                          RelationGetRelationName(relation),
616                                                                          ALLOCSET_SMALL_MINSIZE,
617                                                                          ALLOCSET_SMALL_INITSIZE,
618                                                                          ALLOCSET_SMALL_MAXSIZE);
619         relation->rd_rulescxt = rulescxt;
620
621         /*
622          * allocate an array to hold the rewrite rules (the array is extended if
623          * necessary)
624          */
625         maxlocks = 4;
626         rules = (RewriteRule **)
627                 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
628         numlocks = 0;
629
630         /*
631          * form a scan key
632          */
633         ScanKeyInit(&key,
634                                 Anum_pg_rewrite_ev_class,
635                                 BTEqualStrategyNumber, F_OIDEQ,
636                                 ObjectIdGetDatum(RelationGetRelid(relation)));
637
638         /*
639          * open pg_rewrite and begin a scan
640          *
641          * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
642          * be reading the rules in name order, except possibly during
643          * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
644          * ensures that rules will be fired in name order.
645          */
646         rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
647         rewrite_tupdesc = RelationGetDescr(rewrite_desc);
648         rewrite_scan = systable_beginscan(rewrite_desc,
649                                                                           RewriteRelRulenameIndexId,
650                                                                           true, SnapshotNow,
651                                                                           1, &key);
652
653         while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
654         {
655                 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
656                 bool            isnull;
657                 Datum           rule_datum;
658                 char       *rule_str;
659                 RewriteRule *rule;
660
661                 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
662                                                                                                   sizeof(RewriteRule));
663
664                 rule->ruleId = HeapTupleGetOid(rewrite_tuple);
665
666                 rule->event = rewrite_form->ev_type - '0';
667                 rule->attrno = rewrite_form->ev_attr;
668                 rule->enabled = rewrite_form->ev_enabled;
669                 rule->isInstead = rewrite_form->is_instead;
670
671                 /*
672                  * Must use heap_getattr to fetch ev_action and ev_qual.  Also, the
673                  * rule strings are often large enough to be toasted.  To avoid
674                  * leaking memory in the caller's context, do the detoasting here so
675                  * we can free the detoasted version.
676                  */
677                 rule_datum = heap_getattr(rewrite_tuple,
678                                                                   Anum_pg_rewrite_ev_action,
679                                                                   rewrite_tupdesc,
680                                                                   &isnull);
681                 Assert(!isnull);
682                 rule_str = TextDatumGetCString(rule_datum);
683                 oldcxt = MemoryContextSwitchTo(rulescxt);
684                 rule->actions = (List *) stringToNode(rule_str);
685                 MemoryContextSwitchTo(oldcxt);
686                 pfree(rule_str);
687
688                 rule_datum = heap_getattr(rewrite_tuple,
689                                                                   Anum_pg_rewrite_ev_qual,
690                                                                   rewrite_tupdesc,
691                                                                   &isnull);
692                 Assert(!isnull);
693                 rule_str = TextDatumGetCString(rule_datum);
694                 oldcxt = MemoryContextSwitchTo(rulescxt);
695                 rule->qual = (Node *) stringToNode(rule_str);
696                 MemoryContextSwitchTo(oldcxt);
697                 pfree(rule_str);
698
699                 /*
700                  * We want the rule's table references to be checked as though by the
701                  * table owner, not the user referencing the rule.      Therefore, scan
702                  * through the rule's actions and set the checkAsUser field on all
703                  * rtable entries.      We have to look at the qual as well, in case it
704                  * contains sublinks.
705                  *
706                  * The reason for doing this when the rule is loaded, rather than when
707                  * it is stored, is that otherwise ALTER TABLE OWNER would have to
708                  * grovel through stored rules to update checkAsUser fields. Scanning
709                  * the rule tree during load is relatively cheap (compared to
710                  * constructing it in the first place), so we do it here.
711                  */
712                 setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
713                 setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
714
715                 if (numlocks >= maxlocks)
716                 {
717                         maxlocks *= 2;
718                         rules = (RewriteRule **)
719                                 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
720                 }
721                 rules[numlocks++] = rule;
722         }
723
724         /*
725          * end the scan and close the attribute relation
726          */
727         systable_endscan(rewrite_scan);
728         heap_close(rewrite_desc, AccessShareLock);
729
730         /*
731          * there might not be any rules (if relhasrules is out-of-date)
732          */
733         if (numlocks == 0)
734         {
735                 relation->rd_rules = NULL;
736                 relation->rd_rulescxt = NULL;
737                 MemoryContextDelete(rulescxt);
738                 return;
739         }
740
741         /*
742          * form a RuleLock and insert into relation
743          */
744         rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
745         rulelock->numLocks = numlocks;
746         rulelock->rules = rules;
747
748         relation->rd_rules = rulelock;
749 }
750
751 /*
752  *              equalRuleLocks
753  *
754  *              Determine whether two RuleLocks are equivalent
755  *
756  *              Probably this should be in the rules code someplace...
757  */
758 static bool
759 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
760 {
761         int                     i;
762
763         /*
764          * As of 7.3 we assume the rule ordering is repeatable, because
765          * RelationBuildRuleLock should read 'em in a consistent order.  So just
766          * compare corresponding slots.
767          */
768         if (rlock1 != NULL)
769         {
770                 if (rlock2 == NULL)
771                         return false;
772                 if (rlock1->numLocks != rlock2->numLocks)
773                         return false;
774                 for (i = 0; i < rlock1->numLocks; i++)
775                 {
776                         RewriteRule *rule1 = rlock1->rules[i];
777                         RewriteRule *rule2 = rlock2->rules[i];
778
779                         if (rule1->ruleId != rule2->ruleId)
780                                 return false;
781                         if (rule1->event != rule2->event)
782                                 return false;
783                         if (rule1->attrno != rule2->attrno)
784                                 return false;
785                         if (rule1->enabled != rule2->enabled)
786                                 return false;
787                         if (rule1->isInstead != rule2->isInstead)
788                                 return false;
789                         if (!equal(rule1->qual, rule2->qual))
790                                 return false;
791                         if (!equal(rule1->actions, rule2->actions))
792                                 return false;
793                 }
794         }
795         else if (rlock2 != NULL)
796                 return false;
797         return true;
798 }
799
800
801 /*
802  *              RelationBuildDesc
803  *
804  *              Build a relation descriptor.  The caller must hold at least
805  *              AccessShareLock on the target relid.
806  *
807  *              The new descriptor is inserted into the hash table if insertIt is true.
808  *
809  *              Returns NULL if no pg_class row could be found for the given relid
810  *              (suggesting we are trying to access a just-deleted relation).
811  *              Any other error is reported via elog.
812  */
813 static Relation
814 RelationBuildDesc(Oid targetRelId, bool insertIt)
815 {
816         Relation        relation;
817         Oid                     relid;
818         HeapTuple       pg_class_tuple;
819         Form_pg_class relp;
820
821         /*
822          * find the tuple in pg_class corresponding to the given relation id
823          */
824         pg_class_tuple = ScanPgRelation(targetRelId, true);
825
826         /*
827          * if no such tuple exists, return NULL
828          */
829         if (!HeapTupleIsValid(pg_class_tuple))
830                 return NULL;
831
832         /*
833          * get information from the pg_class_tuple
834          */
835         relid = HeapTupleGetOid(pg_class_tuple);
836         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
837         Assert(relid == targetRelId);
838
839         /*
840          * allocate storage for the relation descriptor, and copy pg_class_tuple
841          * to relation->rd_rel.
842          */
843         relation = AllocateRelationDesc(relp);
844
845         /*
846          * initialize the relation's relation id (relation->rd_id)
847          */
848         RelationGetRelid(relation) = relid;
849
850         /*
851          * normal relations are not nailed into the cache; nor can a pre-existing
852          * relation be new.  It could be temp though.  (Actually, it could be new
853          * too, but it's okay to forget that fact if forced to flush the entry.)
854          */
855         relation->rd_refcnt = 0;
856         relation->rd_isnailed = false;
857         relation->rd_createSubid = InvalidSubTransactionId;
858         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
859         relation->rd_istemp = relation->rd_rel->relistemp;
860         if (!relation->rd_istemp)
861                 relation->rd_backend = InvalidBackendId;
862         else if (isTempOrToastNamespace(relation->rd_rel->relnamespace))
863                 relation->rd_backend = MyBackendId;
864         else
865         {
866                 /*
867                  * If it's a temporary table, but not one of ours, we have to use
868                  * the slow, grotty method to figure out the owning backend.
869                  */
870                 relation->rd_backend =
871                         GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
872                 Assert(relation->rd_backend != InvalidBackendId);
873         }
874
875         /*
876          * initialize the tuple descriptor (relation->rd_att).
877          */
878         RelationBuildTupleDesc(relation);
879
880         /*
881          * Fetch rules and triggers that affect this relation
882          */
883         if (relation->rd_rel->relhasrules)
884                 RelationBuildRuleLock(relation);
885         else
886         {
887                 relation->rd_rules = NULL;
888                 relation->rd_rulescxt = NULL;
889         }
890
891         if (relation->rd_rel->relhastriggers)
892                 RelationBuildTriggers(relation);
893         else
894                 relation->trigdesc = NULL;
895
896         /*
897          * if it's an index, initialize index-related information
898          */
899         if (OidIsValid(relation->rd_rel->relam))
900                 RelationInitIndexAccessInfo(relation);
901
902         /* extract reloptions if any */
903         RelationParseRelOptions(relation, pg_class_tuple);
904
905         /*
906          * initialize the relation lock manager information
907          */
908         RelationInitLockInfo(relation);         /* see lmgr.c */
909
910         /*
911          * initialize physical addressing information for the relation
912          */
913         RelationInitPhysicalAddr(relation);
914
915         /* make sure relation is marked as having no open file yet */
916         relation->rd_smgr = NULL;
917
918         /*
919          * now we can free the memory allocated for pg_class_tuple
920          */
921         heap_freetuple(pg_class_tuple);
922
923         /*
924          * Insert newly created relation into relcache hash table, if requested.
925          */
926         if (insertIt)
927                 RelationCacheInsert(relation);
928
929         /* It's fully valid */
930         relation->rd_isvalid = true;
931
932         return relation;
933 }
934
935 /*
936  * Initialize the physical addressing info (RelFileNode) for a relcache entry
937  *
938  * Note: at the physical level, relations in the pg_global tablespace must
939  * be treated as shared, even if relisshared isn't set.  Hence we do not
940  * look at relisshared here.
941  */
942 static void
943 RelationInitPhysicalAddr(Relation relation)
944 {
945         if (relation->rd_rel->reltablespace)
946                 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
947         else
948                 relation->rd_node.spcNode = MyDatabaseTableSpace;
949         if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
950                 relation->rd_node.dbNode = InvalidOid;
951         else
952                 relation->rd_node.dbNode = MyDatabaseId;
953         if (relation->rd_rel->relfilenode)
954                 relation->rd_node.relNode = relation->rd_rel->relfilenode;
955         else
956         {
957                 /* Consult the relation mapper */
958                 relation->rd_node.relNode =
959                         RelationMapOidToFilenode(relation->rd_id,
960                                                                          relation->rd_rel->relisshared);
961                 if (!OidIsValid(relation->rd_node.relNode))
962                         elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
963                                  RelationGetRelationName(relation), relation->rd_id);
964         }
965 }
966
967 /*
968  * Initialize index-access-method support data for an index relation
969  */
970 void
971 RelationInitIndexAccessInfo(Relation relation)
972 {
973         HeapTuple       tuple;
974         Form_pg_am      aform;
975         Datum           indclassDatum;
976         Datum           indoptionDatum;
977         bool            isnull;
978         oidvector  *indclass;
979         int2vector *indoption;
980         MemoryContext indexcxt;
981         MemoryContext oldcontext;
982         int                     natts;
983         uint16          amstrategies;
984         uint16          amsupport;
985
986         /*
987          * Make a copy of the pg_index entry for the index.  Since pg_index
988          * contains variable-length and possibly-null fields, we have to do this
989          * honestly rather than just treating it as a Form_pg_index struct.
990          */
991         tuple = SearchSysCache1(INDEXRELID,
992                                                         ObjectIdGetDatum(RelationGetRelid(relation)));
993         if (!HeapTupleIsValid(tuple))
994                 elog(ERROR, "cache lookup failed for index %u",
995                          RelationGetRelid(relation));
996         oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
997         relation->rd_indextuple = heap_copytuple(tuple);
998         relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
999         MemoryContextSwitchTo(oldcontext);
1000         ReleaseSysCache(tuple);
1001
1002         /*
1003          * Make a copy of the pg_am entry for the index's access method
1004          */
1005         tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1006         if (!HeapTupleIsValid(tuple))
1007                 elog(ERROR, "cache lookup failed for access method %u",
1008                          relation->rd_rel->relam);
1009         aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
1010         memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
1011         ReleaseSysCache(tuple);
1012         relation->rd_am = aform;
1013
1014         natts = relation->rd_rel->relnatts;
1015         if (natts != relation->rd_index->indnatts)
1016                 elog(ERROR, "relnatts disagrees with indnatts for index %u",
1017                          RelationGetRelid(relation));
1018         amstrategies = aform->amstrategies;
1019         amsupport = aform->amsupport;
1020
1021         /*
1022          * Make the private context to hold index access info.  The reason we need
1023          * a context, and not just a couple of pallocs, is so that we won't leak
1024          * any subsidiary info attached to fmgr lookup records.
1025          *
1026          * Context parameters are set on the assumption that it'll probably not
1027          * contain much data.
1028          */
1029         indexcxt = AllocSetContextCreate(CacheMemoryContext,
1030                                                                          RelationGetRelationName(relation),
1031                                                                          ALLOCSET_SMALL_MINSIZE,
1032                                                                          ALLOCSET_SMALL_INITSIZE,
1033                                                                          ALLOCSET_SMALL_MAXSIZE);
1034         relation->rd_indexcxt = indexcxt;
1035
1036         /*
1037          * Allocate arrays to hold data
1038          */
1039         relation->rd_aminfo = (RelationAmInfo *)
1040                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
1041
1042         relation->rd_opfamily = (Oid *)
1043                 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1044         relation->rd_opcintype = (Oid *)
1045                 MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1046
1047         if (amstrategies > 0)
1048                 relation->rd_operator = (Oid *)
1049                         MemoryContextAllocZero(indexcxt,
1050                                                                    natts * amstrategies * sizeof(Oid));
1051         else
1052                 relation->rd_operator = NULL;
1053
1054         if (amsupport > 0)
1055         {
1056                 int                     nsupport = natts * amsupport;
1057
1058                 relation->rd_support = (RegProcedure *)
1059                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1060                 relation->rd_supportinfo = (FmgrInfo *)
1061                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1062         }
1063         else
1064         {
1065                 relation->rd_support = NULL;
1066                 relation->rd_supportinfo = NULL;
1067         }
1068
1069         relation->rd_indoption = (int16 *)
1070                 MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1071
1072         /*
1073          * indclass cannot be referenced directly through the C struct, because it
1074          * comes after the variable-width indkey field.  Must extract the datum
1075          * the hard way...
1076          */
1077         indclassDatum = fastgetattr(relation->rd_indextuple,
1078                                                                 Anum_pg_index_indclass,
1079                                                                 GetPgIndexDescriptor(),
1080                                                                 &isnull);
1081         Assert(!isnull);
1082         indclass = (oidvector *) DatumGetPointer(indclassDatum);
1083
1084         /*
1085          * Fill the operator and support procedure OID arrays, as well as the info
1086          * about opfamilies and opclass input types.  (aminfo and supportinfo are
1087          * left as zeroes, and are filled on-the-fly when used)
1088          */
1089         IndexSupportInitialize(indclass,
1090                                                    relation->rd_operator, relation->rd_support,
1091                                                    relation->rd_opfamily, relation->rd_opcintype,
1092                                                    amstrategies, amsupport, natts);
1093
1094         /*
1095          * Similarly extract indoption and copy it to the cache entry
1096          */
1097         indoptionDatum = fastgetattr(relation->rd_indextuple,
1098                                                                  Anum_pg_index_indoption,
1099                                                                  GetPgIndexDescriptor(),
1100                                                                  &isnull);
1101         Assert(!isnull);
1102         indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1103         memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1104
1105         /*
1106          * expressions, predicate, exclusion caches will be filled later
1107          */
1108         relation->rd_indexprs = NIL;
1109         relation->rd_indpred = NIL;
1110         relation->rd_exclops = NULL;
1111         relation->rd_exclprocs = NULL;
1112         relation->rd_exclstrats = NULL;
1113         relation->rd_amcache = NULL;
1114 }
1115
1116 /*
1117  * IndexSupportInitialize
1118  *              Initializes an index's cached opclass information,
1119  *              given the index's pg_index.indclass entry.
1120  *
1121  * Data is returned into *indexOperator, *indexSupport, *opFamily, and
1122  * *opcInType, which are arrays allocated by the caller.
1123  *
1124  * The caller also passes maxStrategyNumber, maxSupportNumber, and
1125  * maxAttributeNumber, since these indicate the size of the arrays
1126  * it has allocated --- but in practice these numbers must always match
1127  * those obtainable from the system catalog entries for the index and
1128  * access method.
1129  */
1130 static void
1131 IndexSupportInitialize(oidvector *indclass,
1132                                            Oid *indexOperator,
1133                                            RegProcedure *indexSupport,
1134                                            Oid *opFamily,
1135                                            Oid *opcInType,
1136                                            StrategyNumber maxStrategyNumber,
1137                                            StrategyNumber maxSupportNumber,
1138                                            AttrNumber maxAttributeNumber)
1139 {
1140         int                     attIndex;
1141
1142         for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1143         {
1144                 OpClassCacheEnt *opcentry;
1145
1146                 if (!OidIsValid(indclass->values[attIndex]))
1147                         elog(ERROR, "bogus pg_index tuple");
1148
1149                 /* look up the info for this opclass, using a cache */
1150                 opcentry = LookupOpclassInfo(indclass->values[attIndex],
1151                                                                          maxStrategyNumber,
1152                                                                          maxSupportNumber);
1153
1154                 /* copy cached data into relcache entry */
1155                 opFamily[attIndex] = opcentry->opcfamily;
1156                 opcInType[attIndex] = opcentry->opcintype;
1157                 if (maxStrategyNumber > 0)
1158                         memcpy(&indexOperator[attIndex * maxStrategyNumber],
1159                                    opcentry->operatorOids,
1160                                    maxStrategyNumber * sizeof(Oid));
1161                 if (maxSupportNumber > 0)
1162                         memcpy(&indexSupport[attIndex * maxSupportNumber],
1163                                    opcentry->supportProcs,
1164                                    maxSupportNumber * sizeof(RegProcedure));
1165         }
1166 }
1167
1168 /*
1169  * LookupOpclassInfo
1170  *
1171  * This routine maintains a per-opclass cache of the information needed
1172  * by IndexSupportInitialize().  This is more efficient than relying on
1173  * the catalog cache, because we can load all the info about a particular
1174  * opclass in a single indexscan of pg_amproc or pg_amop.
1175  *
1176  * The information from pg_am about expected range of strategy and support
1177  * numbers is passed in, rather than being looked up, mainly because the
1178  * caller will have it already.
1179  *
1180  * Note there is no provision for flushing the cache.  This is OK at the
1181  * moment because there is no way to ALTER any interesting properties of an
1182  * existing opclass --- all you can do is drop it, which will result in
1183  * a useless but harmless dead entry in the cache.      To support altering
1184  * opclass membership (not the same as opfamily membership!), we'd need to
1185  * be able to flush this cache as well as the contents of relcache entries
1186  * for indexes.
1187  */
1188 static OpClassCacheEnt *
1189 LookupOpclassInfo(Oid operatorClassOid,
1190                                   StrategyNumber numStrats,
1191                                   StrategyNumber numSupport)
1192 {
1193         OpClassCacheEnt *opcentry;
1194         bool            found;
1195         Relation        rel;
1196         SysScanDesc scan;
1197         ScanKeyData skey[3];
1198         HeapTuple       htup;
1199         bool            indexOK;
1200
1201         if (OpClassCache == NULL)
1202         {
1203                 /* First time through: initialize the opclass cache */
1204                 HASHCTL         ctl;
1205
1206                 MemSet(&ctl, 0, sizeof(ctl));
1207                 ctl.keysize = sizeof(Oid);
1208                 ctl.entrysize = sizeof(OpClassCacheEnt);
1209                 ctl.hash = oid_hash;
1210                 OpClassCache = hash_create("Operator class cache", 64,
1211                                                                    &ctl, HASH_ELEM | HASH_FUNCTION);
1212
1213                 /* Also make sure CacheMemoryContext exists */
1214                 if (!CacheMemoryContext)
1215                         CreateCacheMemoryContext();
1216         }
1217
1218         opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1219                                                                                            (void *) &operatorClassOid,
1220                                                                                            HASH_ENTER, &found);
1221
1222         if (!found)
1223         {
1224                 /* Need to allocate memory for new entry */
1225                 opcentry->valid = false;        /* until known OK */
1226                 opcentry->numStrats = numStrats;
1227                 opcentry->numSupport = numSupport;
1228
1229                 if (numStrats > 0)
1230                         opcentry->operatorOids = (Oid *)
1231                                 MemoryContextAllocZero(CacheMemoryContext,
1232                                                                            numStrats * sizeof(Oid));
1233                 else
1234                         opcentry->operatorOids = NULL;
1235
1236                 if (numSupport > 0)
1237                         opcentry->supportProcs = (RegProcedure *)
1238                                 MemoryContextAllocZero(CacheMemoryContext,
1239                                                                            numSupport * sizeof(RegProcedure));
1240                 else
1241                         opcentry->supportProcs = NULL;
1242         }
1243         else
1244         {
1245                 Assert(numStrats == opcentry->numStrats);
1246                 Assert(numSupport == opcentry->numSupport);
1247         }
1248
1249         /*
1250          * When testing for cache-flush hazards, we intentionally disable the
1251          * operator class cache and force reloading of the info on each call. This
1252          * is helpful because we want to test the case where a cache flush occurs
1253          * while we are loading the info, and it's very hard to provoke that if
1254          * this happens only once per opclass per backend.
1255          */
1256 #if defined(CLOBBER_CACHE_ALWAYS)
1257         opcentry->valid = false;
1258 #endif
1259
1260         if (opcentry->valid)
1261                 return opcentry;
1262
1263         /*
1264          * Need to fill in new entry.
1265          *
1266          * To avoid infinite recursion during startup, force heap scans if we're
1267          * looking up info for the opclasses used by the indexes we would like to
1268          * reference here.
1269          */
1270         indexOK = criticalRelcachesBuilt ||
1271                 (operatorClassOid != OID_BTREE_OPS_OID &&
1272                  operatorClassOid != INT2_BTREE_OPS_OID);
1273
1274         /*
1275          * We have to fetch the pg_opclass row to determine its opfamily and
1276          * opcintype, which are needed to look up the operators and functions.
1277          * It'd be convenient to use the syscache here, but that probably doesn't
1278          * work while bootstrapping.
1279          */
1280         ScanKeyInit(&skey[0],
1281                                 ObjectIdAttributeNumber,
1282                                 BTEqualStrategyNumber, F_OIDEQ,
1283                                 ObjectIdGetDatum(operatorClassOid));
1284         rel = heap_open(OperatorClassRelationId, AccessShareLock);
1285         scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1286                                                           SnapshotNow, 1, skey);
1287
1288         if (HeapTupleIsValid(htup = systable_getnext(scan)))
1289         {
1290                 Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1291
1292                 opcentry->opcfamily = opclassform->opcfamily;
1293                 opcentry->opcintype = opclassform->opcintype;
1294         }
1295         else
1296                 elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1297
1298         systable_endscan(scan);
1299         heap_close(rel, AccessShareLock);
1300
1301
1302         /*
1303          * Scan pg_amop to obtain operators for the opclass.  We only fetch the
1304          * default ones (those with lefttype = righttype = opcintype).
1305          */
1306         if (numStrats > 0)
1307         {
1308                 ScanKeyInit(&skey[0],
1309                                         Anum_pg_amop_amopfamily,
1310                                         BTEqualStrategyNumber, F_OIDEQ,
1311                                         ObjectIdGetDatum(opcentry->opcfamily));
1312                 ScanKeyInit(&skey[1],
1313                                         Anum_pg_amop_amoplefttype,
1314                                         BTEqualStrategyNumber, F_OIDEQ,
1315                                         ObjectIdGetDatum(opcentry->opcintype));
1316                 ScanKeyInit(&skey[2],
1317                                         Anum_pg_amop_amoprighttype,
1318                                         BTEqualStrategyNumber, F_OIDEQ,
1319                                         ObjectIdGetDatum(opcentry->opcintype));
1320                 rel = heap_open(AccessMethodOperatorRelationId, AccessShareLock);
1321                 scan = systable_beginscan(rel, AccessMethodStrategyIndexId, indexOK,
1322                                                                   SnapshotNow, 3, skey);
1323
1324                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1325                 {
1326                         Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(htup);
1327
1328                         if (amopform->amopstrategy <= 0 ||
1329                                 (StrategyNumber) amopform->amopstrategy > numStrats)
1330                                 elog(ERROR, "invalid amopstrategy number %d for opclass %u",
1331                                          amopform->amopstrategy, operatorClassOid);
1332                         opcentry->operatorOids[amopform->amopstrategy - 1] =
1333                                 amopform->amopopr;
1334                 }
1335
1336                 systable_endscan(scan);
1337                 heap_close(rel, AccessShareLock);
1338         }
1339
1340         /*
1341          * Scan pg_amproc to obtain support procs for the opclass.      We only fetch
1342          * the default ones (those with lefttype = righttype = opcintype).
1343          */
1344         if (numSupport > 0)
1345         {
1346                 ScanKeyInit(&skey[0],
1347                                         Anum_pg_amproc_amprocfamily,
1348                                         BTEqualStrategyNumber, F_OIDEQ,
1349                                         ObjectIdGetDatum(opcentry->opcfamily));
1350                 ScanKeyInit(&skey[1],
1351                                         Anum_pg_amproc_amproclefttype,
1352                                         BTEqualStrategyNumber, F_OIDEQ,
1353                                         ObjectIdGetDatum(opcentry->opcintype));
1354                 ScanKeyInit(&skey[2],
1355                                         Anum_pg_amproc_amprocrighttype,
1356                                         BTEqualStrategyNumber, F_OIDEQ,
1357                                         ObjectIdGetDatum(opcentry->opcintype));
1358                 rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
1359                 scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1360                                                                   SnapshotNow, 3, skey);
1361
1362                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1363                 {
1364                         Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1365
1366                         if (amprocform->amprocnum <= 0 ||
1367                                 (StrategyNumber) amprocform->amprocnum > numSupport)
1368                                 elog(ERROR, "invalid amproc number %d for opclass %u",
1369                                          amprocform->amprocnum, operatorClassOid);
1370
1371                         opcentry->supportProcs[amprocform->amprocnum - 1] =
1372                                 amprocform->amproc;
1373                 }
1374
1375                 systable_endscan(scan);
1376                 heap_close(rel, AccessShareLock);
1377         }
1378
1379         opcentry->valid = true;
1380         return opcentry;
1381 }
1382
1383
1384 /*
1385  *              formrdesc
1386  *
1387  *              This is a special cut-down version of RelationBuildDesc(),
1388  *              used while initializing the relcache.
1389  *              The relation descriptor is built just from the supplied parameters,
1390  *              without actually looking at any system table entries.  We cheat
1391  *              quite a lot since we only need to work for a few basic system
1392  *              catalogs.
1393  *
1394  * formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1395  * pg_class, pg_attribute, pg_proc, and pg_type
1396  * (see RelationCacheInitializePhase2/3).
1397  *
1398  * Note that these catalogs can't have constraints (except attnotnull),
1399  * default values, rules, or triggers, since we don't cope with any of that.
1400  * (Well, actually, this only matters for properties that need to be valid
1401  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1402  * these properties matter then...)
1403  *
1404  * NOTE: we assume we are already switched into CacheMemoryContext.
1405  */
1406 static void
1407 formrdesc(const char *relationName, Oid relationReltype,
1408                   bool isshared, bool hasoids,
1409                   int natts, const FormData_pg_attribute *attrs)
1410 {
1411         Relation        relation;
1412         int                     i;
1413         bool            has_not_null;
1414
1415         /*
1416          * allocate new relation desc, clear all fields of reldesc
1417          */
1418         relation = (Relation) palloc0(sizeof(RelationData));
1419
1420         /* make sure relation is marked as having no open file yet */
1421         relation->rd_smgr = NULL;
1422
1423         /*
1424          * initialize reference count: 1 because it is nailed in cache
1425          */
1426         relation->rd_refcnt = 1;
1427
1428         /*
1429          * all entries built with this routine are nailed-in-cache; none are for
1430          * new or temp relations.
1431          */
1432         relation->rd_isnailed = true;
1433         relation->rd_createSubid = InvalidSubTransactionId;
1434         relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
1435         relation->rd_istemp = false;
1436         relation->rd_backend = InvalidBackendId;
1437
1438         /*
1439          * initialize relation tuple form
1440          *
1441          * The data we insert here is pretty incomplete/bogus, but it'll serve to
1442          * get us launched.  RelationCacheInitializePhase3() will read the real
1443          * data from pg_class and replace what we've done here.  Note in
1444          * particular that relowner is left as zero; this cues
1445          * RelationCacheInitializePhase3 that the real data isn't there yet.
1446          */
1447         relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1448
1449         namestrcpy(&relation->rd_rel->relname, relationName);
1450         relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1451         relation->rd_rel->reltype = relationReltype;
1452
1453         /*
1454          * It's important to distinguish between shared and non-shared relations,
1455          * even at bootstrap time, to make sure we know where they are stored.
1456          */
1457         relation->rd_rel->relisshared = isshared;
1458         if (isshared)
1459                 relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1460
1461         /*
1462          * Likewise, we must know if a relation is temp ... but formrdesc is not
1463          * used for any temp relations.
1464          */
1465         relation->rd_rel->relistemp = false;
1466
1467         relation->rd_rel->relpages = 1;
1468         relation->rd_rel->reltuples = 1;
1469         relation->rd_rel->relkind = RELKIND_RELATION;
1470         relation->rd_rel->relhasoids = hasoids;
1471         relation->rd_rel->relnatts = (int16) natts;
1472
1473         /*
1474          * initialize attribute tuple form
1475          *
1476          * Unlike the case with the relation tuple, this data had better be right
1477          * because it will never be replaced.  The data comes from
1478          * src/include/catalog/ headers via genbki.pl.
1479          */
1480         relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1481         relation->rd_att->tdrefcount = 1;       /* mark as refcounted */
1482
1483         relation->rd_att->tdtypeid = relationReltype;
1484         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
1485
1486         /*
1487          * initialize tuple desc info
1488          */
1489         has_not_null = false;
1490         for (i = 0; i < natts; i++)
1491         {
1492                 memcpy(relation->rd_att->attrs[i],
1493                            &attrs[i],
1494                            ATTRIBUTE_FIXED_PART_SIZE);
1495                 has_not_null |= attrs[i].attnotnull;
1496                 /* make sure attcacheoff is valid */
1497                 relation->rd_att->attrs[i]->attcacheoff = -1;
1498         }
1499
1500         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1501         relation->rd_att->attrs[0]->attcacheoff = 0;
1502
1503         /* mark not-null status */
1504         if (has_not_null)
1505         {
1506                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1507
1508                 constr->has_not_null = true;
1509                 relation->rd_att->constr = constr;
1510         }
1511
1512         /*
1513          * initialize relation id from info in att array (my, this is ugly)
1514          */
1515         RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1516
1517         /*
1518          * All relations made with formrdesc are mapped.  This is necessarily so
1519          * because there is no other way to know what filenode they currently
1520          * have.  In bootstrap mode, add them to the initial relation mapper data,
1521          * specifying that the initial filenode is the same as the OID.
1522          */
1523         relation->rd_rel->relfilenode = InvalidOid;
1524         if (IsBootstrapProcessingMode())
1525                 RelationMapUpdateMap(RelationGetRelid(relation),
1526                                                          RelationGetRelid(relation),
1527                                                          isshared, true);
1528
1529         /*
1530          * initialize the relation lock manager information
1531          */
1532         RelationInitLockInfo(relation);         /* see lmgr.c */
1533
1534         /*
1535          * initialize physical addressing information for the relation
1536          */
1537         RelationInitPhysicalAddr(relation);
1538
1539         /*
1540          * initialize the rel-has-index flag, using hardwired knowledge
1541          */
1542         if (IsBootstrapProcessingMode())
1543         {
1544                 /* In bootstrap mode, we have no indexes */
1545                 relation->rd_rel->relhasindex = false;
1546         }
1547         else
1548         {
1549                 /* Otherwise, all the rels formrdesc is used for have indexes */
1550                 relation->rd_rel->relhasindex = true;
1551         }
1552
1553         /*
1554          * add new reldesc to relcache
1555          */
1556         RelationCacheInsert(relation);
1557
1558         /* It's fully valid */
1559         relation->rd_isvalid = true;
1560 }
1561
1562
1563 /* ----------------------------------------------------------------
1564  *                               Relation Descriptor Lookup Interface
1565  * ----------------------------------------------------------------
1566  */
1567
1568 /*
1569  *              RelationIdGetRelation
1570  *
1571  *              Lookup a reldesc by OID; make one if not already in cache.
1572  *
1573  *              Returns NULL if no pg_class row could be found for the given relid
1574  *              (suggesting we are trying to access a just-deleted relation).
1575  *              Any other error is reported via elog.
1576  *
1577  *              NB: caller should already have at least AccessShareLock on the
1578  *              relation ID, else there are nasty race conditions.
1579  *
1580  *              NB: relation ref count is incremented, or set to 1 if new entry.
1581  *              Caller should eventually decrement count.  (Usually,
1582  *              that happens by calling RelationClose().)
1583  */
1584 Relation
1585 RelationIdGetRelation(Oid relationId)
1586 {
1587         Relation        rd;
1588
1589         /*
1590          * first try to find reldesc in the cache
1591          */
1592         RelationIdCacheLookup(relationId, rd);
1593
1594         if (RelationIsValid(rd))
1595         {
1596                 RelationIncrementReferenceCount(rd);
1597                 /* revalidate cache entry if necessary */
1598                 if (!rd->rd_isvalid)
1599                 {
1600                         /*
1601                          * Indexes only have a limited number of possible schema changes,
1602                          * and we don't want to use the full-blown procedure because it's
1603                          * a headache for indexes that reload itself depends on.
1604                          */
1605                         if (rd->rd_rel->relkind == RELKIND_INDEX)
1606                                 RelationReloadIndexInfo(rd);
1607                         else
1608                                 RelationClearRelation(rd, true);
1609                 }
1610                 return rd;
1611         }
1612
1613         /*
1614          * no reldesc in the cache, so have RelationBuildDesc() build one and add
1615          * it.
1616          */
1617         rd = RelationBuildDesc(relationId, true);
1618         if (RelationIsValid(rd))
1619                 RelationIncrementReferenceCount(rd);
1620         return rd;
1621 }
1622
1623 /* ----------------------------------------------------------------
1624  *                              cache invalidation support routines
1625  * ----------------------------------------------------------------
1626  */
1627
1628 /*
1629  * RelationIncrementReferenceCount
1630  *              Increments relation reference count.
1631  *
1632  * Note: bootstrap mode has its own weird ideas about relation refcount
1633  * behavior; we ought to fix it someday, but for now, just disable
1634  * reference count ownership tracking in bootstrap mode.
1635  */
1636 void
1637 RelationIncrementReferenceCount(Relation rel)
1638 {
1639         ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
1640         rel->rd_refcnt += 1;
1641         if (!IsBootstrapProcessingMode())
1642                 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
1643 }
1644
1645 /*
1646  * RelationDecrementReferenceCount
1647  *              Decrements relation reference count.
1648  */
1649 void
1650 RelationDecrementReferenceCount(Relation rel)
1651 {
1652         Assert(rel->rd_refcnt > 0);
1653         rel->rd_refcnt -= 1;
1654         if (!IsBootstrapProcessingMode())
1655                 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
1656 }
1657
1658 /*
1659  * RelationClose - close an open relation
1660  *
1661  *      Actually, we just decrement the refcount.
1662  *
1663  *      NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
1664  *      will be freed as soon as their refcount goes to zero.  In combination
1665  *      with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
1666  *      to catch references to already-released relcache entries.  It slows
1667  *      things down quite a bit, however.
1668  */
1669 void
1670 RelationClose(Relation relation)
1671 {
1672         /* Note: no locking manipulations needed */
1673         RelationDecrementReferenceCount(relation);
1674
1675 #ifdef RELCACHE_FORCE_RELEASE
1676         if (RelationHasReferenceCountZero(relation) &&
1677                 relation->rd_createSubid == InvalidSubTransactionId &&
1678                 relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
1679                 RelationClearRelation(relation, false);
1680 #endif
1681 }
1682
1683 /*
1684  * RelationReloadIndexInfo - reload minimal information for an open index
1685  *
1686  *      This function is used only for indexes.  A relcache inval on an index
1687  *      can mean that its pg_class or pg_index row changed.  There are only
1688  *      very limited changes that are allowed to an existing index's schema,
1689  *      so we can update the relcache entry without a complete rebuild; which
1690  *      is fortunate because we can't rebuild an index entry that is "nailed"
1691  *      and/or in active use.  We support full replacement of the pg_class row,
1692  *      as well as updates of a few simple fields of the pg_index row.
1693  *
1694  *      We can't necessarily reread the catalog rows right away; we might be
1695  *      in a failed transaction when we receive the SI notification.  If so,
1696  *      RelationClearRelation just marks the entry as invalid by setting
1697  *      rd_isvalid to false.  This routine is called to fix the entry when it
1698  *      is next needed.
1699  *
1700  *      We assume that at the time we are called, we have at least AccessShareLock
1701  *      on the target index.  (Note: in the calls from RelationClearRelation,
1702  *      this is legitimate because we know the rel has positive refcount.)
1703  */
1704 static void
1705 RelationReloadIndexInfo(Relation relation)
1706 {
1707         bool            indexOK;
1708         HeapTuple       pg_class_tuple;
1709         Form_pg_class relp;
1710
1711         /* Should be called only for invalidated indexes */
1712         Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
1713                    !relation->rd_isvalid);
1714         /* Should be closed at smgr level */
1715         Assert(relation->rd_smgr == NULL);
1716
1717         /* Must free any AM cached data upon relcache flush */
1718         if (relation->rd_amcache)
1719                 pfree(relation->rd_amcache);
1720         relation->rd_amcache = NULL;
1721
1722         /*
1723          * If it's a shared index, we might be called before backend startup has
1724          * finished selecting a database, in which case we have no way to read
1725          * pg_class yet.  However, a shared index can never have any significant
1726          * schema updates, so it's okay to ignore the invalidation signal.  Just
1727          * mark it valid and return without doing anything more.
1728          */
1729         if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
1730         {
1731                 relation->rd_isvalid = true;
1732                 return;
1733         }
1734
1735         /*
1736          * Read the pg_class row
1737          *
1738          * Don't try to use an indexscan of pg_class_oid_index to reload the info
1739          * for pg_class_oid_index ...
1740          */
1741         indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
1742         pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
1743         if (!HeapTupleIsValid(pg_class_tuple))
1744                 elog(ERROR, "could not find pg_class tuple for index %u",
1745                          RelationGetRelid(relation));
1746         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1747         memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
1748         /* Reload reloptions in case they changed */
1749         if (relation->rd_options)
1750                 pfree(relation->rd_options);
1751         RelationParseRelOptions(relation, pg_class_tuple);
1752         /* done with pg_class tuple */
1753         heap_freetuple(pg_class_tuple);
1754         /* We must recalculate physical address in case it changed */
1755         RelationInitPhysicalAddr(relation);
1756
1757         /*
1758          * For a non-system index, there are fields of the pg_index row that are
1759          * allowed to change, so re-read that row and update the relcache entry.
1760          * Most of the info derived from pg_index (such as support function lookup
1761          * info) cannot change, and indeed the whole point of this routine is to
1762          * update the relcache entry without clobbering that data; so wholesale
1763          * replacement is not appropriate.
1764          */
1765         if (!IsSystemRelation(relation))
1766         {
1767                 HeapTuple       tuple;
1768                 Form_pg_index index;
1769
1770                 tuple = SearchSysCache1(INDEXRELID,
1771                                                                 ObjectIdGetDatum(RelationGetRelid(relation)));
1772                 if (!HeapTupleIsValid(tuple))
1773                         elog(ERROR, "cache lookup failed for index %u",
1774                                  RelationGetRelid(relation));
1775                 index = (Form_pg_index) GETSTRUCT(tuple);
1776
1777                 relation->rd_index->indisvalid = index->indisvalid;
1778                 relation->rd_index->indcheckxmin = index->indcheckxmin;
1779                 relation->rd_index->indisready = index->indisready;
1780                 HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
1781                                                            HeapTupleHeaderGetXmin(tuple->t_data));
1782
1783                 ReleaseSysCache(tuple);
1784         }
1785
1786         /* Okay, now it's valid again */
1787         relation->rd_isvalid = true;
1788 }
1789
1790 /*
1791  * RelationDestroyRelation
1792  *
1793  *      Physically delete a relation cache entry and all subsidiary data.
1794  *      Caller must already have unhooked the entry from the hash table.
1795  */
1796 static void
1797 RelationDestroyRelation(Relation relation)
1798 {
1799         Assert(RelationHasReferenceCountZero(relation));
1800
1801         /*
1802          * Make sure smgr and lower levels close the relation's files, if they
1803          * weren't closed already.  (This was probably done by caller, but let's
1804          * just be real sure.)
1805          */
1806         RelationCloseSmgr(relation);
1807
1808         /*
1809          * Free all the subsidiary data structures of the relcache entry, then the
1810          * entry itself.
1811          */
1812         if (relation->rd_rel)
1813                 pfree(relation->rd_rel);
1814         /* can't use DecrTupleDescRefCount here */
1815         Assert(relation->rd_att->tdrefcount > 0);
1816         if (--relation->rd_att->tdrefcount == 0)
1817                 FreeTupleDesc(relation->rd_att);
1818         list_free(relation->rd_indexlist);
1819         bms_free(relation->rd_indexattr);
1820         FreeTriggerDesc(relation->trigdesc);
1821         if (relation->rd_options)
1822                 pfree(relation->rd_options);
1823         if (relation->rd_indextuple)
1824                 pfree(relation->rd_indextuple);
1825         if (relation->rd_am)
1826                 pfree(relation->rd_am);
1827         if (relation->rd_indexcxt)
1828                 MemoryContextDelete(relation->rd_indexcxt);
1829         if (relation->rd_rulescxt)
1830                 MemoryContextDelete(relation->rd_rulescxt);
1831         pfree(relation);
1832 }
1833
1834 /*
1835  * RelationClearRelation
1836  *
1837  *       Physically blow away a relation cache entry, or reset it and rebuild
1838  *       it from scratch (that is, from catalog entries).  The latter path is
1839  *       used when we are notified of a change to an open relation (one with
1840  *       refcount > 0).
1841  *
1842  *       NB: when rebuilding, we'd better hold some lock on the relation,
1843  *       else the catalog data we need to read could be changing under us.
1844  *       Also, a rel to be rebuilt had better have refcnt > 0.  This is because
1845  *       an sinval reset could happen while we're accessing the catalogs, and
1846  *       the rel would get blown away underneath us by RelationCacheInvalidate
1847  *       if it has zero refcnt.
1848  *
1849  *       The "rebuild" parameter is redundant in current usage because it has
1850  *       to match the relation's refcnt status, but we keep it as a crosscheck
1851  *       that we're doing what the caller expects.
1852  */
1853 static void
1854 RelationClearRelation(Relation relation, bool rebuild)
1855 {
1856         /*
1857          * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
1858          * course it would be a bad idea to blow away one with nonzero refcnt.
1859          */
1860         Assert(rebuild ?
1861                    !RelationHasReferenceCountZero(relation) :
1862                    RelationHasReferenceCountZero(relation));
1863
1864         /*
1865          * Make sure smgr and lower levels close the relation's files, if they
1866          * weren't closed already.  If the relation is not getting deleted, the
1867          * next smgr access should reopen the files automatically.      This ensures
1868          * that the low-level file access state is updated after, say, a vacuum
1869          * truncation.
1870          */
1871         RelationCloseSmgr(relation);
1872
1873         /*
1874          * Never, never ever blow away a nailed-in system relation, because we'd
1875          * be unable to recover.  However, we must redo RelationInitPhysicalAddr
1876          * in case it is a mapped relation whose mapping changed.
1877          *
1878          * If it's a nailed index, then we need to re-read the pg_class row to see
1879          * if its relfilenode changed.  We can't necessarily do that here, because
1880          * we might be in a failed transaction.  We assume it's okay to do it if
1881          * there are open references to the relcache entry (cf notes for
1882          * AtEOXact_RelationCache).  Otherwise just mark the entry as possibly
1883          * invalid, and it'll be fixed when next opened.
1884          */
1885         if (relation->rd_isnailed)
1886         {
1887                 RelationInitPhysicalAddr(relation);
1888
1889                 if (relation->rd_rel->relkind == RELKIND_INDEX)
1890                 {
1891                         relation->rd_isvalid = false;           /* needs to be revalidated */
1892                         if (relation->rd_refcnt > 1)
1893                                 RelationReloadIndexInfo(relation);
1894                 }
1895                 return;
1896         }
1897
1898         /*
1899          * Even non-system indexes should not be blown away if they are open and
1900          * have valid index support information.  This avoids problems with active
1901          * use of the index support information.  As with nailed indexes, we
1902          * re-read the pg_class row to handle possible physical relocation of the
1903          * index, and we check for pg_index updates too.
1904          */
1905         if (relation->rd_rel->relkind == RELKIND_INDEX &&
1906                 relation->rd_refcnt > 0 &&
1907                 relation->rd_indexcxt != NULL)
1908         {
1909                 relation->rd_isvalid = false;   /* needs to be revalidated */
1910                 RelationReloadIndexInfo(relation);
1911                 return;
1912         }
1913
1914         /* Mark it invalid until we've finished rebuild */
1915         relation->rd_isvalid = false;
1916
1917         /*
1918          * If we're really done with the relcache entry, blow it away. But if
1919          * someone is still using it, reconstruct the whole deal without moving
1920          * the physical RelationData record (so that the someone's pointer is
1921          * still valid).
1922          */
1923         if (!rebuild)
1924         {
1925                 /* Remove it from the hash table */
1926                 RelationCacheDelete(relation);
1927
1928                 /* And release storage */
1929                 RelationDestroyRelation(relation);
1930         }
1931         else
1932         {
1933                 /*
1934                  * Our strategy for rebuilding an open relcache entry is to build a
1935                  * new entry from scratch, swap its contents with the old entry, and
1936                  * finally delete the new entry (along with any infrastructure swapped
1937                  * over from the old entry).  This is to avoid trouble in case an
1938                  * error causes us to lose control partway through.  The old entry
1939                  * will still be marked !rd_isvalid, so we'll try to rebuild it again
1940                  * on next access.      Meanwhile it's not any less valid than it was
1941                  * before, so any code that might expect to continue accessing it
1942                  * isn't hurt by the rebuild failure.  (Consider for example a
1943                  * subtransaction that ALTERs a table and then gets cancelled partway
1944                  * through the cache entry rebuild.  The outer transaction should
1945                  * still see the not-modified cache entry as valid.)  The worst
1946                  * consequence of an error is leaking the necessarily-unreferenced new
1947                  * entry, and this shouldn't happen often enough for that to be a big
1948                  * problem.
1949                  *
1950                  * When rebuilding an open relcache entry, we must preserve ref count,
1951                  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state.  Also
1952                  * attempt to preserve the pg_class entry (rd_rel), tupledesc, and
1953                  * rewrite-rule substructures in place, because various places assume
1954                  * that these structures won't move while they are working with an
1955                  * open relcache entry.  (Note: the refcount mechanism for tupledescs
1956                  * might someday allow us to remove this hack for the tupledesc.)
1957                  *
1958                  * Note that this process does not touch CurrentResourceOwner; which
1959                  * is good because whatever ref counts the entry may have do not
1960                  * necessarily belong to that resource owner.
1961                  */
1962                 Relation        newrel;
1963                 Oid                     save_relid = RelationGetRelid(relation);
1964                 bool            keep_tupdesc;
1965                 bool            keep_rules;
1966
1967                 /* Build temporary entry, but don't link it into hashtable */
1968                 newrel = RelationBuildDesc(save_relid, false);
1969                 if (newrel == NULL)
1970                 {
1971                         /* Should only get here if relation was deleted */
1972                         RelationCacheDelete(relation);
1973                         RelationDestroyRelation(relation);
1974                         elog(ERROR, "relation %u deleted while still in use", save_relid);
1975                 }
1976
1977                 keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
1978                 keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
1979
1980                 /*
1981                  * Perform swapping of the relcache entry contents.  Within this
1982                  * process the old entry is momentarily invalid, so there *must* be no
1983                  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
1984                  * all-in-line code for safety.
1985                  *
1986                  * Since the vast majority of fields should be swapped, our method is
1987                  * to swap the whole structures and then re-swap those few fields we
1988                  * didn't want swapped.
1989                  */
1990 #define SWAPFIELD(fldtype, fldname) \
1991                 do { \
1992                         fldtype _tmp = newrel->fldname; \
1993                         newrel->fldname = relation->fldname; \
1994                         relation->fldname = _tmp; \
1995                 } while (0)
1996
1997                 /* swap all Relation struct fields */
1998                 {
1999                         RelationData tmpstruct;
2000
2001                         memcpy(&tmpstruct, newrel, sizeof(RelationData));
2002                         memcpy(newrel, relation, sizeof(RelationData));
2003                         memcpy(relation, &tmpstruct, sizeof(RelationData));
2004                 }
2005
2006                 /* rd_smgr must not be swapped, due to back-links from smgr level */
2007                 SWAPFIELD(SMgrRelation, rd_smgr);
2008                 /* rd_refcnt must be preserved */
2009                 SWAPFIELD(int, rd_refcnt);
2010                 /* isnailed shouldn't change */
2011                 Assert(newrel->rd_isnailed == relation->rd_isnailed);
2012                 /* creation sub-XIDs must be preserved */
2013                 SWAPFIELD(SubTransactionId, rd_createSubid);
2014                 SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2015                 /* un-swap rd_rel pointers, swap contents instead */
2016                 SWAPFIELD(Form_pg_class, rd_rel);
2017                 /* ... but actually, we don't have to update newrel->rd_rel */
2018                 memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2019                 /* preserve old tupledesc and rules if no logical change */
2020                 if (keep_tupdesc)
2021                         SWAPFIELD(TupleDesc, rd_att);
2022                 if (keep_rules)
2023                 {
2024                         SWAPFIELD(RuleLock *, rd_rules);
2025                         SWAPFIELD(MemoryContext, rd_rulescxt);
2026                 }
2027                 /* toast OID override must be preserved */
2028                 SWAPFIELD(Oid, rd_toastoid);
2029                 /* pgstat_info must be preserved */
2030                 SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2031
2032 #undef SWAPFIELD
2033
2034                 /* And now we can throw away the temporary entry */
2035                 RelationDestroyRelation(newrel);
2036         }
2037 }
2038
2039 /*
2040  * RelationFlushRelation
2041  *
2042  *       Rebuild the relation if it is open (refcount > 0), else blow it away.
2043  */
2044 static void
2045 RelationFlushRelation(Relation relation)
2046 {
2047         if (relation->rd_createSubid != InvalidSubTransactionId ||
2048                 relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
2049         {
2050                 /*
2051                  * New relcache entries are always rebuilt, not flushed; else we'd
2052                  * forget the "new" status of the relation, which is a useful
2053                  * optimization to have.  Ditto for the new-relfilenode status.
2054                  *
2055                  * The rel could have zero refcnt here, so temporarily increment the
2056                  * refcnt to ensure it's safe to rebuild it.  We can assume that the
2057                  * current transaction has some lock on the rel already.
2058                  */
2059                 RelationIncrementReferenceCount(relation);
2060                 RelationClearRelation(relation, true);
2061                 RelationDecrementReferenceCount(relation);
2062         }
2063         else
2064         {
2065                 /*
2066                  * Pre-existing rels can be dropped from the relcache if not open.
2067                  */
2068                 bool            rebuild = !RelationHasReferenceCountZero(relation);
2069
2070                 RelationClearRelation(relation, rebuild);
2071         }
2072 }
2073
2074 /*
2075  * RelationForgetRelation - unconditionally remove a relcache entry
2076  *
2077  *                 External interface for destroying a relcache entry when we
2078  *                 drop the relation.
2079  */
2080 void
2081 RelationForgetRelation(Oid rid)
2082 {
2083         Relation        relation;
2084
2085         RelationIdCacheLookup(rid, relation);
2086
2087         if (!PointerIsValid(relation))
2088                 return;                                 /* not in cache, nothing to do */
2089
2090         if (!RelationHasReferenceCountZero(relation))
2091                 elog(ERROR, "relation %u is still open", rid);
2092
2093         /* Unconditionally destroy the relcache entry */
2094         RelationClearRelation(relation, false);
2095 }
2096
2097 /*
2098  *              RelationCacheInvalidateEntry
2099  *
2100  *              This routine is invoked for SI cache flush messages.
2101  *
2102  * Any relcache entry matching the relid must be flushed.  (Note: caller has
2103  * already determined that the relid belongs to our database or is a shared
2104  * relation.)
2105  *
2106  * We used to skip local relations, on the grounds that they could
2107  * not be targets of cross-backend SI update messages; but it seems
2108  * safer to process them, so that our *own* SI update messages will
2109  * have the same effects during CommandCounterIncrement for both
2110  * local and nonlocal relations.
2111  */
2112 void
2113 RelationCacheInvalidateEntry(Oid relationId)
2114 {
2115         Relation        relation;
2116
2117         RelationIdCacheLookup(relationId, relation);
2118
2119         if (PointerIsValid(relation))
2120         {
2121                 relcacheInvalsReceived++;
2122                 RelationFlushRelation(relation);
2123         }
2124 }
2125
2126 /*
2127  * RelationCacheInvalidate
2128  *       Blow away cached relation descriptors that have zero reference counts,
2129  *       and rebuild those with positive reference counts.      Also reset the smgr
2130  *       relation cache and re-read relation mapping data.
2131  *
2132  *       This is currently used only to recover from SI message buffer overflow,
2133  *       so we do not touch new-in-transaction relations; they cannot be targets
2134  *       of cross-backend SI updates (and our own updates now go through a
2135  *       separate linked list that isn't limited by the SI message buffer size).
2136  *       Likewise, we need not discard new-relfilenode-in-transaction hints,
2137  *       since any invalidation of those would be a local event.
2138  *
2139  *       We do this in two phases: the first pass deletes deletable items, and
2140  *       the second one rebuilds the rebuildable items.  This is essential for
2141  *       safety, because hash_seq_search only copes with concurrent deletion of
2142  *       the element it is currently visiting.  If a second SI overflow were to
2143  *       occur while we are walking the table, resulting in recursive entry to
2144  *       this routine, we could crash because the inner invocation blows away
2145  *       the entry next to be visited by the outer scan.  But this way is OK,
2146  *       because (a) during the first pass we won't process any more SI messages,
2147  *       so hash_seq_search will complete safely; (b) during the second pass we
2148  *       only hold onto pointers to nondeletable entries.
2149  *
2150  *       The two-phase approach also makes it easy to ensure that we process
2151  *       nailed-in-cache indexes before other nondeletable items, and that we
2152  *       process pg_class_oid_index first of all.  In scenarios where a nailed
2153  *       index has been given a new relfilenode, we have to detect that update
2154  *       before the nailed index is used in reloading any other relcache entry.
2155  */
2156 void
2157 RelationCacheInvalidate(void)
2158 {
2159         HASH_SEQ_STATUS status;
2160         RelIdCacheEnt *idhentry;
2161         Relation        relation;
2162         List       *rebuildFirstList = NIL;
2163         List       *rebuildList = NIL;
2164         ListCell   *l;
2165
2166         /* Phase 1 */
2167         hash_seq_init(&status, RelationIdCache);
2168
2169         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2170         {
2171                 relation = idhentry->reldesc;
2172
2173                 /* Must close all smgr references to avoid leaving dangling ptrs */
2174                 RelationCloseSmgr(relation);
2175
2176                 /* Ignore new relations, since they are never SI targets */
2177                 if (relation->rd_createSubid != InvalidSubTransactionId)
2178                         continue;
2179
2180                 relcacheInvalsReceived++;
2181
2182                 if (RelationHasReferenceCountZero(relation))
2183                 {
2184                         /* Delete this entry immediately */
2185                         Assert(!relation->rd_isnailed);
2186                         RelationClearRelation(relation, false);
2187                 }
2188                 else
2189                 {
2190                         /*
2191                          * Add this entry to list of stuff to rebuild in second pass.
2192                          * pg_class_oid_index goes on the front of rebuildFirstList, other
2193                          * nailed indexes on the back, and everything else into
2194                          * rebuildList (in no particular order).
2195                          */
2196                         if (relation->rd_isnailed &&
2197                                 relation->rd_rel->relkind == RELKIND_INDEX)
2198                         {
2199                                 if (RelationGetRelid(relation) == ClassOidIndexId)
2200                                         rebuildFirstList = lcons(relation, rebuildFirstList);
2201                                 else
2202                                         rebuildFirstList = lappend(rebuildFirstList, relation);
2203                         }
2204                         else
2205                                 rebuildList = lcons(relation, rebuildList);
2206                 }
2207         }
2208
2209         /*
2210          * Now zap any remaining smgr cache entries.  This must happen before we
2211          * start to rebuild entries, since that may involve catalog fetches which
2212          * will re-open catalog files.
2213          */
2214         smgrcloseall();
2215
2216         /*
2217          * Reload relation mapping data before starting to reconstruct cache.
2218          */
2219         RelationMapInvalidateAll();
2220
2221         /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2222         foreach(l, rebuildFirstList)
2223         {
2224                 relation = (Relation) lfirst(l);
2225                 RelationClearRelation(relation, true);
2226         }
2227         list_free(rebuildFirstList);
2228         foreach(l, rebuildList)
2229         {
2230                 relation = (Relation) lfirst(l);
2231                 RelationClearRelation(relation, true);
2232         }
2233         list_free(rebuildList);
2234 }
2235
2236 /*
2237  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2238  *
2239  * Needed in some cases where we are changing a relation's physical mapping.
2240  * The link will be automatically reopened on next use.
2241  */
2242 void
2243 RelationCloseSmgrByOid(Oid relationId)
2244 {
2245         Relation        relation;
2246
2247         RelationIdCacheLookup(relationId, relation);
2248
2249         if (!PointerIsValid(relation))
2250                 return;                                 /* not in cache, nothing to do */
2251
2252         RelationCloseSmgr(relation);
2253 }
2254
2255 /*
2256  * AtEOXact_RelationCache
2257  *
2258  *      Clean up the relcache at main-transaction commit or abort.
2259  *
2260  * Note: this must be called *before* processing invalidation messages.
2261  * In the case of abort, we don't want to try to rebuild any invalidated
2262  * cache entries (since we can't safely do database accesses).  Therefore
2263  * we must reset refcnts before handling pending invalidations.
2264  *
2265  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2266  * ResourceOwner mechanism.  This routine just does a debugging
2267  * cross-check that no pins remain.  However, we also need to do special
2268  * cleanup when the current transaction created any relations or made use
2269  * of forced index lists.
2270  */
2271 void
2272 AtEOXact_RelationCache(bool isCommit)
2273 {
2274         HASH_SEQ_STATUS status;
2275         RelIdCacheEnt *idhentry;
2276
2277         /*
2278          * To speed up transaction exit, we want to avoid scanning the relcache
2279          * unless there is actually something for this routine to do.  Other than
2280          * the debug-only Assert checks, most transactions don't create any work
2281          * for us to do here, so we keep a static flag that gets set if there is
2282          * anything to do.      (Currently, this means either a relation is created in
2283          * the current xact, or one is given a new relfilenode, or an index list
2284          * is forced.)  For simplicity, the flag remains set till end of top-level
2285          * transaction, even though we could clear it at subtransaction end in
2286          * some cases.
2287          */
2288         if (!need_eoxact_work
2289 #ifdef USE_ASSERT_CHECKING
2290                 && !assert_enabled
2291 #endif
2292                 )
2293                 return;
2294
2295         hash_seq_init(&status, RelationIdCache);
2296
2297         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2298         {
2299                 Relation        relation = idhentry->reldesc;
2300
2301                 /*
2302                  * The relcache entry's ref count should be back to its normal
2303                  * not-in-a-transaction state: 0 unless it's nailed in cache.
2304                  *
2305                  * In bootstrap mode, this is NOT true, so don't check it --- the
2306                  * bootstrap code expects relations to stay open across start/commit
2307                  * transaction calls.  (That seems bogus, but it's not worth fixing.)
2308                  */
2309 #ifdef USE_ASSERT_CHECKING
2310                 if (!IsBootstrapProcessingMode())
2311                 {
2312                         int                     expected_refcnt;
2313
2314                         expected_refcnt = relation->rd_isnailed ? 1 : 0;
2315                         Assert(relation->rd_refcnt == expected_refcnt);
2316                 }
2317 #endif
2318
2319                 /*
2320                  * Is it a relation created in the current transaction?
2321                  *
2322                  * During commit, reset the flag to zero, since we are now out of the
2323                  * creating transaction.  During abort, simply delete the relcache
2324                  * entry --- it isn't interesting any longer.  (NOTE: if we have
2325                  * forgotten the new-ness of a new relation due to a forced cache
2326                  * flush, the entry will get deleted anyway by shared-cache-inval
2327                  * processing of the aborted pg_class insertion.)
2328                  */
2329                 if (relation->rd_createSubid != InvalidSubTransactionId)
2330                 {
2331                         if (isCommit)
2332                                 relation->rd_createSubid = InvalidSubTransactionId;
2333                         else
2334                         {
2335                                 RelationClearRelation(relation, false);
2336                                 continue;
2337                         }
2338                 }
2339
2340                 /*
2341                  * Likewise, reset the hint about the relfilenode being new.
2342                  */
2343                 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2344
2345                 /*
2346                  * Flush any temporary index list.
2347                  */
2348                 if (relation->rd_indexvalid == 2)
2349                 {
2350                         list_free(relation->rd_indexlist);
2351                         relation->rd_indexlist = NIL;
2352                         relation->rd_oidindex = InvalidOid;
2353                         relation->rd_indexvalid = 0;
2354                 }
2355         }
2356
2357         /* Once done with the transaction, we can reset need_eoxact_work */
2358         need_eoxact_work = false;
2359 }
2360
2361 /*
2362  * AtEOSubXact_RelationCache
2363  *
2364  *      Clean up the relcache at sub-transaction commit or abort.
2365  *
2366  * Note: this must be called *before* processing invalidation messages.
2367  */
2368 void
2369 AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
2370                                                   SubTransactionId parentSubid)
2371 {
2372         HASH_SEQ_STATUS status;
2373         RelIdCacheEnt *idhentry;
2374
2375         /*
2376          * Skip the relcache scan if nothing to do --- see notes for
2377          * AtEOXact_RelationCache.
2378          */
2379         if (!need_eoxact_work)
2380                 return;
2381
2382         hash_seq_init(&status, RelationIdCache);
2383
2384         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2385         {
2386                 Relation        relation = idhentry->reldesc;
2387
2388                 /*
2389                  * Is it a relation created in the current subtransaction?
2390                  *
2391                  * During subcommit, mark it as belonging to the parent, instead.
2392                  * During subabort, simply delete the relcache entry.
2393                  */
2394                 if (relation->rd_createSubid == mySubid)
2395                 {
2396                         if (isCommit)
2397                                 relation->rd_createSubid = parentSubid;
2398                         else
2399                         {
2400                                 RelationClearRelation(relation, false);
2401                                 continue;
2402                         }
2403                 }
2404
2405                 /*
2406                  * Likewise, update or drop any new-relfilenode-in-subtransaction
2407                  * hint.
2408                  */
2409                 if (relation->rd_newRelfilenodeSubid == mySubid)
2410                 {
2411                         if (isCommit)
2412                                 relation->rd_newRelfilenodeSubid = parentSubid;
2413                         else
2414                                 relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2415                 }
2416
2417                 /*
2418                  * Flush any temporary index list.
2419                  */
2420                 if (relation->rd_indexvalid == 2)
2421                 {
2422                         list_free(relation->rd_indexlist);
2423                         relation->rd_indexlist = NIL;
2424                         relation->rd_oidindex = InvalidOid;
2425                         relation->rd_indexvalid = 0;
2426                 }
2427         }
2428 }
2429
2430
2431 /*
2432  *              RelationBuildLocalRelation
2433  *                      Build a relcache entry for an about-to-be-created relation,
2434  *                      and enter it into the relcache.
2435  */
2436 Relation
2437 RelationBuildLocalRelation(const char *relname,
2438                                                    Oid relnamespace,
2439                                                    TupleDesc tupDesc,
2440                                                    Oid relid,
2441                                                    Oid reltablespace,
2442                                                    bool shared_relation,
2443                                                    bool mapped_relation)
2444 {
2445         Relation        rel;
2446         MemoryContext oldcxt;
2447         int                     natts = tupDesc->natts;
2448         int                     i;
2449         bool            has_not_null;
2450         bool            nailit;
2451
2452         AssertArg(natts >= 0);
2453
2454         /*
2455          * check for creation of a rel that must be nailed in cache.
2456          *
2457          * XXX this list had better match the relations specially handled in
2458          * RelationCacheInitializePhase2/3.
2459          */
2460         switch (relid)
2461         {
2462                 case DatabaseRelationId:
2463                 case AuthIdRelationId:
2464                 case AuthMemRelationId:
2465                 case RelationRelationId:
2466                 case AttributeRelationId:
2467                 case ProcedureRelationId:
2468                 case TypeRelationId:
2469                         nailit = true;
2470                         break;
2471                 default:
2472                         nailit = false;
2473                         break;
2474         }
2475
2476         /*
2477          * check that hardwired list of shared rels matches what's in the
2478          * bootstrap .bki file.  If you get a failure here during initdb, you
2479          * probably need to fix IsSharedRelation() to match whatever you've done
2480          * to the set of shared relations.
2481          */
2482         if (shared_relation != IsSharedRelation(relid))
2483                 elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
2484                          relname, relid);
2485
2486         /* Shared relations had better be mapped, too */
2487         Assert(mapped_relation || !shared_relation);
2488
2489         /*
2490          * switch to the cache context to create the relcache entry.
2491          */
2492         if (!CacheMemoryContext)
2493                 CreateCacheMemoryContext();
2494
2495         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2496
2497         /*
2498          * allocate a new relation descriptor and fill in basic state fields.
2499          */
2500         rel = (Relation) palloc0(sizeof(RelationData));
2501
2502         /* make sure relation is marked as having no open file yet */
2503         rel->rd_smgr = NULL;
2504
2505         /* mark it nailed if appropriate */
2506         rel->rd_isnailed = nailit;
2507
2508         rel->rd_refcnt = nailit ? 1 : 0;
2509
2510         /* it's being created in this transaction */
2511         rel->rd_createSubid = GetCurrentSubTransactionId();
2512         rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
2513
2514         /* must flag that we have rels created in this transaction */
2515         need_eoxact_work = true;
2516
2517         /* it is temporary if and only if it is in my temp-table namespace */
2518         rel->rd_istemp = isTempOrToastNamespace(relnamespace);
2519         rel->rd_backend = rel->rd_istemp ? MyBackendId : InvalidBackendId;
2520
2521         /*
2522          * create a new tuple descriptor from the one passed in.  We do this
2523          * partly to copy it into the cache context, and partly because the new
2524          * relation can't have any defaults or constraints yet; they have to be
2525          * added in later steps, because they require additions to multiple system
2526          * catalogs.  We can copy attnotnull constraints here, however.
2527          */
2528         rel->rd_att = CreateTupleDescCopy(tupDesc);
2529         rel->rd_att->tdrefcount = 1;    /* mark as refcounted */
2530         has_not_null = false;
2531         for (i = 0; i < natts; i++)
2532         {
2533                 rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2534                 has_not_null |= tupDesc->attrs[i]->attnotnull;
2535         }
2536
2537         if (has_not_null)
2538         {
2539                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2540
2541                 constr->has_not_null = true;
2542                 rel->rd_att->constr = constr;
2543         }
2544
2545         /*
2546          * initialize relation tuple form (caller may add/override data later)
2547          */
2548         rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2549
2550         namestrcpy(&rel->rd_rel->relname, relname);
2551         rel->rd_rel->relnamespace = relnamespace;
2552
2553         rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2554         rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2555         rel->rd_rel->relnatts = natts;
2556         rel->rd_rel->reltype = InvalidOid;
2557         /* needed when bootstrapping: */
2558         rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
2559
2560         /*
2561          * Insert relation physical and logical identifiers (OIDs) into the right
2562          * places.      Note that the physical ID (relfilenode) is initially the same
2563          * as the logical ID (OID); except that for a mapped relation, we set
2564          * relfilenode to zero and rely on RelationInitPhysicalAddr to consult the
2565          * map.
2566          */
2567         rel->rd_rel->relisshared = shared_relation;
2568         rel->rd_rel->relistemp = rel->rd_istemp;
2569
2570         RelationGetRelid(rel) = relid;
2571
2572         for (i = 0; i < natts; i++)
2573                 rel->rd_att->attrs[i]->attrelid = relid;
2574
2575         rel->rd_rel->reltablespace = reltablespace;
2576
2577         if (mapped_relation)
2578         {
2579                 rel->rd_rel->relfilenode = InvalidOid;
2580                 /* Add it to the active mapping information */
2581                 RelationMapUpdateMap(relid, relid, shared_relation, true);
2582         }
2583         else
2584                 rel->rd_rel->relfilenode = relid;
2585
2586         RelationInitLockInfo(rel);      /* see lmgr.c */
2587
2588         RelationInitPhysicalAddr(rel);
2589
2590         /*
2591          * Okay to insert into the relcache hash tables.
2592          */
2593         RelationCacheInsert(rel);
2594
2595         /*
2596          * done building relcache entry.
2597          */
2598         MemoryContextSwitchTo(oldcxt);
2599
2600         /* It's fully valid */
2601         rel->rd_isvalid = true;
2602
2603         /*
2604          * Caller expects us to pin the returned entry.
2605          */
2606         RelationIncrementReferenceCount(rel);
2607
2608         return rel;
2609 }
2610
2611
2612 /*
2613  * RelationSetNewRelfilenode
2614  *
2615  * Assign a new relfilenode (physical file name) to the relation.
2616  *
2617  * This allows a full rewrite of the relation to be done with transactional
2618  * safety (since the filenode assignment can be rolled back).  Note however
2619  * that there is no simple way to access the relation's old data for the
2620  * remainder of the current transaction.  This limits the usefulness to cases
2621  * such as TRUNCATE or rebuilding an index from scratch.
2622  *
2623  * Caller must already hold exclusive lock on the relation.
2624  *
2625  * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
2626  * must be passed for indexes).  This should be a lower bound on the XIDs
2627  * that will be put into the new relation contents.
2628  */
2629 void
2630 RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
2631 {
2632         Oid                     newrelfilenode;
2633         RelFileNodeBackend newrnode;
2634         Relation        pg_class;
2635         HeapTuple       tuple;
2636         Form_pg_class classform;
2637
2638         /* Indexes must have Invalid frozenxid; other relations must not */
2639         Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
2640                         freezeXid == InvalidTransactionId) ||
2641                    TransactionIdIsNormal(freezeXid));
2642
2643         /* Allocate a new relfilenode */
2644         newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
2645                                                                            relation->rd_backend);
2646
2647         /*
2648          * Get a writable copy of the pg_class tuple for the given relation.
2649          */
2650         pg_class = heap_open(RelationRelationId, RowExclusiveLock);
2651
2652         tuple = SearchSysCacheCopy1(RELOID,
2653                                                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2654         if (!HeapTupleIsValid(tuple))
2655                 elog(ERROR, "could not find tuple for relation %u",
2656                          RelationGetRelid(relation));
2657         classform = (Form_pg_class) GETSTRUCT(tuple);
2658
2659         /*
2660          * Create storage for the main fork of the new relfilenode.
2661          *
2662          * NOTE: any conflict in relfilenode value will be caught here, if
2663          * GetNewRelFileNode messes up for any reason.
2664          */
2665         newrnode.node = relation->rd_node;
2666         newrnode.node.relNode = newrelfilenode;
2667         newrnode.backend = relation->rd_backend;
2668         RelationCreateStorage(newrnode.node, relation->rd_istemp);
2669         smgrclosenode(newrnode);
2670
2671         /*
2672          * Schedule unlinking of the old storage at transaction commit.
2673          */
2674         RelationDropStorage(relation);
2675
2676         /*
2677          * Now update the pg_class row.  However, if we're dealing with a mapped
2678          * index, pg_class.relfilenode doesn't change; instead we have to send the
2679          * update to the relation mapper.
2680          */
2681         if (RelationIsMapped(relation))
2682                 RelationMapUpdateMap(RelationGetRelid(relation),
2683                                                          newrelfilenode,
2684                                                          relation->rd_rel->relisshared,
2685                                                          false);
2686         else
2687                 classform->relfilenode = newrelfilenode;
2688
2689         /* These changes are safe even for a mapped relation */
2690         classform->relpages = 0;        /* it's empty until further notice */
2691         classform->reltuples = 0;
2692         classform->relfrozenxid = freezeXid;
2693
2694         simple_heap_update(pg_class, &tuple->t_self, tuple);
2695         CatalogUpdateIndexes(pg_class, tuple);
2696
2697         heap_freetuple(tuple);
2698
2699         heap_close(pg_class, RowExclusiveLock);
2700
2701         /*
2702          * Make the pg_class row change visible, as well as the relation map
2703          * change if any.  This will cause the relcache entry to get updated, too.
2704          */
2705         CommandCounterIncrement();
2706
2707         /*
2708          * Mark the rel as having been given a new relfilenode in the current
2709          * (sub) transaction.  This is a hint that can be used to optimize later
2710          * operations on the rel in the same transaction.
2711          */
2712         relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
2713         /* ... and now we have eoxact cleanup work to do */
2714         need_eoxact_work = true;
2715 }
2716
2717
2718 /*
2719  *              RelationCacheInitialize
2720  *
2721  *              This initializes the relation descriptor cache.  At the time
2722  *              that this is invoked, we can't do database access yet (mainly
2723  *              because the transaction subsystem is not up); all we are doing
2724  *              is making an empty cache hashtable.  This must be done before
2725  *              starting the initialization transaction, because otherwise
2726  *              AtEOXact_RelationCache would crash if that transaction aborts
2727  *              before we can get the relcache set up.
2728  */
2729
2730 #define INITRELCACHESIZE                400
2731
2732 void
2733 RelationCacheInitialize(void)
2734 {
2735         HASHCTL         ctl;
2736
2737         /*
2738          * make sure cache memory context exists
2739          */
2740         if (!CacheMemoryContext)
2741                 CreateCacheMemoryContext();
2742
2743         /*
2744          * create hashtable that indexes the relcache
2745          */
2746         MemSet(&ctl, 0, sizeof(ctl));
2747         ctl.keysize = sizeof(Oid);
2748         ctl.entrysize = sizeof(RelIdCacheEnt);
2749         ctl.hash = oid_hash;
2750         RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
2751                                                                   &ctl, HASH_ELEM | HASH_FUNCTION);
2752
2753         /*
2754          * relation mapper needs initialized too
2755          */
2756         RelationMapInitialize();
2757 }
2758
2759 /*
2760  *              RelationCacheInitializePhase2
2761  *
2762  *              This is called to prepare for access to shared catalogs during startup.
2763  *              We must at least set up nailed reldescs for pg_database, pg_authid,
2764  *              and pg_auth_members.  Ideally we'd like to have reldescs for their
2765  *              indexes, too.  We attempt to load this information from the shared
2766  *              relcache init file.  If that's missing or broken, just make phony
2767  *              entries for the catalogs themselves.  RelationCacheInitializePhase3
2768  *              will clean up as needed.
2769  */
2770 void
2771 RelationCacheInitializePhase2(void)
2772 {
2773         MemoryContext oldcxt;
2774
2775         /*
2776          * relation mapper needs initialized too
2777          */
2778         RelationMapInitializePhase2();
2779
2780         /*
2781          * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
2782          * nothing.
2783          */
2784         if (IsBootstrapProcessingMode())
2785                 return;
2786
2787         /*
2788          * switch to cache memory context
2789          */
2790         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2791
2792         /*
2793          * Try to load the shared relcache cache file.  If unsuccessful, bootstrap
2794          * the cache with pre-made descriptors for the critical shared catalogs.
2795          */
2796         if (!load_relcache_init_file(true))
2797         {
2798                 formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
2799                                   true, Natts_pg_database, Desc_pg_database);
2800                 formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
2801                                   true, Natts_pg_authid, Desc_pg_authid);
2802                 formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
2803                                   false, Natts_pg_auth_members, Desc_pg_auth_members);
2804
2805 #define NUM_CRITICAL_SHARED_RELS        3       /* fix if you change list above */
2806         }
2807
2808         MemoryContextSwitchTo(oldcxt);
2809 }
2810
2811 /*
2812  *              RelationCacheInitializePhase3
2813  *
2814  *              This is called as soon as the catcache and transaction system
2815  *              are functional and we have determined MyDatabaseId.  At this point
2816  *              we can actually read data from the database's system catalogs.
2817  *              We first try to read pre-computed relcache entries from the local
2818  *              relcache init file.  If that's missing or broken, make phony entries
2819  *              for the minimum set of nailed-in-cache relations.  Then (unless
2820  *              bootstrapping) make sure we have entries for the critical system
2821  *              indexes.  Once we've done all this, we have enough infrastructure to
2822  *              open any system catalog or use any catcache.  The last step is to
2823  *              rewrite the cache files if needed.
2824  */
2825 void
2826 RelationCacheInitializePhase3(void)
2827 {
2828         HASH_SEQ_STATUS status;
2829         RelIdCacheEnt *idhentry;
2830         MemoryContext oldcxt;
2831         bool            needNewCacheFile = !criticalSharedRelcachesBuilt;
2832
2833         /*
2834          * relation mapper needs initialized too
2835          */
2836         RelationMapInitializePhase3();
2837
2838         /*
2839          * switch to cache memory context
2840          */
2841         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2842
2843         /*
2844          * Try to load the local relcache cache file.  If unsuccessful, bootstrap
2845          * the cache with pre-made descriptors for the critical "nailed-in" system
2846          * catalogs.
2847          */
2848         if (IsBootstrapProcessingMode() ||
2849                 !load_relcache_init_file(false))
2850         {
2851                 needNewCacheFile = true;
2852
2853                 formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
2854                                   true, Natts_pg_class, Desc_pg_class);
2855                 formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
2856                                   false, Natts_pg_attribute, Desc_pg_attribute);
2857                 formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
2858                                   true, Natts_pg_proc, Desc_pg_proc);
2859                 formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
2860                                   true, Natts_pg_type, Desc_pg_type);
2861
2862 #define NUM_CRITICAL_LOCAL_RELS 4               /* fix if you change list above */
2863         }
2864
2865         MemoryContextSwitchTo(oldcxt);
2866
2867         /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
2868         if (IsBootstrapProcessingMode())
2869                 return;
2870
2871         /*
2872          * If we didn't get the critical system indexes loaded into relcache, do
2873          * so now.      These are critical because the catcache and/or opclass cache
2874          * depend on them for fetches done during relcache load.  Thus, we have an
2875          * infinite-recursion problem.  We can break the recursion by doing
2876          * heapscans instead of indexscans at certain key spots. To avoid hobbling
2877          * performance, we only want to do that until we have the critical indexes
2878          * loaded into relcache.  Thus, the flag criticalRelcachesBuilt is used to
2879          * decide whether to do heapscan or indexscan at the key spots, and we set
2880          * it true after we've loaded the critical indexes.
2881          *
2882          * The critical indexes are marked as "nailed in cache", partly to make it
2883          * easy for load_relcache_init_file to count them, but mainly because we
2884          * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
2885          * true.  (NOTE: perhaps it would be possible to reload them by
2886          * temporarily setting criticalRelcachesBuilt to false again.  For now,
2887          * though, we just nail 'em in.)
2888          *
2889          * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
2890          * in the same way as the others, because the critical catalogs don't
2891          * (currently) have any rules or triggers, and so these indexes can be
2892          * rebuilt without inducing recursion.  However they are used during
2893          * relcache load when a rel does have rules or triggers, so we choose to
2894          * nail them for performance reasons.
2895          */
2896         if (!criticalRelcachesBuilt)
2897         {
2898                 load_critical_index(ClassOidIndexId,
2899                                                         RelationRelationId);
2900                 load_critical_index(AttributeRelidNumIndexId,
2901                                                         AttributeRelationId);
2902                 load_critical_index(IndexRelidIndexId,
2903                                                         IndexRelationId);
2904                 load_critical_index(OpclassOidIndexId,
2905                                                         OperatorClassRelationId);
2906                 load_critical_index(AccessMethodStrategyIndexId,
2907                                                         AccessMethodOperatorRelationId);
2908                 load_critical_index(AccessMethodProcedureIndexId,
2909                                                         AccessMethodProcedureRelationId);
2910                 load_critical_index(OperatorOidIndexId,
2911                                                         OperatorRelationId);
2912                 load_critical_index(RewriteRelRulenameIndexId,
2913                                                         RewriteRelationId);
2914                 load_critical_index(TriggerRelidNameIndexId,
2915                                                         TriggerRelationId);
2916
2917 #define NUM_CRITICAL_LOCAL_INDEXES      9       /* fix if you change list above */
2918
2919                 criticalRelcachesBuilt = true;
2920         }
2921
2922         /*
2923          * Process critical shared indexes too.
2924          *
2925          * DatabaseNameIndexId isn't critical for relcache loading, but rather for
2926          * initial lookup of MyDatabaseId, without which we'll never find any
2927          * non-shared catalogs at all.  Autovacuum calls InitPostgres with a
2928          * database OID, so it instead depends on DatabaseOidIndexId.  We also
2929          * need to nail up some indexes on pg_authid and pg_auth_members for use
2930          * during client authentication.
2931          */
2932         if (!criticalSharedRelcachesBuilt)
2933         {
2934                 load_critical_index(DatabaseNameIndexId,
2935                                                         DatabaseRelationId);
2936                 load_critical_index(DatabaseOidIndexId,
2937                                                         DatabaseRelationId);
2938                 load_critical_index(AuthIdRolnameIndexId,
2939                                                         AuthIdRelationId);
2940                 load_critical_index(AuthIdOidIndexId,
2941                                                         AuthIdRelationId);
2942                 load_critical_index(AuthMemMemRoleIndexId,
2943                                                         AuthMemRelationId);
2944
2945 #define NUM_CRITICAL_SHARED_INDEXES 5   /* fix if you change list above */
2946
2947                 criticalSharedRelcachesBuilt = true;
2948         }
2949
2950         /*
2951          * Now, scan all the relcache entries and update anything that might be
2952          * wrong in the results from formrdesc or the relcache cache file. If we
2953          * faked up relcache entries using formrdesc, then read the real pg_class
2954          * rows and replace the fake entries with them. Also, if any of the
2955          * relcache entries have rules or triggers, load that info the hard way
2956          * since it isn't recorded in the cache file.
2957          *
2958          * Whenever we access the catalogs to read data, there is a possibility of
2959          * a shared-inval cache flush causing relcache entries to be removed.
2960          * Since hash_seq_search only guarantees to still work after the *current*
2961          * entry is removed, it's unsafe to continue the hashtable scan afterward.
2962          * We handle this by restarting the scan from scratch after each access.
2963          * This is theoretically O(N^2), but the number of entries that actually
2964          * need to be fixed is small enough that it doesn't matter.
2965          */
2966         hash_seq_init(&status, RelationIdCache);
2967
2968         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2969         {
2970                 Relation        relation = idhentry->reldesc;
2971                 bool            restart = false;
2972
2973                 /*
2974                  * Make sure *this* entry doesn't get flushed while we work with it.
2975                  */
2976                 RelationIncrementReferenceCount(relation);
2977
2978                 /*
2979                  * If it's a faked-up entry, read the real pg_class tuple.
2980                  */
2981                 if (relation->rd_rel->relowner == InvalidOid)
2982                 {
2983                         HeapTuple       htup;
2984                         Form_pg_class relp;
2985
2986                         htup = SearchSysCache1(RELOID,
2987                                                            ObjectIdGetDatum(RelationGetRelid(relation)));
2988                         if (!HeapTupleIsValid(htup))
2989                                 elog(FATAL, "cache lookup failed for relation %u",
2990                                          RelationGetRelid(relation));
2991                         relp = (Form_pg_class) GETSTRUCT(htup);
2992
2993                         /*
2994                          * Copy tuple to relation->rd_rel. (See notes in
2995                          * AllocateRelationDesc())
2996                          */
2997                         memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2998
2999                         /* Update rd_options while we have the tuple */
3000                         if (relation->rd_options)
3001                                 pfree(relation->rd_options);
3002                         RelationParseRelOptions(relation, htup);
3003
3004                         /*
3005                          * Check the values in rd_att were set up correctly.  (We cannot
3006                          * just copy them over now: formrdesc must have set up the rd_att
3007                          * data correctly to start with, because it may already have been
3008                          * copied into one or more catcache entries.)
3009                          */
3010                         Assert(relation->rd_att->tdtypeid == relp->reltype);
3011                         Assert(relation->rd_att->tdtypmod == -1);
3012                         Assert(relation->rd_att->tdhasoid == relp->relhasoids);
3013
3014                         ReleaseSysCache(htup);
3015
3016                         /* relowner had better be OK now, else we'll loop forever */
3017                         if (relation->rd_rel->relowner == InvalidOid)
3018                                 elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3019                                          RelationGetRelationName(relation));
3020
3021                         restart = true;
3022                 }
3023
3024                 /*
3025                  * Fix data that isn't saved in relcache cache file.
3026                  *
3027                  * relhasrules or relhastriggers could possibly be wrong or out of
3028                  * date.  If we don't actually find any rules or triggers, clear the
3029                  * local copy of the flag so that we don't get into an infinite loop
3030                  * here.  We don't make any attempt to fix the pg_class entry, though.
3031                  */
3032                 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3033                 {
3034                         RelationBuildRuleLock(relation);
3035                         if (relation->rd_rules == NULL)
3036                                 relation->rd_rel->relhasrules = false;
3037                         restart = true;
3038                 }
3039                 if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3040                 {
3041                         RelationBuildTriggers(relation);
3042                         if (relation->trigdesc == NULL)
3043                                 relation->rd_rel->relhastriggers = false;
3044                         restart = true;
3045                 }
3046
3047                 /* Release hold on the relation */
3048                 RelationDecrementReferenceCount(relation);
3049
3050                 /* Now, restart the hashtable scan if needed */
3051                 if (restart)
3052                 {
3053                         hash_seq_term(&status);
3054                         hash_seq_init(&status, RelationIdCache);
3055                 }
3056         }
3057
3058         /*
3059          * Lastly, write out new relcache cache files if needed.  We don't bother
3060          * to distinguish cases where only one of the two needs an update.
3061          */
3062         if (needNewCacheFile)
3063         {
3064                 /*
3065                  * Force all the catcaches to finish initializing and thereby open the
3066                  * catalogs and indexes they use.  This will preload the relcache with
3067                  * entries for all the most important system catalogs and indexes, so
3068                  * that the init files will be most useful for future backends.
3069                  */
3070                 InitCatalogCachePhase2();
3071
3072                 /* reset initFileRelationIds list; we'll fill it during write */
3073                 initFileRelationIds = NIL;
3074
3075                 /* now write the files */
3076                 write_relcache_init_file(true);
3077                 write_relcache_init_file(false);
3078         }
3079 }
3080
3081 /*
3082  * Load one critical system index into the relcache
3083  *
3084  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3085  * it belongs to.
3086  */
3087 static void
3088 load_critical_index(Oid indexoid, Oid heapoid)
3089 {
3090         Relation        ird;
3091
3092         /*
3093          * We must lock the underlying catalog before locking the index to avoid
3094          * deadlock, since RelationBuildDesc might well need to read the catalog,
3095          * and if anyone else is exclusive-locking this catalog and index they'll
3096          * be doing it in that order.
3097          */
3098         LockRelationOid(heapoid, AccessShareLock);
3099         LockRelationOid(indexoid, AccessShareLock);
3100         ird = RelationBuildDesc(indexoid, true);
3101         if (ird == NULL)
3102                 elog(PANIC, "could not open critical system index %u", indexoid);
3103         ird->rd_isnailed = true;
3104         ird->rd_refcnt = 1;
3105         UnlockRelationOid(indexoid, AccessShareLock);
3106         UnlockRelationOid(heapoid, AccessShareLock);
3107 }
3108
3109 /*
3110  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3111  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3112  *
3113  * We need this kluge because we have to be able to access non-fixed-width
3114  * fields of pg_class and pg_index before we have the standard catalog caches
3115  * available.  We use predefined data that's set up in just the same way as
3116  * the bootstrapped reldescs used by formrdesc().  The resulting tupdesc is
3117  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3118  * does it have a TupleConstr field.  But it's good enough for the purpose of
3119  * extracting fields.
3120  */
3121 static TupleDesc
3122 BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs,
3123                                                  bool hasoids)
3124 {
3125         TupleDesc       result;
3126         MemoryContext oldcxt;
3127         int                     i;
3128
3129         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3130
3131         result = CreateTemplateTupleDesc(natts, hasoids);
3132         result->tdtypeid = RECORDOID;           /* not right, but we don't care */
3133         result->tdtypmod = -1;
3134
3135         for (i = 0; i < natts; i++)
3136         {
3137                 memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3138                 /* make sure attcacheoff is valid */
3139                 result->attrs[i]->attcacheoff = -1;
3140         }
3141
3142         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
3143         result->attrs[0]->attcacheoff = 0;
3144
3145         /* Note: we don't bother to set up a TupleConstr entry */
3146
3147         MemoryContextSwitchTo(oldcxt);
3148
3149         return result;
3150 }
3151
3152 static TupleDesc
3153 GetPgClassDescriptor(void)
3154 {
3155         static TupleDesc pgclassdesc = NULL;
3156
3157         /* Already done? */
3158         if (pgclassdesc == NULL)
3159                 pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
3160                                                                                            Desc_pg_class,
3161                                                                                            true);
3162
3163         return pgclassdesc;
3164 }
3165
3166 static TupleDesc
3167 GetPgIndexDescriptor(void)
3168 {
3169         static TupleDesc pgindexdesc = NULL;
3170
3171         /* Already done? */
3172         if (pgindexdesc == NULL)
3173                 pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
3174                                                                                            Desc_pg_index,
3175                                                                                            false);
3176
3177         return pgindexdesc;
3178 }
3179
3180 /*
3181  * Load any default attribute value definitions for the relation.
3182  */
3183 static void
3184 AttrDefaultFetch(Relation relation)
3185 {
3186         AttrDefault *attrdef = relation->rd_att->constr->defval;
3187         int                     ndef = relation->rd_att->constr->num_defval;
3188         Relation        adrel;
3189         SysScanDesc adscan;
3190         ScanKeyData skey;
3191         HeapTuple       htup;
3192         Datum           val;
3193         bool            isnull;
3194         int                     found;
3195         int                     i;
3196
3197         ScanKeyInit(&skey,
3198                                 Anum_pg_attrdef_adrelid,
3199                                 BTEqualStrategyNumber, F_OIDEQ,
3200                                 ObjectIdGetDatum(RelationGetRelid(relation)));
3201
3202         adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
3203         adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
3204                                                                 SnapshotNow, 1, &skey);
3205         found = 0;
3206
3207         while (HeapTupleIsValid(htup = systable_getnext(adscan)))
3208         {
3209                 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
3210
3211                 for (i = 0; i < ndef; i++)
3212                 {
3213                         if (adform->adnum != attrdef[i].adnum)
3214                                 continue;
3215                         if (attrdef[i].adbin != NULL)
3216                                 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
3217                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
3218                                          RelationGetRelationName(relation));
3219                         else
3220                                 found++;
3221
3222                         val = fastgetattr(htup,
3223                                                           Anum_pg_attrdef_adbin,
3224                                                           adrel->rd_att, &isnull);
3225                         if (isnull)
3226                                 elog(WARNING, "null adbin for attr %s of rel %s",
3227                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
3228                                          RelationGetRelationName(relation));
3229                         else
3230                                 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
3231                                                                                                    TextDatumGetCString(val));
3232                         break;
3233                 }
3234
3235                 if (i >= ndef)
3236                         elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
3237                                  adform->adnum, RelationGetRelationName(relation));
3238         }
3239
3240         systable_endscan(adscan);
3241         heap_close(adrel, AccessShareLock);
3242
3243         if (found != ndef)
3244                 elog(WARNING, "%d attrdef record(s) missing for rel %s",
3245                          ndef - found, RelationGetRelationName(relation));
3246 }
3247
3248 /*
3249  * Load any check constraints for the relation.
3250  */
3251 static void
3252 CheckConstraintFetch(Relation relation)
3253 {
3254         ConstrCheck *check = relation->rd_att->constr->check;
3255         int                     ncheck = relation->rd_att->constr->num_check;
3256         Relation        conrel;
3257         SysScanDesc conscan;
3258         ScanKeyData skey[1];
3259         HeapTuple       htup;
3260         Datum           val;
3261         bool            isnull;
3262         int                     found = 0;
3263
3264         ScanKeyInit(&skey[0],
3265                                 Anum_pg_constraint_conrelid,
3266                                 BTEqualStrategyNumber, F_OIDEQ,
3267                                 ObjectIdGetDatum(RelationGetRelid(relation)));
3268
3269         conrel = heap_open(ConstraintRelationId, AccessShareLock);
3270         conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
3271                                                                  SnapshotNow, 1, skey);
3272
3273         while (HeapTupleIsValid(htup = systable_getnext(conscan)))
3274         {
3275                 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
3276
3277                 /* We want check constraints only */
3278                 if (conform->contype != CONSTRAINT_CHECK)
3279                         continue;
3280
3281                 if (found >= ncheck)
3282                         elog(ERROR, "unexpected constraint record found for rel %s",
3283                                  RelationGetRelationName(relation));
3284
3285                 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
3286                                                                                                   NameStr(conform->conname));
3287
3288                 /* Grab and test conbin is actually set */
3289                 val = fastgetattr(htup,
3290                                                   Anum_pg_constraint_conbin,
3291                                                   conrel->rd_att, &isnull);
3292                 if (isnull)
3293                         elog(ERROR, "null conbin for rel %s",
3294                                  RelationGetRelationName(relation));
3295
3296                 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
3297                                                                                                  TextDatumGetCString(val));
3298                 found++;
3299         }
3300
3301         systable_endscan(conscan);
3302         heap_close(conrel, AccessShareLock);
3303
3304         if (found != ncheck)
3305                 elog(ERROR, "%d constraint record(s) missing for rel %s",
3306                          ncheck - found, RelationGetRelationName(relation));
3307 }
3308
3309 /*
3310  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
3311  *
3312  * The index list is created only if someone requests it.  We scan pg_index
3313  * to find relevant indexes, and add the list to the relcache entry so that
3314  * we won't have to compute it again.  Note that shared cache inval of a
3315  * relcache entry will delete the old list and set rd_indexvalid to 0,
3316  * so that we must recompute the index list on next request.  This handles
3317  * creation or deletion of an index.
3318  *
3319  * The returned list is guaranteed to be sorted in order by OID.  This is
3320  * needed by the executor, since for index types that we obtain exclusive
3321  * locks on when updating the index, all backends must lock the indexes in
3322  * the same order or we will get deadlocks (see ExecOpenIndices()).  Any
3323  * consistent ordering would do, but ordering by OID is easy.
3324  *
3325  * Since shared cache inval causes the relcache's copy of the list to go away,
3326  * we return a copy of the list palloc'd in the caller's context.  The caller
3327  * may list_free() the returned list after scanning it. This is necessary
3328  * since the caller will typically be doing syscache lookups on the relevant
3329  * indexes, and syscache lookup could cause SI messages to be processed!
3330  *
3331  * We also update rd_oidindex, which this module treats as effectively part
3332  * of the index list.  rd_oidindex is valid when rd_indexvalid isn't zero;
3333  * it is the pg_class OID of a unique index on OID when the relation has one,
3334  * and InvalidOid if there is no such index.
3335  */
3336 List *
3337 RelationGetIndexList(Relation relation)
3338 {
3339         Relation        indrel;
3340         SysScanDesc indscan;
3341         ScanKeyData skey;
3342         HeapTuple       htup;
3343         List       *result;
3344         Oid                     oidIndex;
3345         MemoryContext oldcxt;
3346
3347         /* Quick exit if we already computed the list. */
3348         if (relation->rd_indexvalid != 0)
3349                 return list_copy(relation->rd_indexlist);
3350
3351         /*
3352          * We build the list we intend to return (in the caller's context) while
3353          * doing the scan.      After successfully completing the scan, we copy that
3354          * list into the relcache entry.  This avoids cache-context memory leakage
3355          * if we get some sort of error partway through.
3356          */
3357         result = NIL;
3358         oidIndex = InvalidOid;
3359
3360         /* Prepare to scan pg_index for entries having indrelid = this rel. */
3361         ScanKeyInit(&skey,
3362                                 Anum_pg_index_indrelid,
3363                                 BTEqualStrategyNumber, F_OIDEQ,
3364                                 ObjectIdGetDatum(RelationGetRelid(relation)));
3365
3366         indrel = heap_open(IndexRelationId, AccessShareLock);
3367         indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
3368                                                                  SnapshotNow, 1, &skey);
3369
3370         while (HeapTupleIsValid(htup = systable_getnext(indscan)))
3371         {
3372                 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
3373
3374                 /* Add index's OID to result list in the proper order */
3375                 result = insert_ordered_oid(result, index->indexrelid);
3376
3377                 /* Check to see if it is a unique, non-partial btree index on OID */
3378                 if (index->indnatts == 1 &&
3379                         index->indisunique && index->indimmediate &&
3380                         index->indkey.values[0] == ObjectIdAttributeNumber &&
3381                         index->indclass.values[0] == OID_BTREE_OPS_OID &&
3382                         heap_attisnull(htup, Anum_pg_index_indpred))
3383                         oidIndex = index->indexrelid;
3384         }
3385
3386         systable_endscan(indscan);
3387         heap_close(indrel, AccessShareLock);
3388
3389         /* Now save a copy of the completed list in the relcache entry. */
3390         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3391         relation->rd_indexlist = list_copy(result);
3392         relation->rd_oidindex = oidIndex;
3393         relation->rd_indexvalid = 1;
3394         MemoryContextSwitchTo(oldcxt);
3395
3396         return result;
3397 }
3398
3399 /*
3400  * insert_ordered_oid
3401  *              Insert a new Oid into a sorted list of Oids, preserving ordering
3402  *
3403  * Building the ordered list this way is O(N^2), but with a pretty small
3404  * constant, so for the number of entries we expect it will probably be
3405  * faster than trying to apply qsort().  Most tables don't have very many
3406  * indexes...
3407  */
3408 static List *
3409 insert_ordered_oid(List *list, Oid datum)
3410 {
3411         ListCell   *prev;
3412
3413         /* Does the datum belong at the front? */
3414         if (list == NIL || datum < linitial_oid(list))
3415                 return lcons_oid(datum, list);
3416         /* No, so find the entry it belongs after */
3417         prev = list_head(list);
3418         for (;;)
3419         {
3420                 ListCell   *curr = lnext(prev);
3421
3422                 if (curr == NULL || datum < lfirst_oid(curr))
3423                         break;                          /* it belongs after 'prev', before 'curr' */
3424
3425                 prev = curr;
3426         }
3427         /* Insert datum into list after 'prev' */
3428         lappend_cell_oid(list, prev, datum);
3429         return list;
3430 }
3431
3432 /*
3433  * RelationSetIndexList -- externally force the index list contents
3434  *
3435  * This is used to temporarily override what we think the set of valid
3436  * indexes is (including the presence or absence of an OID index).
3437  * The forcing will be valid only until transaction commit or abort.
3438  *
3439  * This should only be applied to nailed relations, because in a non-nailed
3440  * relation the hacked index list could be lost at any time due to SI
3441  * messages.  In practice it is only used on pg_class (see REINDEX).
3442  *
3443  * It is up to the caller to make sure the given list is correctly ordered.
3444  *
3445  * We deliberately do not change rd_indexattr here: even when operating
3446  * with a temporary partial index list, HOT-update decisions must be made
3447  * correctly with respect to the full index set.  It is up to the caller
3448  * to ensure that a correct rd_indexattr set has been cached before first
3449  * calling RelationSetIndexList; else a subsequent inquiry might cause a
3450  * wrong rd_indexattr set to get computed and cached.
3451  */
3452 void
3453 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
3454 {
3455         MemoryContext oldcxt;
3456
3457         Assert(relation->rd_isnailed);
3458         /* Copy the list into the cache context (could fail for lack of mem) */
3459         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3460         indexIds = list_copy(indexIds);
3461         MemoryContextSwitchTo(oldcxt);
3462         /* Okay to replace old list */
3463         list_free(relation->rd_indexlist);
3464         relation->rd_indexlist = indexIds;
3465         relation->rd_oidindex = oidIndex;
3466         relation->rd_indexvalid = 2;    /* mark list as forced */
3467         /* must flag that we have a forced index list */
3468         need_eoxact_work = true;
3469 }
3470
3471 /*
3472  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
3473  *
3474  * Returns InvalidOid if there is no such index.
3475  */
3476 Oid
3477 RelationGetOidIndex(Relation relation)
3478 {
3479         List       *ilist;
3480
3481         /*
3482          * If relation doesn't have OIDs at all, caller is probably confused. (We
3483          * could just silently return InvalidOid, but it seems better to throw an
3484          * assertion.)
3485          */
3486         Assert(relation->rd_rel->relhasoids);
3487
3488         if (relation->rd_indexvalid == 0)
3489         {
3490                 /* RelationGetIndexList does the heavy lifting. */
3491                 ilist = RelationGetIndexList(relation);
3492                 list_free(ilist);
3493                 Assert(relation->rd_indexvalid != 0);
3494         }
3495
3496         return relation->rd_oidindex;
3497 }
3498
3499 /*
3500  * RelationGetIndexExpressions -- get the index expressions for an index
3501  *
3502  * We cache the result of transforming pg_index.indexprs into a node tree.
3503  * If the rel is not an index or has no expressional columns, we return NIL.
3504  * Otherwise, the returned tree is copied into the caller's memory context.
3505  * (We don't want to return a pointer to the relcache copy, since it could
3506  * disappear due to relcache invalidation.)
3507  */
3508 List *
3509 RelationGetIndexExpressions(Relation relation)
3510 {
3511         List       *result;
3512         Datum           exprsDatum;
3513         bool            isnull;
3514         char       *exprsString;
3515         MemoryContext oldcxt;
3516
3517         /* Quick exit if we already computed the result. */
3518         if (relation->rd_indexprs)
3519                 return (List *) copyObject(relation->rd_indexprs);
3520
3521         /* Quick exit if there is nothing to do. */
3522         if (relation->rd_indextuple == NULL ||
3523                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
3524                 return NIL;
3525
3526         /*
3527          * We build the tree we intend to return in the caller's context. After
3528          * successfully completing the work, we copy it into the relcache entry.
3529          * This avoids problems if we get some sort of error partway through.
3530          */
3531         exprsDatum = heap_getattr(relation->rd_indextuple,
3532                                                           Anum_pg_index_indexprs,
3533                                                           GetPgIndexDescriptor(),
3534                                                           &isnull);
3535         Assert(!isnull);
3536         exprsString = TextDatumGetCString(exprsDatum);
3537         result = (List *) stringToNode(exprsString);
3538         pfree(exprsString);
3539
3540         /*
3541          * Run the expressions through eval_const_expressions. This is not just an
3542          * optimization, but is necessary, because the planner will be comparing
3543          * them to similarly-processed qual clauses, and may fail to detect valid
3544          * matches without this.  We don't bother with canonicalize_qual, however.
3545          */
3546         result = (List *) eval_const_expressions(NULL, (Node *) result);
3547
3548         /*
3549          * Also mark any coercion format fields as "don't care", so that the
3550          * planner can match to both explicit and implicit coercions.
3551          */
3552         set_coercionform_dontcare((Node *) result);
3553
3554         /* May as well fix opfuncids too */
3555         fix_opfuncids((Node *) result);
3556
3557         /* Now save a copy of the completed tree in the relcache entry. */
3558         oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
3559         relation->rd_indexprs = (List *) copyObject(result);
3560         MemoryContextSwitchTo(oldcxt);
3561
3562         return result;
3563 }
3564
3565 /*
3566  * RelationGetIndexPredicate -- get the index predicate for an index
3567  *
3568  * We cache the result of transforming pg_index.indpred into an implicit-AND
3569  * node tree (suitable for ExecQual).
3570  * If the rel is not an index or has no predicate, we return NIL.
3571  * Otherwise, the returned tree is copied into the caller's memory context.
3572  * (We don't want to return a pointer to the relcache copy, since it could
3573  * disappear due to relcache invalidation.)
3574  */
3575 List *
3576 RelationGetIndexPredicate(Relation relation)
3577 {
3578         List       *result;
3579         Datum           predDatum;
3580         bool            isnull;
3581         char       *predString;
3582         MemoryContext oldcxt;
3583
3584         /* Quick exit if we already computed the result. */
3585         if (relation->rd_indpred)
3586                 return (List *) copyObject(relation->rd_indpred);
3587
3588         /* Quick exit if there is nothing to do. */
3589         if (relation->rd_indextuple == NULL ||
3590                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
3591                 return NIL;
3592
3593         /*
3594          * We build the tree we intend to return in the caller's context. After
3595          * successfully completing the work, we copy it into the relcache entry.
3596          * This avoids problems if we get some sort of error partway through.
3597          */
3598         predDatum = heap_getattr(relation->rd_indextuple,
3599                                                          Anum_pg_index_indpred,
3600                                                          GetPgIndexDescriptor(),
3601                                                          &isnull);
3602         Assert(!isnull);
3603         predString = TextDatumGetCString(predDatum);
3604         result = (List *) stringToNode(predString);
3605         pfree(predString);
3606
3607         /*
3608          * Run the expression through const-simplification and canonicalization.
3609          * This is not just an optimization, but is necessary, because the planner
3610          * will be comparing it to similarly-processed qual clauses, and may fail
3611          * to detect valid matches without this.  This must match the processing
3612          * done to qual clauses in preprocess_expression()!  (We can skip the
3613          * stuff involving subqueries, however, since we don't allow any in index
3614          * predicates.)
3615          */
3616         result = (List *) eval_const_expressions(NULL, (Node *) result);
3617
3618         result = (List *) canonicalize_qual((Expr *) result);
3619
3620         /*
3621          * Also mark any coercion format fields as "don't care", so that the
3622          * planner can match to both explicit and implicit coercions.
3623          */
3624         set_coercionform_dontcare((Node *) result);
3625
3626         /* Also convert to implicit-AND format */
3627         result = make_ands_implicit((Expr *) result);
3628
3629         /* May as well fix opfuncids too */
3630         fix_opfuncids((Node *) result);
3631
3632         /* Now save a copy of the completed tree in the relcache entry. */
3633         oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
3634         relation->rd_indpred = (List *) copyObject(result);
3635         MemoryContextSwitchTo(oldcxt);
3636
3637         return result;
3638 }
3639
3640 /*
3641  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
3642  *
3643  * The result has a bit set for each attribute used anywhere in the index
3644  * definitions of all the indexes on this relation.  (This includes not only
3645  * simple index keys, but attributes used in expressions and partial-index
3646  * predicates.)
3647  *
3648  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
3649  * we can include system attributes (e.g., OID) in the bitmap representation.
3650  *
3651  * The returned result is palloc'd in the caller's memory context and should
3652  * be bms_free'd when not needed anymore.
3653  */
3654 Bitmapset *
3655 RelationGetIndexAttrBitmap(Relation relation)
3656 {
3657         Bitmapset  *indexattrs;
3658         List       *indexoidlist;
3659         ListCell   *l;
3660         MemoryContext oldcxt;
3661
3662         /* Quick exit if we already computed the result. */
3663         if (relation->rd_indexattr != NULL)
3664                 return bms_copy(relation->rd_indexattr);
3665
3666         /* Fast path if definitely no indexes */
3667         if (!RelationGetForm(relation)->relhasindex)
3668                 return NULL;
3669
3670         /*
3671          * Get cached list of index OIDs
3672          */
3673         indexoidlist = RelationGetIndexList(relation);
3674
3675         /* Fall out if no indexes (but relhasindex was set) */
3676         if (indexoidlist == NIL)
3677                 return NULL;
3678
3679         /*
3680          * For each index, add referenced attributes to indexattrs.
3681          */
3682         indexattrs = NULL;
3683         foreach(l, indexoidlist)
3684         {
3685                 Oid                     indexOid = lfirst_oid(l);
3686                 Relation        indexDesc;
3687                 IndexInfo  *indexInfo;
3688                 int                     i;
3689
3690                 indexDesc = index_open(indexOid, AccessShareLock);
3691
3692                 /* Extract index key information from the index's pg_index row */
3693                 indexInfo = BuildIndexInfo(indexDesc);
3694
3695                 /* Collect simple attribute references */
3696                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
3697                 {
3698                         int                     attrnum = indexInfo->ii_KeyAttrNumbers[i];
3699
3700                         if (attrnum != 0)
3701                                 indexattrs = bms_add_member(indexattrs,
3702                                                            attrnum - FirstLowInvalidHeapAttributeNumber);
3703                 }
3704
3705                 /* Collect all attributes used in expressions, too */
3706                 pull_varattnos((Node *) indexInfo->ii_Expressions, &indexattrs);
3707
3708                 /* Collect all attributes in the index predicate, too */
3709                 pull_varattnos((Node *) indexInfo->ii_Predicate, &indexattrs);
3710
3711                 index_close(indexDesc, AccessShareLock);
3712         }
3713
3714         list_free(indexoidlist);
3715
3716         /* Now save a copy of the bitmap in the relcache entry. */
3717         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3718         relation->rd_indexattr = bms_copy(indexattrs);
3719         MemoryContextSwitchTo(oldcxt);
3720
3721         /* We return our original working copy for caller to play with */
3722         return indexattrs;
3723 }
3724
3725 /*
3726  * RelationGetExclusionInfo -- get info about index's exclusion constraint
3727  *
3728  * This should be called only for an index that is known to have an
3729  * associated exclusion constraint.  It returns arrays (palloc'd in caller's
3730  * context) of the exclusion operator OIDs, their underlying functions'
3731  * OIDs, and their strategy numbers in the index's opclasses.  We cache
3732  * all this information since it requires a fair amount of work to get.
3733  */
3734 void
3735 RelationGetExclusionInfo(Relation indexRelation,
3736                                                  Oid **operators,
3737                                                  Oid **procs,
3738                                                  uint16 **strategies)
3739 {
3740         int                     ncols = indexRelation->rd_rel->relnatts;
3741         Oid                *ops;
3742         Oid                *funcs;
3743         uint16     *strats;
3744         Relation        conrel;
3745         SysScanDesc conscan;
3746         ScanKeyData skey[1];
3747         HeapTuple       htup;
3748         bool            found;
3749         MemoryContext oldcxt;
3750         int                     i;
3751
3752         /* Allocate result space in caller context */
3753         *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
3754         *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
3755         *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
3756
3757         /* Quick exit if we have the data cached already */
3758         if (indexRelation->rd_exclstrats != NULL)
3759         {
3760                 memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
3761                 memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
3762                 memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
3763                 return;
3764         }
3765
3766         /*
3767          * Search pg_constraint for the constraint associated with the index. To
3768          * make this not too painfully slow, we use the index on conrelid; that
3769          * will hold the parent relation's OID not the index's own OID.
3770          */
3771         ScanKeyInit(&skey[0],
3772                                 Anum_pg_constraint_conrelid,
3773                                 BTEqualStrategyNumber, F_OIDEQ,
3774                                 ObjectIdGetDatum(indexRelation->rd_index->indrelid));
3775
3776         conrel = heap_open(ConstraintRelationId, AccessShareLock);
3777         conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
3778                                                                  SnapshotNow, 1, skey);
3779         found = false;
3780
3781         while (HeapTupleIsValid(htup = systable_getnext(conscan)))
3782         {
3783                 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
3784                 Datum           val;
3785                 bool            isnull;
3786                 ArrayType  *arr;
3787                 int                     nelem;
3788
3789                 /* We want the exclusion constraint owning the index */
3790                 if (conform->contype != CONSTRAINT_EXCLUSION ||
3791                         conform->conindid != RelationGetRelid(indexRelation))
3792                         continue;
3793
3794                 /* There should be only one */
3795                 if (found)
3796                         elog(ERROR, "unexpected exclusion constraint record found for rel %s",
3797                                  RelationGetRelationName(indexRelation));
3798                 found = true;
3799
3800                 /* Extract the operator OIDS from conexclop */
3801                 val = fastgetattr(htup,
3802                                                   Anum_pg_constraint_conexclop,
3803                                                   conrel->rd_att, &isnull);
3804                 if (isnull)
3805                         elog(ERROR, "null conexclop for rel %s",
3806                                  RelationGetRelationName(indexRelation));
3807
3808                 arr = DatumGetArrayTypeP(val);  /* ensure not toasted */
3809                 nelem = ARR_DIMS(arr)[0];
3810                 if (ARR_NDIM(arr) != 1 ||
3811                         nelem != ncols ||
3812                         ARR_HASNULL(arr) ||
3813                         ARR_ELEMTYPE(arr) != OIDOID)
3814                         elog(ERROR, "conexclop is not a 1-D Oid array");
3815
3816                 memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
3817         }
3818
3819         systable_endscan(conscan);
3820         heap_close(conrel, AccessShareLock);
3821
3822         if (!found)
3823                 elog(ERROR, "exclusion constraint record missing for rel %s",
3824                          RelationGetRelationName(indexRelation));
3825
3826         /* We need the func OIDs and strategy numbers too */
3827         for (i = 0; i < ncols; i++)
3828         {
3829                 funcs[i] = get_opcode(ops[i]);
3830                 strats[i] = get_op_opfamily_strategy(ops[i],
3831                                                                                          indexRelation->rd_opfamily[i]);
3832                 /* shouldn't fail, since it was checked at index creation */
3833                 if (strats[i] == InvalidStrategy)
3834                         elog(ERROR, "could not find strategy for operator %u in family %u",
3835                                  ops[i], indexRelation->rd_opfamily[i]);
3836         }
3837
3838         /* Save a copy of the results in the relcache entry. */
3839         oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
3840         indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
3841         indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
3842         indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
3843         memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
3844         memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
3845         memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
3846         MemoryContextSwitchTo(oldcxt);
3847 }
3848
3849
3850 /*
3851  *      load_relcache_init_file, write_relcache_init_file
3852  *
3853  *              In late 1992, we started regularly having databases with more than
3854  *              a thousand classes in them.  With this number of classes, it became
3855  *              critical to do indexed lookups on the system catalogs.
3856  *
3857  *              Bootstrapping these lookups is very hard.  We want to be able to
3858  *              use an index on pg_attribute, for example, but in order to do so,
3859  *              we must have read pg_attribute for the attributes in the index,
3860  *              which implies that we need to use the index.
3861  *
3862  *              In order to get around the problem, we do the following:
3863  *
3864  *                 +  When the database system is initialized (at initdb time), we
3865  *                        don't use indexes.  We do sequential scans.
3866  *
3867  *                 +  When the backend is started up in normal mode, we load an image
3868  *                        of the appropriate relation descriptors, in internal format,
3869  *                        from an initialization file in the data/base/... directory.
3870  *
3871  *                 +  If the initialization file isn't there, then we create the
3872  *                        relation descriptors using sequential scans and write 'em to
3873  *                        the initialization file for use by subsequent backends.
3874  *
3875  *              As of Postgres 9.0, there is one local initialization file in each
3876  *              database, plus one shared initialization file for shared catalogs.
3877  *
3878  *              We could dispense with the initialization files and just build the
3879  *              critical reldescs the hard way on every backend startup, but that
3880  *              slows down backend startup noticeably.
3881  *
3882  *              We can in fact go further, and save more relcache entries than
3883  *              just the ones that are absolutely critical; this allows us to speed
3884  *              up backend startup by not having to build such entries the hard way.
3885  *              Presently, all the catalog and index entries that are referred to
3886  *              by catcaches are stored in the initialization files.
3887  *
3888  *              The same mechanism that detects when catcache and relcache entries
3889  *              need to be invalidated (due to catalog updates) also arranges to
3890  *              unlink the initialization files when the contents may be out of date.
3891  *              The files will then be rebuilt during the next backend startup.
3892  */
3893
3894 /*
3895  * load_relcache_init_file -- attempt to load cache from the shared
3896  * or local cache init file
3897  *
3898  * If successful, return TRUE and set criticalRelcachesBuilt or
3899  * criticalSharedRelcachesBuilt to true.
3900  * If not successful, return FALSE.
3901  *
3902  * NOTE: we assume we are already switched into CacheMemoryContext.
3903  */
3904 static bool
3905 load_relcache_init_file(bool shared)
3906 {
3907         FILE       *fp;
3908         char            initfilename[MAXPGPATH];
3909         Relation   *rels;
3910         int                     relno,
3911                                 num_rels,
3912                                 max_rels,
3913                                 nailed_rels,
3914                                 nailed_indexes,
3915                                 magic;
3916         int                     i;
3917
3918         if (shared)
3919                 snprintf(initfilename, sizeof(initfilename), "global/%s",
3920                                  RELCACHE_INIT_FILENAME);
3921         else
3922                 snprintf(initfilename, sizeof(initfilename), "%s/%s",
3923                                  DatabasePath, RELCACHE_INIT_FILENAME);
3924
3925         fp = AllocateFile(initfilename, PG_BINARY_R);
3926         if (fp == NULL)
3927                 return false;
3928
3929         /*
3930          * Read the index relcache entries from the file.  Note we will not enter
3931          * any of them into the cache if the read fails partway through; this
3932          * helps to guard against broken init files.
3933          */
3934         max_rels = 100;
3935         rels = (Relation *) palloc(max_rels * sizeof(Relation));
3936         num_rels = 0;
3937         nailed_rels = nailed_indexes = 0;
3938
3939         /* check for correct magic number (compatible version) */
3940         if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3941                 goto read_failed;
3942         if (magic != RELCACHE_INIT_FILEMAGIC)
3943                 goto read_failed;
3944
3945         for (relno = 0;; relno++)
3946         {
3947                 Size            len;
3948                 size_t          nread;
3949                 Relation        rel;
3950                 Form_pg_class relform;
3951                 bool            has_not_null;
3952
3953                 /* first read the relation descriptor length */
3954                 nread = fread(&len, 1, sizeof(len), fp);
3955                 if (nread != sizeof(len))
3956                 {
3957                         if (nread == 0)
3958                                 break;                  /* end of file */
3959                         goto read_failed;
3960                 }
3961
3962                 /* safety check for incompatible relcache layout */
3963                 if (len != sizeof(RelationData))
3964                         goto read_failed;
3965
3966                 /* allocate another relcache header */
3967                 if (num_rels >= max_rels)
3968                 {
3969                         max_rels *= 2;
3970                         rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
3971                 }
3972
3973                 rel = rels[num_rels++] = (Relation) palloc(len);
3974
3975                 /* then, read the Relation structure */
3976                 if (fread(rel, 1, len, fp) != len)
3977                         goto read_failed;
3978
3979                 /* next read the relation tuple form */
3980                 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
3981                         goto read_failed;
3982
3983                 relform = (Form_pg_class) palloc(len);
3984                 if (fread(relform, 1, len, fp) != len)
3985                         goto read_failed;
3986
3987                 rel->rd_rel = relform;
3988
3989                 /* initialize attribute tuple forms */
3990                 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
3991                                                                                           relform->relhasoids);
3992                 rel->rd_att->tdrefcount = 1;    /* mark as refcounted */
3993
3994                 rel->rd_att->tdtypeid = relform->reltype;
3995                 rel->rd_att->tdtypmod = -1;             /* unnecessary, but... */
3996
3997                 /* next read all the attribute tuple form data entries */
3998                 has_not_null = false;
3999                 for (i = 0; i < relform->relnatts; i++)
4000                 {
4001                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4002                                 goto read_failed;
4003                         if (len != ATTRIBUTE_FIXED_PART_SIZE)
4004                                 goto read_failed;
4005                         if (fread(rel->rd_att->attrs[i], 1, len, fp) != len)
4006                                 goto read_failed;
4007
4008                         has_not_null |= rel->rd_att->attrs[i]->attnotnull;
4009                 }
4010
4011                 /* next read the access method specific field */
4012                 if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4013                         goto read_failed;
4014                 if (len > 0)
4015                 {
4016                         rel->rd_options = palloc(len);
4017                         if (fread(rel->rd_options, 1, len, fp) != len)
4018                                 goto read_failed;
4019                         if (len != VARSIZE(rel->rd_options))
4020                                 goto read_failed;               /* sanity check */
4021                 }
4022                 else
4023                 {
4024                         rel->rd_options = NULL;
4025                 }
4026
4027                 /* mark not-null status */
4028                 if (has_not_null)
4029                 {
4030                         TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
4031
4032                         constr->has_not_null = true;
4033                         rel->rd_att->constr = constr;
4034                 }
4035
4036                 /* If it's an index, there's more to do */
4037                 if (rel->rd_rel->relkind == RELKIND_INDEX)
4038                 {
4039                         Form_pg_am      am;
4040                         MemoryContext indexcxt;
4041                         Oid                *opfamily;
4042                         Oid                *opcintype;
4043                         Oid                *operator;
4044                         RegProcedure *support;
4045                         int                     nsupport;
4046                         int16      *indoption;
4047
4048                         /* Count nailed indexes to ensure we have 'em all */
4049                         if (rel->rd_isnailed)
4050                                 nailed_indexes++;
4051
4052                         /* next, read the pg_index tuple */
4053                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4054                                 goto read_failed;
4055
4056                         rel->rd_indextuple = (HeapTuple) palloc(len);
4057                         if (fread(rel->rd_indextuple, 1, len, fp) != len)
4058                                 goto read_failed;
4059
4060                         /* Fix up internal pointers in the tuple -- see heap_copytuple */
4061                         rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
4062                         rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
4063
4064                         /* next, read the access method tuple form */
4065                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4066                                 goto read_failed;
4067
4068                         am = (Form_pg_am) palloc(len);
4069                         if (fread(am, 1, len, fp) != len)
4070                                 goto read_failed;
4071                         rel->rd_am = am;
4072
4073                         /*
4074                          * prepare index info context --- parameters should match
4075                          * RelationInitIndexAccessInfo
4076                          */
4077                         indexcxt = AllocSetContextCreate(CacheMemoryContext,
4078                                                                                          RelationGetRelationName(rel),
4079                                                                                          ALLOCSET_SMALL_MINSIZE,
4080                                                                                          ALLOCSET_SMALL_INITSIZE,
4081                                                                                          ALLOCSET_SMALL_MAXSIZE);
4082                         rel->rd_indexcxt = indexcxt;
4083
4084                         /* next, read the vector of opfamily OIDs */
4085                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4086                                 goto read_failed;
4087
4088                         opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
4089                         if (fread(opfamily, 1, len, fp) != len)
4090                                 goto read_failed;
4091
4092                         rel->rd_opfamily = opfamily;
4093
4094                         /* next, read the vector of opcintype OIDs */
4095                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4096                                 goto read_failed;
4097
4098                         opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
4099                         if (fread(opcintype, 1, len, fp) != len)
4100                                 goto read_failed;
4101
4102                         rel->rd_opcintype = opcintype;
4103
4104                         /* next, read the vector of operator OIDs */
4105                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4106                                 goto read_failed;
4107
4108                         operator = (Oid *) MemoryContextAlloc(indexcxt, len);
4109                         if (fread(operator, 1, len, fp) != len)
4110                                 goto read_failed;
4111
4112                         rel->rd_operator = operator;
4113
4114                         /* next, read the vector of support procedures */
4115                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4116                                 goto read_failed;
4117                         support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
4118                         if (fread(support, 1, len, fp) != len)
4119                                 goto read_failed;
4120
4121                         rel->rd_support = support;
4122
4123                         /* finally, read the vector of indoption values */
4124                         if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
4125                                 goto read_failed;
4126
4127                         indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
4128                         if (fread(indoption, 1, len, fp) != len)
4129                                 goto read_failed;
4130
4131                         rel->rd_indoption = indoption;
4132
4133                         /* set up zeroed fmgr-info vectors */
4134                         rel->rd_aminfo = (RelationAmInfo *)
4135                                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
4136                         nsupport = relform->relnatts * am->amsupport;
4137                         rel->rd_supportinfo = (FmgrInfo *)
4138                                 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
4139                 }
4140                 else
4141                 {
4142                         /* Count nailed rels to ensure we have 'em all */
4143                         if (rel->rd_isnailed)
4144                                 nailed_rels++;
4145
4146                         Assert(rel->rd_index == NULL);
4147                         Assert(rel->rd_indextuple == NULL);
4148                         Assert(rel->rd_am == NULL);
4149                         Assert(rel->rd_indexcxt == NULL);
4150                         Assert(rel->rd_aminfo == NULL);
4151                         Assert(rel->rd_opfamily == NULL);
4152                         Assert(rel->rd_opcintype == NULL);
4153                         Assert(rel->rd_operator == NULL);
4154                         Assert(rel->rd_support == NULL);
4155                         Assert(rel->rd_supportinfo == NULL);
4156                         Assert(rel->rd_indoption == NULL);
4157                 }
4158
4159                 /*
4160                  * Rules and triggers are not saved (mainly because the internal
4161                  * format is complex and subject to change).  They must be rebuilt if
4162                  * needed by RelationCacheInitializePhase3.  This is not expected to
4163                  * be a big performance hit since few system catalogs have such. Ditto
4164                  * for index expressions, predicates, and exclusion info.
4165                  */
4166                 rel->rd_rules = NULL;
4167                 rel->rd_rulescxt = NULL;
4168                 rel->trigdesc = NULL;
4169                 rel->rd_indexprs = NIL;
4170                 rel->rd_indpred = NIL;
4171                 rel->rd_exclops = NULL;
4172                 rel->rd_exclprocs = NULL;
4173                 rel->rd_exclstrats = NULL;
4174
4175                 /*
4176                  * Reset transient-state fields in the relcache entry
4177                  */
4178                 rel->rd_smgr = NULL;
4179                 if (rel->rd_isnailed)
4180                         rel->rd_refcnt = 1;
4181                 else
4182                         rel->rd_refcnt = 0;
4183                 rel->rd_indexvalid = 0;
4184                 rel->rd_indexlist = NIL;
4185                 rel->rd_indexattr = NULL;
4186                 rel->rd_oidindex = InvalidOid;
4187                 rel->rd_createSubid = InvalidSubTransactionId;
4188                 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
4189                 rel->rd_amcache = NULL;
4190                 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
4191
4192                 /*
4193                  * Recompute lock and physical addressing info.  This is needed in
4194                  * case the pg_internal.init file was copied from some other database
4195                  * by CREATE DATABASE.
4196                  */
4197                 RelationInitLockInfo(rel);
4198                 RelationInitPhysicalAddr(rel);
4199         }
4200
4201         /*
4202          * We reached the end of the init file without apparent problem. Did we
4203          * get the right number of nailed items?  (This is a useful crosscheck in
4204          * case the set of critical rels or indexes changes.)
4205          */
4206         if (shared)
4207         {
4208                 if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
4209                         nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
4210                         goto read_failed;
4211         }
4212         else
4213         {
4214                 if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
4215                         nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
4216                         goto read_failed;
4217         }
4218
4219         /*
4220          * OK, all appears well.
4221          *
4222          * Now insert all the new relcache entries into the cache.
4223          */
4224         for (relno = 0; relno < num_rels; relno++)
4225         {
4226                 RelationCacheInsert(rels[relno]);
4227                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
4228                 if (!shared)
4229                         initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
4230                                                                                         initFileRelationIds);
4231         }
4232
4233         pfree(rels);
4234         FreeFile(fp);
4235
4236         if (shared)
4237                 criticalSharedRelcachesBuilt = true;
4238         else
4239                 criticalRelcachesBuilt = true;
4240         return true;
4241
4242         /*
4243          * init file is broken, so do it the hard way.  We don't bother trying to
4244          * free the clutter we just allocated; it's not in the relcache so it
4245          * won't hurt.
4246          */
4247 read_failed:
4248         pfree(rels);
4249         FreeFile(fp);
4250
4251         return false;
4252 }
4253
4254 /*
4255  * Write out a new initialization file with the current contents
4256  * of the relcache (either shared rels or local rels, as indicated).
4257  */
4258 static void
4259 write_relcache_init_file(bool shared)
4260 {
4261         FILE       *fp;
4262         char            tempfilename[MAXPGPATH];
4263         char            finalfilename[MAXPGPATH];
4264         int                     magic;
4265         HASH_SEQ_STATUS status;
4266         RelIdCacheEnt *idhentry;
4267         MemoryContext oldcxt;
4268         int                     i;
4269
4270         /*
4271          * We must write a temporary file and rename it into place. Otherwise,
4272          * another backend starting at about the same time might crash trying to
4273          * read the partially-complete file.
4274          */
4275         if (shared)
4276         {
4277                 snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
4278                                  RELCACHE_INIT_FILENAME, MyProcPid);
4279                 snprintf(finalfilename, sizeof(finalfilename), "global/%s",
4280                                  RELCACHE_INIT_FILENAME);
4281         }
4282         else
4283         {
4284                 snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
4285                                  DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
4286                 snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
4287                                  DatabasePath, RELCACHE_INIT_FILENAME);
4288         }
4289
4290         unlink(tempfilename);           /* in case it exists w/wrong permissions */
4291
4292         fp = AllocateFile(tempfilename, PG_BINARY_W);
4293         if (fp == NULL)
4294         {
4295                 /*
4296                  * We used to consider this a fatal error, but we might as well
4297                  * continue with backend startup ...
4298                  */
4299                 ereport(WARNING,
4300                                 (errcode_for_file_access(),
4301                                  errmsg("could not create relation-cache initialization file \"%s\": %m",
4302                                                 tempfilename),
4303                           errdetail("Continuing anyway, but there's something wrong.")));
4304                 return;
4305         }
4306
4307         /*
4308          * Write a magic number to serve as a file version identifier.  We can
4309          * change the magic number whenever the relcache layout changes.
4310          */
4311         magic = RELCACHE_INIT_FILEMAGIC;
4312         if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
4313                 elog(FATAL, "could not write init file");
4314
4315         /*
4316          * Write all the appropriate reldescs (in no particular order).
4317          */
4318         hash_seq_init(&status, RelationIdCache);
4319
4320         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
4321         {
4322                 Relation        rel = idhentry->reldesc;
4323                 Form_pg_class relform = rel->rd_rel;
4324
4325                 /* ignore if not correct group */
4326                 if (relform->relisshared != shared)
4327                         continue;
4328
4329                 /* first write the relcache entry proper */
4330                 write_item(rel, sizeof(RelationData), fp);
4331
4332                 /* next write the relation tuple form */
4333                 write_item(relform, CLASS_TUPLE_SIZE, fp);
4334
4335                 /* next, do all the attribute tuple form data entries */
4336                 for (i = 0; i < relform->relnatts; i++)
4337                 {
4338                         write_item(rel->rd_att->attrs[i], ATTRIBUTE_FIXED_PART_SIZE, fp);
4339                 }
4340
4341                 /* next, do the access method specific field */
4342                 write_item(rel->rd_options,
4343                                    (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
4344                                    fp);
4345
4346                 /* If it's an index, there's more to do */
4347                 if (rel->rd_rel->relkind == RELKIND_INDEX)
4348                 {
4349                         Form_pg_am      am = rel->rd_am;
4350
4351                         /* write the pg_index tuple */
4352                         /* we assume this was created by heap_copytuple! */
4353                         write_item(rel->rd_indextuple,
4354                                            HEAPTUPLESIZE + rel->rd_indextuple->t_len,
4355                                            fp);
4356
4357                         /* next, write the access method tuple form */
4358                         write_item(am, sizeof(FormData_pg_am), fp);
4359
4360                         /* next, write the vector of opfamily OIDs */
4361                         write_item(rel->rd_opfamily,
4362                                            relform->relnatts * sizeof(Oid),
4363                                            fp);
4364
4365                         /* next, write the vector of opcintype OIDs */
4366                         write_item(rel->rd_opcintype,
4367                                            relform->relnatts * sizeof(Oid),
4368                                            fp);
4369
4370                         /* next, write the vector of operator OIDs */
4371                         write_item(rel->rd_operator,
4372                                            relform->relnatts * (am->amstrategies * sizeof(Oid)),
4373                                            fp);
4374
4375                         /* next, write the vector of support procedures */
4376                         write_item(rel->rd_support,
4377                                   relform->relnatts * (am->amsupport * sizeof(RegProcedure)),
4378                                            fp);
4379
4380                         /* finally, write the vector of indoption values */
4381                         write_item(rel->rd_indoption,
4382                                            relform->relnatts * sizeof(int16),
4383                                            fp);
4384                 }
4385
4386                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
4387                 if (!shared)
4388                 {
4389                         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4390                         initFileRelationIds = lcons_oid(RelationGetRelid(rel),
4391                                                                                         initFileRelationIds);
4392                         MemoryContextSwitchTo(oldcxt);
4393                 }
4394         }
4395
4396         if (FreeFile(fp))
4397                 elog(FATAL, "could not write init file");
4398
4399         /*
4400          * Now we have to check whether the data we've so painstakingly
4401          * accumulated is already obsolete due to someone else's just-committed
4402          * catalog changes.  If so, we just delete the temp file and leave it to
4403          * the next backend to try again.  (Our own relcache entries will be
4404          * updated by SI message processing, but we can't be sure whether what we
4405          * wrote out was up-to-date.)
4406          *
4407          * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
4408          * grab a serialization lock for the duration.
4409          */
4410         LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4411
4412         /* Make sure we have seen all incoming SI messages */
4413         AcceptInvalidationMessages();
4414
4415         /*
4416          * If we have received any SI relcache invals since backend start, assume
4417          * we may have written out-of-date data.
4418          */
4419         if (relcacheInvalsReceived == 0L)
4420         {
4421                 /*
4422                  * OK, rename the temp file to its final name, deleting any
4423                  * previously-existing init file.
4424                  *
4425                  * Note: a failure here is possible under Cygwin, if some other
4426                  * backend is holding open an unlinked-but-not-yet-gone init file. So
4427                  * treat this as a noncritical failure; just remove the useless temp
4428                  * file on failure.
4429                  */
4430                 if (rename(tempfilename, finalfilename) < 0)
4431                         unlink(tempfilename);
4432         }
4433         else
4434         {
4435                 /* Delete the already-obsolete temp file */
4436                 unlink(tempfilename);
4437         }
4438
4439         LWLockRelease(RelCacheInitLock);
4440 }
4441
4442 /* write a chunk of data preceded by its length */
4443 static void
4444 write_item(const void *data, Size len, FILE *fp)
4445 {
4446         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
4447                 elog(FATAL, "could not write init file");
4448         if (fwrite(data, 1, len, fp) != len)
4449                 elog(FATAL, "could not write init file");
4450 }
4451
4452 /*
4453  * Detect whether a given relation (identified by OID) is one of the ones
4454  * we store in the local relcache init file.
4455  *
4456  * Note that we effectively assume that all backends running in a database
4457  * would choose to store the same set of relations in the init file;
4458  * otherwise there are cases where we'd fail to detect the need for an init
4459  * file invalidation.  This does not seem likely to be a problem in practice.
4460  */
4461 bool
4462 RelationIdIsInInitFile(Oid relationId)
4463 {
4464         return list_member_oid(initFileRelationIds, relationId);
4465 }
4466
4467 /*
4468  * Invalidate (remove) the init file during commit of a transaction that
4469  * changed one or more of the relation cache entries that are kept in the
4470  * local init file.
4471  *
4472  * We actually need to remove the init file twice: once just before sending
4473  * the SI messages that include relcache inval for such relations, and once
4474  * just after sending them.  The unlink before ensures that a backend that's
4475  * currently starting cannot read the now-obsolete init file and then miss
4476  * the SI messages that will force it to update its relcache entries.  (This
4477  * works because the backend startup sequence gets into the PGPROC array before
4478  * trying to load the init file.)  The unlink after is to synchronize with a
4479  * backend that may currently be trying to write an init file based on data
4480  * that we've just rendered invalid.  Such a backend will see the SI messages,
4481  * but we can't leave the init file sitting around to fool later backends.
4482  *
4483  * Ignore any failure to unlink the file, since it might not be there if
4484  * no backend has been started since the last removal.
4485  *
4486  * Notice this deals only with the local init file, not the shared init file.
4487  * The reason is that there can never be a "significant" change to the
4488  * relcache entry of a shared relation; the most that could happen is
4489  * updates of noncritical fields such as relpages/reltuples.  So, while
4490  * it's worth updating the shared init file from time to time, it can never
4491  * be invalid enough to make it necessary to remove it.
4492  */
4493 void
4494 RelationCacheInitFileInvalidate(bool beforeSend)
4495 {
4496         char            initfilename[MAXPGPATH];
4497
4498         snprintf(initfilename, sizeof(initfilename), "%s/%s",
4499                          DatabasePath, RELCACHE_INIT_FILENAME);
4500
4501         if (beforeSend)
4502         {
4503                 /* no interlock needed here */
4504                 unlink(initfilename);
4505         }
4506         else
4507         {
4508                 /*
4509                  * We need to interlock this against write_relcache_init_file, to
4510                  * guard against possibility that someone renames a new-but-
4511                  * already-obsolete init file into place just after we unlink. With
4512                  * the interlock, it's certain that write_relcache_init_file will
4513                  * notice our SI inval message before renaming into place, or else
4514                  * that we will execute second and successfully unlink the file.
4515                  */
4516                 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4517                 unlink(initfilename);
4518                 LWLockRelease(RelCacheInitLock);
4519         }
4520 }
4521
4522 /*
4523  * Remove the init files during postmaster startup.
4524  *
4525  * We used to keep the init files across restarts, but that is unsafe in PITR
4526  * scenarios, and even in simple crash-recovery cases there are windows for
4527  * the init files to become out-of-sync with the database.      So now we just
4528  * remove them during startup and expect the first backend launch to rebuild
4529  * them.  Of course, this has to happen in each database of the cluster.
4530  */
4531 void
4532 RelationCacheInitFileRemove(void)
4533 {
4534         const char *tblspcdir = "pg_tblspc";
4535         DIR                *dir;
4536         struct dirent *de;
4537         char            path[MAXPGPATH];
4538
4539         /*
4540          * We zap the shared cache file too.  In theory it can't get out of sync
4541          * enough to be a problem, but in data-corruption cases, who knows ...
4542          */
4543         snprintf(path, sizeof(path), "global/%s",
4544                          RELCACHE_INIT_FILENAME);
4545         unlink_initfile(path);
4546
4547         /* Scan everything in the default tablespace */
4548         RelationCacheInitFileRemoveInDir("base");
4549
4550         /* Scan the tablespace link directory to find non-default tablespaces */
4551         dir = AllocateDir(tblspcdir);
4552         if (dir == NULL)
4553         {
4554                 elog(LOG, "could not open tablespace link directory \"%s\": %m",
4555                          tblspcdir);
4556                 return;
4557         }
4558
4559         while ((de = ReadDir(dir, tblspcdir)) != NULL)
4560         {
4561                 if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
4562                 {
4563                         /* Scan the tablespace dir for per-database dirs */
4564                         snprintf(path, sizeof(path), "%s/%s/%s",
4565                                          tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY);
4566                         RelationCacheInitFileRemoveInDir(path);
4567                 }
4568         }
4569
4570         FreeDir(dir);
4571 }
4572
4573 /* Process one per-tablespace directory for RelationCacheInitFileRemove */
4574 static void
4575 RelationCacheInitFileRemoveInDir(const char *tblspcpath)
4576 {
4577         DIR                *dir;
4578         struct dirent *de;
4579         char            initfilename[MAXPGPATH];
4580
4581         /* Scan the tablespace directory to find per-database directories */
4582         dir = AllocateDir(tblspcpath);
4583         if (dir == NULL)
4584         {
4585                 elog(LOG, "could not open tablespace directory \"%s\": %m",
4586                          tblspcpath);
4587                 return;
4588         }
4589
4590         while ((de = ReadDir(dir, tblspcpath)) != NULL)
4591         {
4592                 if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
4593                 {
4594                         /* Try to remove the init file in each database */
4595                         snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
4596                                          tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
4597                         unlink_initfile(initfilename);
4598                 }
4599         }
4600
4601         FreeDir(dir);
4602 }
4603
4604 static void
4605 unlink_initfile(const char *initfilename)
4606 {
4607         if (unlink(initfilename) < 0)
4608         {
4609                 /* It might not be there, but log any error other than ENOENT */
4610                 if (errno != ENOENT)
4611                         elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
4612         }
4613 }