OSDN Git Service

Arrange to cache btree metapage data in the relcache entry for the index,
[pg-rex/syncrep.git] / src / backend / utils / cache / relcache.c
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  *        POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.239 2006/04/25 22:46:05 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              RelationCacheInitialize                 - initialize relcache
18  *              RelationCacheInitializePhase2   - finish initializing relcache
19  *              RelationIdGetRelation                   - get a reldesc by relation id
20  *              RelationIdCacheGetRelation              - get a cached reldesc by relid
21  *              RelationClose                                   - close an open relation
22  *
23  * NOTES
24  *              The following code contains many undocumented hacks.  Please be
25  *              careful....
26  */
27 #include "postgres.h"
28
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32
33 #include "access/genam.h"
34 #include "access/heapam.h"
35 #include "catalog/catalog.h"
36 #include "catalog/indexing.h"
37 #include "catalog/namespace.h"
38 #include "catalog/pg_amop.h"
39 #include "catalog/pg_amproc.h"
40 #include "catalog/pg_attrdef.h"
41 #include "catalog/pg_attribute.h"
42 #include "catalog/pg_authid.h"
43 #include "catalog/pg_constraint.h"
44 #include "catalog/pg_index.h"
45 #include "catalog/pg_namespace.h"
46 #include "catalog/pg_opclass.h"
47 #include "catalog/pg_proc.h"
48 #include "catalog/pg_rewrite.h"
49 #include "catalog/pg_type.h"
50 #include "commands/trigger.h"
51 #include "miscadmin.h"
52 #include "optimizer/clauses.h"
53 #include "optimizer/planmain.h"
54 #include "optimizer/prep.h"
55 #include "storage/fd.h"
56 #include "storage/smgr.h"
57 #include "utils/builtins.h"
58 #include "utils/catcache.h"
59 #include "utils/fmgroids.h"
60 #include "utils/inval.h"
61 #include "utils/lsyscache.h"
62 #include "utils/memutils.h"
63 #include "utils/relcache.h"
64 #include "utils/resowner.h"
65 #include "utils/syscache.h"
66 #include "utils/typcache.h"
67
68
69 /*
70  * name of relcache init file, used to speed up backend startup
71  */
72 #define RELCACHE_INIT_FILENAME  "pg_internal.init"
73
74 #define RELCACHE_INIT_FILEMAGIC         0x573262        /* version ID value */
75
76 /*
77  *              hardcoded tuple descriptors.  see include/catalog/pg_attribute.h
78  */
79 static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
80 static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
81 static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
82 static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
83 static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
84
85 /*
86  *              Hash tables that index the relation cache
87  *
88  *              We used to index the cache by both name and OID, but now there
89  *              is only an index by OID.
90  */
/* One entry of the RelationIdCache hash table. */
91 typedef struct relidcacheent
92 {
93         Oid                     reloid;         /* relation OID; presumably the hash key (the lookup macros search by OID) */
94         Relation        reldesc;        /* relation descriptor stored for that OID */
95 } RelIdCacheEnt;
96
97 static HTAB *RelationIdCache;
98
99 /*
100  * This flag is false until we have prepared the critical relcache entries
101  * that are needed to do indexscans on the tables read by relcache building.
102  */
103 bool            criticalRelcachesBuilt = false;
104
105 /*
106  * This flag is set if we discover that we need to write a new relcache
107  * cache file at the end of startup.
108  */
109 static bool needNewCacheFile = false;
110
111 /*
112  * This counter counts relcache inval events received since backend startup
113  * (but only for rels that are actually in cache).      Presently, we use it only
114  * to detect whether data about to be written by write_relcache_init_file()
115  * might already be obsolete.
116  */
117 static long relcacheInvalsReceived = 0L;
118
119 /*
120  * This list remembers the OIDs of the relations cached in the relcache
121  * init file.
122  */
123 static List *initFileRelationIds = NIL;
124
125 /*
126  * This flag lets us optimize away work in AtEO(Sub)Xact_RelationCache().
127  */
128 static bool need_eoxact_work = false;
129
130
131 /*
132  *              macros to manipulate the lookup hashtables
133  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache under its rd_id.  If an entry
 *		for that OID already exists, its reldesc is silently overwritten.
 *
 * RELATION is expanded more than once, so it must not have side effects.
 * Every use is parenthesized so that any pointer-valued expression is safe
 * as the argument.
 */
#define RelationCacheInsert(RELATION)	\
do { \
	RelIdCacheEnt *idhentry; bool found; \
	idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_ENTER, \
										   &found); \
	/* used to give notice if found -- now just keep quiet */ \
	idhentry->reldesc = (RELATION); \
} while(0)
144
/*
 * RelationIdCacheLookup
 *		Look up ID in RelationIdCache and assign the cached descriptor to
 *		RELATION, or NULL when there is no entry for that OID.
 *
 * ID must be an lvalue (its address is taken) and RELATION must be
 * assignable.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry; \
	hentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
										 (void *) &(ID), HASH_FIND,NULL); \
	RELATION = hentry ? hentry->reldesc : NULL; \
} while(0)
155
/*
 * RelationCacheDelete
 *		Remove the RelationIdCache entry keyed by RELATION's rd_id.  A
 *		WARNING is logged if no such entry was present.
 *
 * The macro argument is parenthesized so that any pointer-valued
 * expression is safe as RELATION.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *idhentry; \
	idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_REMOVE, NULL); \
	if (idhentry == NULL) \
		elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
} while(0)
165
166
167 /*
168  * Special cache for opclass-related information
169  *
170  * Note: only default-subtype operators and support procs get cached
171  */
/* One entry of the OpClassCache hash table; returned by LookupOpclassInfo. */
172 typedef struct opclasscacheent
173 {
174         Oid                     opclassoid;             /* lookup key: OID of opclass */
175         bool            valid;                  /* set TRUE after successful fill-in */
176         StrategyNumber numStrats;       /* max # of strategies (from pg_am) */
177         StrategyNumber numSupport;      /* max # of support procs (from pg_am) */
178         Oid                *operatorOids;       /* strategy operators' OIDs */
179         RegProcedure *supportProcs; /* support procs */
180 } OpClassCacheEnt;
181
182 static HTAB *OpClassCache = NULL;
183
184
185 /* non-export function prototypes */
186
187 static void RelationClearRelation(Relation relation, bool rebuild);
188
189 static void RelationReloadClassinfo(Relation relation);
190 static void RelationFlushRelation(Relation relation);
191 static bool load_relcache_init_file(void);
192 static void write_relcache_init_file(void);
193
194 static void formrdesc(const char *relationName, Oid relationReltype,
195                   bool hasoids, int natts, FormData_pg_attribute *att);
196
197 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
198 static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
199 static void RelationBuildTupleDesc(Relation relation);
200 static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
201 static void RelationInitPhysicalAddr(Relation relation);
202 static TupleDesc GetPgIndexDescriptor(void);
203 static void AttrDefaultFetch(Relation relation);
204 static void CheckConstraintFetch(Relation relation);
205 static List *insert_ordered_oid(List *list, Oid datum);
206 static void IndexSupportInitialize(oidvector *indclass,
207                                            Oid *indexOperator,
208                                            RegProcedure *indexSupport,
209                                            StrategyNumber maxStrategyNumber,
210                                            StrategyNumber maxSupportNumber,
211                                            AttrNumber maxAttributeNumber);
212 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
213                                   StrategyNumber numStrats,
214                                   StrategyNumber numSupport);
215
216
217 /*
218  *              ScanPgRelation
219  *
220  *              this is used by RelationBuildDesc to find a pg_class
221  *              tuple matching targetRelId.
222  *
223  *              NB: the returned tuple has been copied into palloc'd storage
224  *              and must eventually be freed with heap_freetuple.
225  */
226 static HeapTuple
227 ScanPgRelation(Oid targetRelId, bool indexOK)
228 {
229         HeapTuple       pg_class_tuple;
230         Relation        pg_class_desc;
231         SysScanDesc pg_class_scan;
232         ScanKeyData key[1];
233
234         /*
235          * form a scan key
236          */
237         ScanKeyInit(&key[0],
238                                 ObjectIdAttributeNumber,
239                                 BTEqualStrategyNumber, F_OIDEQ,
240                                 ObjectIdGetDatum(targetRelId));
241
242         /*
243          * Open pg_class and fetch a tuple.  Force heap scan if we haven't yet
244          * built the critical relcache entries (this includes initdb and startup
245          * without a pg_internal.init file).  The caller can also force a heap
246          * scan by setting indexOK == false.
247          */
248         pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
249         pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
250                                                                            indexOK && criticalRelcachesBuilt,
251                                                                            SnapshotNow,
252                                                                            1, key);
253
254         pg_class_tuple = systable_getnext(pg_class_scan);
255
256         /*
257          * Must copy tuple before releasing buffer.
258          */
259         if (HeapTupleIsValid(pg_class_tuple))
260                 pg_class_tuple = heap_copytuple(pg_class_tuple);
261
262         /* all done */
263         systable_endscan(pg_class_scan);
264         heap_close(pg_class_desc, AccessShareLock);
265
266         return pg_class_tuple;
267 }
268
269 /*
270  *              AllocateRelationDesc
271  *
272  *              This is used to allocate memory for a new relation descriptor
273  *              and initialize the rd_rel field.
274  *
275  *              If 'relation' is NULL, allocate a new RelationData object.
276  *              If not, reuse the given object (that path is taken only when
277  *              we have to rebuild a relcache entry during RelationClearRelation).
278  */
279 static Relation
280 AllocateRelationDesc(Relation relation, Form_pg_class relp)
281 {
282         MemoryContext oldcxt;
283         Form_pg_class relationForm;
284
285         /* Relcache entries must live in CacheMemoryContext */
286         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
287
288         /*
289          * allocate space for new relation descriptor, if needed
290          */
291         if (relation == NULL)
292                 relation = (Relation) palloc(sizeof(RelationData));
293
294         /*
295          * clear all fields of reldesc
296          */
297         MemSet(relation, 0, sizeof(RelationData));
298         relation->rd_targblock = InvalidBlockNumber;
299
300         /* make sure relation is marked as having no open file yet */
301         relation->rd_smgr = NULL;
302
303         /*
304          * Copy the relation tuple form
305          *
306          * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE.
307          * relacl is NOT stored in the relcache --- there'd be little point in it,
308          * since we don't copy the tuple's nullvalues bitmap and hence wouldn't
309          * know if the value is valid ... bottom line is that relacl *cannot* be
310          * retrieved from the relcache.  Get it from the syscache if you need it.
311          */
312         relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
313
314         memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
315
316         /* initialize relation tuple form */
317         relation->rd_rel = relationForm;
318
319         /* and allocate attribute tuple form storage */
320         relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
321                                                                                            relationForm->relhasoids);
322
323         MemoryContextSwitchTo(oldcxt);
324
325         return relation;
326 }
327
328 /*
329  *              RelationBuildTupleDesc
330  *
331  *              Form the relation's tuple descriptor from information in
332  *              the pg_attribute, pg_attrdef & pg_constraint system catalogs.
333  */
334 static void
335 RelationBuildTupleDesc(Relation relation)
336 {
337         HeapTuple       pg_attribute_tuple;
338         Relation        pg_attribute_desc;
339         SysScanDesc pg_attribute_scan;
340         ScanKeyData skey[2];
341         int                     need;
342         TupleConstr *constr;
343         AttrDefault *attrdef = NULL;
344         int                     ndef = 0;
345
346         /* copy some fields from pg_class row to rd_att */
347         relation->rd_att->tdtypeid = relation->rd_rel->reltype;
348         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
349         relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
350
351         constr = (TupleConstr *) MemoryContextAlloc(CacheMemoryContext,
352                                                                                                 sizeof(TupleConstr));
353         constr->has_not_null = false;
354
355         /*
356          * Form a scan key that selects only user attributes (attnum > 0).
357          * (Eliminating system attribute rows at the index level is lots faster
358          * than fetching them.)
359          */
360         ScanKeyInit(&skey[0],
361                                 Anum_pg_attribute_attrelid,
362                                 BTEqualStrategyNumber, F_OIDEQ,
363                                 ObjectIdGetDatum(RelationGetRelid(relation)));
364         ScanKeyInit(&skey[1],
365                                 Anum_pg_attribute_attnum,
366                                 BTGreaterStrategyNumber, F_INT2GT,
367                                 Int16GetDatum(0));
368
369         /*
370          * Open pg_attribute and begin a scan.  Force heap scan if we haven't yet
371          * built the critical relcache entries (this includes initdb and startup
372          * without a pg_internal.init file).
373          */
374         pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
375         pg_attribute_scan = systable_beginscan(pg_attribute_desc,
376                                                                                    AttributeRelidNumIndexId,
377                                                                                    criticalRelcachesBuilt,
378                                                                                    SnapshotNow,
379                                                                                    2, skey);
380
381         /*
382          * add attribute data to relation->rd_att
383          */
384         need = relation->rd_rel->relnatts;
385
386         while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
387         {
388                 Form_pg_attribute attp;
389
390                 attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
391
392                 if (attp->attnum <= 0 ||
393                         attp->attnum > relation->rd_rel->relnatts)
394                         elog(ERROR, "invalid attribute number %d for %s",
395                                  attp->attnum, RelationGetRelationName(relation));
396
397                 memcpy(relation->rd_att->attrs[attp->attnum - 1],
398                            attp,
399                            ATTRIBUTE_TUPLE_SIZE);
400
401                 /* Update constraint/default info */
402                 if (attp->attnotnull)
403                         constr->has_not_null = true;
404
405                 if (attp->atthasdef)
406                 {
407                         if (attrdef == NULL)
408                                 attrdef = (AttrDefault *)
409                                         MemoryContextAllocZero(CacheMemoryContext,
410                                                                                    relation->rd_rel->relnatts *
411                                                                                    sizeof(AttrDefault));
412                         attrdef[ndef].adnum = attp->attnum;
413                         attrdef[ndef].adbin = NULL;
414                         ndef++;
415                 }
416                 need--;
417                 if (need == 0)
418                         break;
419         }
420
421         /*
422          * end the scan and close the attribute relation
423          */
424         systable_endscan(pg_attribute_scan);
425         heap_close(pg_attribute_desc, AccessShareLock);
426
427         if (need != 0)
428                 elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
429                          need, RelationGetRelid(relation));
430
431         /*
432          * The attcacheoff values we read from pg_attribute should all be -1
433          * ("unknown").  Verify this if assert checking is on.  They will be
434          * computed when and if needed during tuple access.
435          */
436 #ifdef USE_ASSERT_CHECKING
437         {
438                 int                     i;
439
440                 for (i = 0; i < relation->rd_rel->relnatts; i++)
441                         Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
442         }
443 #endif
444
445         /*
446          * However, we can easily set the attcacheoff value for the first
447          * attribute: it must be zero.  This eliminates the need for special cases
448          * for attnum=1 that used to exist in fastgetattr() and index_getattr().
449          */
450         if (relation->rd_rel->relnatts > 0)
451                 relation->rd_att->attrs[0]->attcacheoff = 0;
452
453         /*
454          * Set up constraint/default info
455          */
456         if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
457         {
458                 relation->rd_att->constr = constr;
459
460                 if (ndef > 0)                   /* DEFAULTs */
461                 {
462                         if (ndef < relation->rd_rel->relnatts)
463                                 constr->defval = (AttrDefault *)
464                                         repalloc(attrdef, ndef * sizeof(AttrDefault));
465                         else
466                                 constr->defval = attrdef;
467                         constr->num_defval = ndef;
468                         AttrDefaultFetch(relation);
469                 }
470                 else
471                         constr->num_defval = 0;
472
473                 if (relation->rd_rel->relchecks > 0)    /* CHECKs */
474                 {
475                         constr->num_check = relation->rd_rel->relchecks;
476                         constr->check = (ConstrCheck *)
477                                 MemoryContextAllocZero(CacheMemoryContext,
478                                                                         constr->num_check * sizeof(ConstrCheck));
479                         CheckConstraintFetch(relation);
480                 }
481                 else
482                         constr->num_check = 0;
483         }
484         else
485         {
486                 pfree(constr);
487                 relation->rd_att->constr = NULL;
488         }
489 }
490
491 /*
492  *              RelationBuildRuleLock
493  *
494  *              Form the relation's rewrite rules from information in
495  *              the pg_rewrite system catalog.
496  *
497  * Note: The rule parsetrees are potentially very complex node structures.
498  * To allow these trees to be freed when the relcache entry is flushed,
499  * we make a private memory context to hold the RuleLock information for
500  * each relcache entry that has associated rules.  The context is used
501  * just for rule info, not for any other subsidiary data of the relcache
502  * entry, because that keeps the update logic in RelationClearRelation()
503  * manageable.  The other subsidiary data structures are simple enough
504  * to be easy to free explicitly, anyway.
505  */
506 static void
507 RelationBuildRuleLock(Relation relation)
508 {
509         MemoryContext rulescxt;
510         MemoryContext oldcxt;
511         HeapTuple       rewrite_tuple;
512         Relation        rewrite_desc;
513         TupleDesc       rewrite_tupdesc;
514         SysScanDesc rewrite_scan;
515         ScanKeyData key;
516         RuleLock   *rulelock;
517         int                     numlocks;
518         RewriteRule **rules;
519         int                     maxlocks;
520
521         /*
522          * Make the private context.  Parameters are set on the assumption that
523          * it'll probably not contain much data.
524          */
525         rulescxt = AllocSetContextCreate(CacheMemoryContext,
526                                                                          RelationGetRelationName(relation),
527                                                                          ALLOCSET_SMALL_MINSIZE,
528                                                                          ALLOCSET_SMALL_INITSIZE,
529                                                                          ALLOCSET_SMALL_MAXSIZE);
530         relation->rd_rulescxt = rulescxt;
531
532         /*
533          * allocate an array to hold the rewrite rules (the array is extended if
534          * necessary)
535          */
536         maxlocks = 4;
537         rules = (RewriteRule **)
538                 MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
539         numlocks = 0;
540
541         /*
542          * form a scan key
543          */
544         ScanKeyInit(&key,
545                                 Anum_pg_rewrite_ev_class,
546                                 BTEqualStrategyNumber, F_OIDEQ,
547                                 ObjectIdGetDatum(RelationGetRelid(relation)));
548
549         /*
550          * open pg_rewrite and begin a scan
551          *
552          * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
553          * be reading the rules in name order, except possibly during
554          * emergency-recovery operations (ie, IgnoreSystemIndexes). This in
555          * turn ensures that rules will be fired in name order.
556          */
557         rewrite_desc = heap_open(RewriteRelationId, AccessShareLock);
558         rewrite_tupdesc = RelationGetDescr(rewrite_desc);
559         rewrite_scan = systable_beginscan(rewrite_desc,
560                                                                           RewriteRelRulenameIndexId,
561                                                                           true, SnapshotNow,
562                                                                           1, &key);
563
564         while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
565         {
566                 Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
567                 bool            isnull;
568                 Datum           rule_datum;
569                 text       *rule_text;
570                 char       *rule_str;
571                 RewriteRule *rule;
572
573                 rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
574                                                                                                   sizeof(RewriteRule));
575
576                 rule->ruleId = HeapTupleGetOid(rewrite_tuple);
577
578                 rule->event = rewrite_form->ev_type - '0';
579                 rule->attrno = rewrite_form->ev_attr;
580                 rule->isInstead = rewrite_form->is_instead;
581
582                 /*
583                  * Must use heap_getattr to fetch ev_action and ev_qual.  Also,
584                  * the rule strings are often large enough to be toasted.  To avoid
585                  * leaking memory in the caller's context, do the detoasting here
586                  * so we can free the detoasted version.
587                  */
588                 rule_datum = heap_getattr(rewrite_tuple,
589                                                                   Anum_pg_rewrite_ev_action,
590                                                                   rewrite_tupdesc,
591                                                                   &isnull);
592                 Assert(!isnull);
593                 rule_text = DatumGetTextP(rule_datum);
594                 rule_str = DatumGetCString(DirectFunctionCall1(textout,
595                                                                                                 PointerGetDatum(rule_text)));
596                 oldcxt = MemoryContextSwitchTo(rulescxt);
597                 rule->actions = (List *) stringToNode(rule_str);
598                 MemoryContextSwitchTo(oldcxt);
599                 pfree(rule_str);
600                 if ((Pointer) rule_text != DatumGetPointer(rule_datum))
601                         pfree(rule_text);
602
603                 rule_datum = heap_getattr(rewrite_tuple,
604                                                                   Anum_pg_rewrite_ev_qual,
605                                                                   rewrite_tupdesc,
606                                                                   &isnull);
607                 Assert(!isnull);
608                 rule_text = DatumGetTextP(rule_datum);
609                 rule_str = DatumGetCString(DirectFunctionCall1(textout,
610                                                                                                 PointerGetDatum(rule_text)));
611                 oldcxt = MemoryContextSwitchTo(rulescxt);
612                 rule->qual = (Node *) stringToNode(rule_str);
613                 MemoryContextSwitchTo(oldcxt);
614                 pfree(rule_str);
615                 if ((Pointer) rule_text != DatumGetPointer(rule_datum))
616                         pfree(rule_text);
617
618                 if (numlocks >= maxlocks)
619                 {
620                         maxlocks *= 2;
621                         rules = (RewriteRule **)
622                                 repalloc(rules, sizeof(RewriteRule *) * maxlocks);
623                 }
624                 rules[numlocks++] = rule;
625         }
626
627         /*
628          * end the scan and close the attribute relation
629          */
630         systable_endscan(rewrite_scan);
631         heap_close(rewrite_desc, AccessShareLock);
632
633         /*
634          * form a RuleLock and insert into relation
635          */
636         rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
637         rulelock->numLocks = numlocks;
638         rulelock->rules = rules;
639
640         relation->rd_rules = rulelock;
641 }
642
643 /*
644  *              equalRuleLocks
645  *
646  *              Determine whether two RuleLocks are equivalent
647  *
648  *              Probably this should be in the rules code someplace...
649  */
650 static bool
651 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
652 {
653         int                     i;
654
655         /*
656          * As of 7.3 we assume the rule ordering is repeatable, because
657          * RelationBuildRuleLock should read 'em in a consistent order.  So just
658          * compare corresponding slots.
659          */
660         if (rlock1 != NULL)
661         {
662                 if (rlock2 == NULL)
663                         return false;
664                 if (rlock1->numLocks != rlock2->numLocks)
665                         return false;
666                 for (i = 0; i < rlock1->numLocks; i++)
667                 {
668                         RewriteRule *rule1 = rlock1->rules[i];
669                         RewriteRule *rule2 = rlock2->rules[i];
670
671                         if (rule1->ruleId != rule2->ruleId)
672                                 return false;
673                         if (rule1->event != rule2->event)
674                                 return false;
675                         if (rule1->attrno != rule2->attrno)
676                                 return false;
677                         if (rule1->isInstead != rule2->isInstead)
678                                 return false;
679                         if (!equal(rule1->qual, rule2->qual))
680                                 return false;
681                         if (!equal(rule1->actions, rule2->actions))
682                                 return false;
683                 }
684         }
685         else if (rlock2 != NULL)
686                 return false;
687         return true;
688 }
689
690
691 /* ----------------------------------
692  *              RelationBuildDesc
693  *
694  *              Build a relation descriptor --- either a new one, or by
695  *              recycling the given old relation object.  The latter case
696  *              supports rebuilding a relcache entry without invalidating
697  *              pointers to it.
698  * --------------------------------
699  */
700 static Relation
701 RelationBuildDesc(Oid targetRelId, Relation oldrelation)
702 {
703         Relation        relation;
704         Oid                     relid;
705         HeapTuple       pg_class_tuple;
706         Form_pg_class relp;
707         MemoryContext oldcxt;
708
709         /*
710          * find the tuple in pg_class corresponding to the given relation id
711          */
712         pg_class_tuple = ScanPgRelation(targetRelId, true);
713
714         /*
715          * if no such tuple exists, return NULL
716          */
717         if (!HeapTupleIsValid(pg_class_tuple))
718                 return NULL;
719
720         /*
721          * get information from the pg_class_tuple
722          */
723         relid = HeapTupleGetOid(pg_class_tuple);
724         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
725
726         /*
727          * allocate storage for the relation descriptor, and copy pg_class_tuple
728          * to relation->rd_rel.
729          */
730         relation = AllocateRelationDesc(oldrelation, relp);
731
732         /*
733          * now we can free the memory allocated for pg_class_tuple
734          */
735         heap_freetuple(pg_class_tuple);
736
737         /*
738          * initialize the relation's relation id (relation->rd_id)
739          */
740         RelationGetRelid(relation) = relid;
741
742         /*
743          * normal relations are not nailed into the cache; nor can a pre-existing
744          * relation be new.  It could be temp though.  (Actually, it could be new
745          * too, but it's okay to forget that fact if forced to flush the entry.)
746          */
747         relation->rd_refcnt = 0;
748         relation->rd_isnailed = false;
749         relation->rd_createSubid = InvalidSubTransactionId;
750         relation->rd_istemp = isTempNamespace(relation->rd_rel->relnamespace);
751
752         /*
753          * initialize the tuple descriptor (relation->rd_att).
754          */
755         RelationBuildTupleDesc(relation);
756
757         /*
758          * Fetch rules and triggers that affect this relation
759          */
760         if (relation->rd_rel->relhasrules)
761                 RelationBuildRuleLock(relation);
762         else
763         {
764                 relation->rd_rules = NULL;
765                 relation->rd_rulescxt = NULL;
766         }
767
768         if (relation->rd_rel->reltriggers > 0)
769                 RelationBuildTriggers(relation);
770         else
771                 relation->trigdesc = NULL;
772
773         /*
774          * if it's an index, initialize index-related information
775          */
776         if (OidIsValid(relation->rd_rel->relam))
777                 RelationInitIndexAccessInfo(relation);
778
779         /*
780          * initialize the relation lock manager information
781          */
782         RelationInitLockInfo(relation);         /* see lmgr.c */
783
784         /*
785          * initialize physical addressing information for the relation
786          */
787         RelationInitPhysicalAddr(relation);
788
789         /* make sure relation is marked as having no open file yet */
790         relation->rd_smgr = NULL;
791
792         /*
793          * Insert newly created relation into relcache hash tables.
794          */
795         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
796         RelationCacheInsert(relation);
797         MemoryContextSwitchTo(oldcxt);
798
799         /* It's fully valid */
800         relation->rd_isvalid = true;
801
802         return relation;
803 }
804
805 /*
806  * Initialize the physical addressing info (RelFileNode) for a relcache entry
807  */
808 static void
809 RelationInitPhysicalAddr(Relation relation)
810 {
811         if (relation->rd_rel->reltablespace)
812                 relation->rd_node.spcNode = relation->rd_rel->reltablespace;
813         else
814                 relation->rd_node.spcNode = MyDatabaseTableSpace;
815         if (relation->rd_rel->relisshared)
816                 relation->rd_node.dbNode = InvalidOid;
817         else
818                 relation->rd_node.dbNode = MyDatabaseId;
819         relation->rd_node.relNode = relation->rd_rel->relfilenode;
820 }
821
822 /*
823  * Initialize index-access-method support data for an index relation
824  */
825 void
826 RelationInitIndexAccessInfo(Relation relation)
827 {
828         HeapTuple       tuple;
829         Form_pg_am      aform;
830         Datum           indclassDatum;
831         bool            isnull;
832         MemoryContext indexcxt;
833         MemoryContext oldcontext;
834         Oid                *operator;
835         RegProcedure *support;
836         FmgrInfo   *supportinfo;
837         int                     natts;
838         uint16          amstrategies;
839         uint16          amsupport;
840
841         /*
842          * Make a copy of the pg_index entry for the index.  Since pg_index
843          * contains variable-length and possibly-null fields, we have to do this
844          * honestly rather than just treating it as a Form_pg_index struct.
845          */
846         tuple = SearchSysCache(INDEXRELID,
847                                                    ObjectIdGetDatum(RelationGetRelid(relation)),
848                                                    0, 0, 0);
849         if (!HeapTupleIsValid(tuple))
850                 elog(ERROR, "cache lookup failed for index %u",
851                          RelationGetRelid(relation));
852         oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
853         relation->rd_indextuple = heap_copytuple(tuple);
854         relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
855         MemoryContextSwitchTo(oldcontext);
856         ReleaseSysCache(tuple);
857
858         /*
859          * indclass cannot be referenced directly through the C struct, because it
860          * is after the variable-width indkey field.  Therefore we extract the
861          * datum the hard way and provide a direct link in the relcache.
862          */
863         indclassDatum = fastgetattr(relation->rd_indextuple,
864                                                                 Anum_pg_index_indclass,
865                                                                 GetPgIndexDescriptor(),
866                                                                 &isnull);
867         Assert(!isnull);
868         relation->rd_indclass = (oidvector *) DatumGetPointer(indclassDatum);
869
870         /*
871          * Make a copy of the pg_am entry for the index's access method
872          */
873         tuple = SearchSysCache(AMOID,
874                                                    ObjectIdGetDatum(relation->rd_rel->relam),
875                                                    0, 0, 0);
876         if (!HeapTupleIsValid(tuple))
877                 elog(ERROR, "cache lookup failed for access method %u",
878                          relation->rd_rel->relam);
879         aform = (Form_pg_am) MemoryContextAlloc(CacheMemoryContext, sizeof *aform);
880         memcpy(aform, GETSTRUCT(tuple), sizeof *aform);
881         ReleaseSysCache(tuple);
882         relation->rd_am = aform;
883
884         natts = relation->rd_rel->relnatts;
885         if (natts != relation->rd_index->indnatts)
886                 elog(ERROR, "relnatts disagrees with indnatts for index %u",
887                          RelationGetRelid(relation));
888         amstrategies = aform->amstrategies;
889         amsupport = aform->amsupport;
890
891         /*
892          * Make the private context to hold index access info.  The reason we need
893          * a context, and not just a couple of pallocs, is so that we won't leak
894          * any subsidiary info attached to fmgr lookup records.
895          *
896          * Context parameters are set on the assumption that it'll probably not
897          * contain much data.
898          */
899         indexcxt = AllocSetContextCreate(CacheMemoryContext,
900                                                                          RelationGetRelationName(relation),
901                                                                          ALLOCSET_SMALL_MINSIZE,
902                                                                          ALLOCSET_SMALL_INITSIZE,
903                                                                          ALLOCSET_SMALL_MAXSIZE);
904         relation->rd_indexcxt = indexcxt;
905
906         /*
907          * Allocate arrays to hold data
908          */
909         relation->rd_aminfo = (RelationAmInfo *)
910                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
911
912         if (amstrategies > 0)
913                 operator = (Oid *)
914                         MemoryContextAllocZero(indexcxt,
915                                                                    natts * amstrategies * sizeof(Oid));
916         else
917                 operator = NULL;
918
919         if (amsupport > 0)
920         {
921                 int                     nsupport = natts * amsupport;
922
923                 support = (RegProcedure *)
924                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
925                 supportinfo = (FmgrInfo *)
926                         MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
927         }
928         else
929         {
930                 support = NULL;
931                 supportinfo = NULL;
932         }
933
934         relation->rd_operator = operator;
935         relation->rd_support = support;
936         relation->rd_supportinfo = supportinfo;
937
938         /*
939          * Fill the operator and support procedure OID arrays.  (aminfo and
940          * supportinfo are left as zeroes, and are filled on-the-fly when used)
941          */
942         IndexSupportInitialize(relation->rd_indclass,
943                                                    operator, support,
944                                                    amstrategies, amsupport, natts);
945
946         /*
947          * expressions and predicate cache will be filled later
948          */
949         relation->rd_indexprs = NIL;
950         relation->rd_indpred = NIL;
951         relation->rd_amcache = NULL;
952 }
953
954 /*
955  * IndexSupportInitialize
956  *              Initializes an index's cached opclass information,
957  *              given the index's pg_index.indclass entry.
958  *
959  * Data is returned into *indexOperator and *indexSupport, which are arrays
960  * allocated by the caller.
961  *
962  * The caller also passes maxStrategyNumber, maxSupportNumber, and
963  * maxAttributeNumber, since these indicate the size of the arrays
964  * it has allocated --- but in practice these numbers must always match
965  * those obtainable from the system catalog entries for the index and
966  * access method.
967  */
968 static void
969 IndexSupportInitialize(oidvector *indclass,
970                                            Oid *indexOperator,
971                                            RegProcedure *indexSupport,
972                                            StrategyNumber maxStrategyNumber,
973                                            StrategyNumber maxSupportNumber,
974                                            AttrNumber maxAttributeNumber)
975 {
976         int                     attIndex;
977
978         for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
979         {
980                 OpClassCacheEnt *opcentry;
981
982                 if (!OidIsValid(indclass->values[attIndex]))
983                         elog(ERROR, "bogus pg_index tuple");
984
985                 /* look up the info for this opclass, using a cache */
986                 opcentry = LookupOpclassInfo(indclass->values[attIndex],
987                                                                          maxStrategyNumber,
988                                                                          maxSupportNumber);
989
990                 /* copy cached data into relcache entry */
991                 if (maxStrategyNumber > 0)
992                         memcpy(&indexOperator[attIndex * maxStrategyNumber],
993                                    opcentry->operatorOids,
994                                    maxStrategyNumber * sizeof(Oid));
995                 if (maxSupportNumber > 0)
996                         memcpy(&indexSupport[attIndex * maxSupportNumber],
997                                    opcentry->supportProcs,
998                                    maxSupportNumber * sizeof(RegProcedure));
999         }
1000 }
1001
1002 /*
1003  * LookupOpclassInfo
1004  *
1005  * This routine maintains a per-opclass cache of the information needed
1006  * by IndexSupportInitialize().  This is more efficient than relying on
1007  * the catalog cache, because we can load all the info about a particular
1008  * opclass in a single indexscan of pg_amproc or pg_amop.
1009  *
1010  * The information from pg_am about expected range of strategy and support
1011  * numbers is passed in, rather than being looked up, mainly because the
1012  * caller will have it already.
1013  *
1014  * XXX There isn't any provision for flushing the cache.  However, there
1015  * isn't any provision for flushing relcache entries when opclass info
1016  * changes, either :-(
1017  */
1018 static OpClassCacheEnt *
1019 LookupOpclassInfo(Oid operatorClassOid,
1020                                   StrategyNumber numStrats,
1021                                   StrategyNumber numSupport)
1022 {
1023         OpClassCacheEnt *opcentry;
1024         bool            found;
1025         Relation        rel;
1026         SysScanDesc scan;
1027         ScanKeyData skey[2];
1028         HeapTuple       htup;
1029         bool            indexOK;
1030
1031         if (OpClassCache == NULL)
1032         {
1033                 /* First time through: initialize the opclass cache */
1034                 HASHCTL         ctl;
1035
1036                 if (!CacheMemoryContext)
1037                         CreateCacheMemoryContext();
1038
1039                 MemSet(&ctl, 0, sizeof(ctl));
1040                 ctl.keysize = sizeof(Oid);
1041                 ctl.entrysize = sizeof(OpClassCacheEnt);
1042                 ctl.hash = oid_hash;
1043                 OpClassCache = hash_create("Operator class cache", 64,
1044                                                                    &ctl, HASH_ELEM | HASH_FUNCTION);
1045         }
1046
1047         opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1048                                                                                            (void *) &operatorClassOid,
1049                                                                                            HASH_ENTER, &found);
1050
1051         if (found && opcentry->valid)
1052         {
1053                 /* Already made an entry for it */
1054                 Assert(numStrats == opcentry->numStrats);
1055                 Assert(numSupport == opcentry->numSupport);
1056                 return opcentry;
1057         }
1058
1059         /* Need to fill in new entry */
1060         opcentry->valid = false;        /* until known OK */
1061         opcentry->numStrats = numStrats;
1062         opcentry->numSupport = numSupport;
1063
1064         if (numStrats > 0)
1065                 opcentry->operatorOids = (Oid *)
1066                         MemoryContextAllocZero(CacheMemoryContext,
1067                                                                    numStrats * sizeof(Oid));
1068         else
1069                 opcentry->operatorOids = NULL;
1070
1071         if (numSupport > 0)
1072                 opcentry->supportProcs = (RegProcedure *)
1073                         MemoryContextAllocZero(CacheMemoryContext,
1074                                                                    numSupport * sizeof(RegProcedure));
1075         else
1076                 opcentry->supportProcs = NULL;
1077
1078         /*
1079          * To avoid infinite recursion during startup, force heap scans if we're
1080          * looking up info for the opclasses used by the indexes we would like to
1081          * reference here.
1082          */
1083         indexOK = criticalRelcachesBuilt ||
1084                 (operatorClassOid != OID_BTREE_OPS_OID &&
1085                  operatorClassOid != INT2_BTREE_OPS_OID);
1086
1087         /*
1088          * Scan pg_amop to obtain operators for the opclass.  We only fetch the
1089          * default ones (those with subtype zero).
1090          */
1091         if (numStrats > 0)
1092         {
1093                 ScanKeyInit(&skey[0],
1094                                         Anum_pg_amop_amopclaid,
1095                                         BTEqualStrategyNumber, F_OIDEQ,
1096                                         ObjectIdGetDatum(operatorClassOid));
1097                 ScanKeyInit(&skey[1],
1098                                         Anum_pg_amop_amopsubtype,
1099                                         BTEqualStrategyNumber, F_OIDEQ,
1100                                         ObjectIdGetDatum(InvalidOid));
1101                 rel = heap_open(AccessMethodOperatorRelationId, AccessShareLock);
1102                 scan = systable_beginscan(rel, AccessMethodStrategyIndexId, indexOK,
1103                                                                   SnapshotNow, 2, skey);
1104
1105                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1106                 {
1107                         Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(htup);
1108
1109                         if (amopform->amopstrategy <= 0 ||
1110                                 (StrategyNumber) amopform->amopstrategy > numStrats)
1111                                 elog(ERROR, "invalid amopstrategy number %d for opclass %u",
1112                                          amopform->amopstrategy, operatorClassOid);
1113                         opcentry->operatorOids[amopform->amopstrategy - 1] =
1114                                 amopform->amopopr;
1115                 }
1116
1117                 systable_endscan(scan);
1118                 heap_close(rel, AccessShareLock);
1119         }
1120
1121         /*
1122          * Scan pg_amproc to obtain support procs for the opclass.      We only fetch
1123          * the default ones (those with subtype zero).
1124          */
1125         if (numSupport > 0)
1126         {
1127                 ScanKeyInit(&skey[0],
1128                                         Anum_pg_amproc_amopclaid,
1129                                         BTEqualStrategyNumber, F_OIDEQ,
1130                                         ObjectIdGetDatum(operatorClassOid));
1131                 ScanKeyInit(&skey[1],
1132                                         Anum_pg_amproc_amprocsubtype,
1133                                         BTEqualStrategyNumber, F_OIDEQ,
1134                                         ObjectIdGetDatum(InvalidOid));
1135                 rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
1136                 scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1137                                                                   SnapshotNow, 2, skey);
1138
1139                 while (HeapTupleIsValid(htup = systable_getnext(scan)))
1140                 {
1141                         Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1142
1143                         if (amprocform->amprocnum <= 0 ||
1144                                 (StrategyNumber) amprocform->amprocnum > numSupport)
1145                                 elog(ERROR, "invalid amproc number %d for opclass %u",
1146                                          amprocform->amprocnum, operatorClassOid);
1147
1148                         opcentry->supportProcs[amprocform->amprocnum - 1] =
1149                                 amprocform->amproc;
1150                 }
1151
1152                 systable_endscan(scan);
1153                 heap_close(rel, AccessShareLock);
1154         }
1155
1156         opcentry->valid = true;
1157         return opcentry;
1158 }
1159
1160
1161 /*
1162  *              formrdesc
1163  *
1164  *              This is a special cut-down version of RelationBuildDesc()
1165  *              used by RelationCacheInitialize() in initializing the relcache.
1166  *              The relation descriptor is built just from the supplied parameters,
1167  *              without actually looking at any system table entries.  We cheat
1168  *              quite a lot since we only need to work for a few basic system
1169  *              catalogs.
1170  *
1171  * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
1172  * and pg_type (see RelationCacheInitialize).
1173  *
1174  * Note that these catalogs can't have constraints (except attnotnull),
1175  * default values, rules, or triggers, since we don't cope with any of that.
1176  *
1177  * NOTE: we assume we are already switched into CacheMemoryContext.
1178  */
1179 static void
1180 formrdesc(const char *relationName, Oid relationReltype,
1181                   bool hasoids, int natts, FormData_pg_attribute *att)
1182 {
1183         Relation        relation;
1184         int                     i;
1185         bool            has_not_null;
1186
1187         /*
1188          * allocate new relation desc, clear all fields of reldesc
1189          */
1190         relation = (Relation) palloc0(sizeof(RelationData));
1191         relation->rd_targblock = InvalidBlockNumber;
1192
1193         /* make sure relation is marked as having no open file yet */
1194         relation->rd_smgr = NULL;
1195
1196         /*
1197          * initialize reference count: 1 because it is nailed in cache
1198          */
1199         relation->rd_refcnt = 1;
1200
1201         /*
1202          * all entries built with this routine are nailed-in-cache; none are for
1203          * new or temp relations.
1204          */
1205         relation->rd_isnailed = true;
1206         relation->rd_createSubid = InvalidSubTransactionId;
1207         relation->rd_istemp = false;
1208
1209         /*
1210          * initialize relation tuple form
1211          *
1212          * The data we insert here is pretty incomplete/bogus, but it'll serve to
1213          * get us launched.  RelationCacheInitializePhase2() will read the real
1214          * data from pg_class and replace what we've done here.
1215          */
1216         relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
1217
1218         namestrcpy(&relation->rd_rel->relname, relationName);
1219         relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1220         relation->rd_rel->reltype = relationReltype;
1221
1222         /*
1223          * It's important to distinguish between shared and non-shared relations,
1224          * even at bootstrap time, to make sure we know where they are stored.  At
1225          * present, all relations that formrdesc is used for are not shared.
1226          */
1227         relation->rd_rel->relisshared = false;
1228
1229         relation->rd_rel->relpages = 1;
1230         relation->rd_rel->reltuples = 1;
1231         relation->rd_rel->relkind = RELKIND_RELATION;
1232         relation->rd_rel->relhasoids = hasoids;
1233         relation->rd_rel->relnatts = (int16) natts;
1234
1235         /*
1236          * initialize attribute tuple form
1237          *
1238          * Unlike the case with the relation tuple, this data had better be right
1239          * because it will never be replaced.  The input values must be correctly
1240          * defined by macros in src/include/catalog/ headers.
1241          */
1242         relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1243         relation->rd_att->tdtypeid = relationReltype;
1244         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
1245
1246         /*
1247          * initialize tuple desc info
1248          */
1249         has_not_null = false;
1250         for (i = 0; i < natts; i++)
1251         {
1252                 memcpy(relation->rd_att->attrs[i],
1253                            &att[i],
1254                            ATTRIBUTE_TUPLE_SIZE);
1255                 has_not_null |= att[i].attnotnull;
1256                 /* make sure attcacheoff is valid */
1257                 relation->rd_att->attrs[i]->attcacheoff = -1;
1258         }
1259
1260         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1261         relation->rd_att->attrs[0]->attcacheoff = 0;
1262
1263         /* mark not-null status */
1264         if (has_not_null)
1265         {
1266                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1267
1268                 constr->has_not_null = true;
1269                 relation->rd_att->constr = constr;
1270         }
1271
1272         /*
1273          * initialize relation id from info in att array (my, this is ugly)
1274          */
1275         RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1276         relation->rd_rel->relfilenode = RelationGetRelid(relation);
1277
1278         /*
1279          * initialize the relation lock manager information
1280          */
1281         RelationInitLockInfo(relation);         /* see lmgr.c */
1282
1283         /*
1284          * initialize physical addressing information for the relation
1285          */
1286         RelationInitPhysicalAddr(relation);
1287
1288         /*
1289          * initialize the rel-has-index flag, using hardwired knowledge
1290          */
1291         if (IsBootstrapProcessingMode())
1292         {
1293                 /* In bootstrap mode, we have no indexes */
1294                 relation->rd_rel->relhasindex = false;
1295         }
1296         else
1297         {
1298                 /* Otherwise, all the rels formrdesc is used for have indexes */
1299                 relation->rd_rel->relhasindex = true;
1300         }
1301
1302         /*
1303          * add new reldesc to relcache
1304          */
1305         RelationCacheInsert(relation);
1306
1307         /* It's fully valid */
1308         relation->rd_isvalid = true;
1309 }
1310
1311
1312 /* ----------------------------------------------------------------
1313  *                               Relation Descriptor Lookup Interface
1314  * ----------------------------------------------------------------
1315  */
1316
1317 /*
1318  *              RelationIdCacheGetRelation
1319  *
1320  *              Lookup an existing reldesc by OID.
1321  *
1322  *              Only try to get the reldesc by looking in the cache,
1323  *              do not go to the disk if it's not present.
1324  *
1325  *              NB: relation ref count is incremented if successful.
1326  *              Caller should eventually decrement count.  (Usually,
1327  *              that happens by calling RelationClose().)
1328  */
1329 Relation
1330 RelationIdCacheGetRelation(Oid relationId)
1331 {
1332         Relation        rd;
1333
1334         RelationIdCacheLookup(relationId, rd);
1335
1336         if (RelationIsValid(rd))
1337         {
1338                 RelationIncrementReferenceCount(rd);
1339                 /* revalidate nailed index if necessary */
1340                 if (!rd->rd_isvalid)
1341                         RelationReloadClassinfo(rd);
1342         }
1343
1344         return rd;
1345 }
1346
1347 /*
1348  *              RelationIdGetRelation
1349  *
1350  *              Lookup a reldesc by OID; make one if not already in cache.
1351  *
1352  *              NB: relation ref count is incremented, or set to 1 if new entry.
1353  *              Caller should eventually decrement count.  (Usually,
1354  *              that happens by calling RelationClose().)
1355  */
1356 Relation
1357 RelationIdGetRelation(Oid relationId)
1358 {
1359         Relation        rd;
1360
1361         /*
1362          * first try and get a reldesc from the cache
1363          */
1364         rd = RelationIdCacheGetRelation(relationId);
1365         if (RelationIsValid(rd))
1366                 return rd;
1367
1368         /*
1369          * no reldesc in the cache, so have RelationBuildDesc() build one and add
1370          * it.
1371          */
1372         rd = RelationBuildDesc(relationId, NULL);
1373         if (RelationIsValid(rd))
1374                 RelationIncrementReferenceCount(rd);
1375         return rd;
1376 }
1377
1378 /* ----------------------------------------------------------------
1379  *                              cache invalidation support routines
1380  * ----------------------------------------------------------------
1381  */
1382
1383 /*
1384  * RelationIncrementReferenceCount
1385  *              Increments relation reference count.
1386  *
1387  * Note: bootstrap mode has its own weird ideas about relation refcount
1388  * behavior; we ought to fix it someday, but for now, just disable
1389  * reference count ownership tracking in bootstrap mode.
1390  */
1391 void
1392 RelationIncrementReferenceCount(Relation rel)
1393 {
1394         ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner);
1395         rel->rd_refcnt += 1;
1396         if (!IsBootstrapProcessingMode())
1397                 ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel);
1398 }
1399
1400 /*
1401  * RelationDecrementReferenceCount
1402  *              Decrements relation reference count.
1403  */
1404 void
1405 RelationDecrementReferenceCount(Relation rel)
1406 {
1407         Assert(rel->rd_refcnt > 0);
1408         rel->rd_refcnt -= 1;
1409         if (!IsBootstrapProcessingMode())
1410                 ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel);
1411 }
1412
1413 /*
1414  * RelationClose - close an open relation
1415  *
1416  *      Actually, we just decrement the refcount.
1417  *
1418  *      NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
1419  *      will be freed as soon as their refcount goes to zero.  In combination
1420  *      with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
1421  *      to catch references to already-released relcache entries.  It slows
1422  *      things down quite a bit, however.
1423  */
1424 void
1425 RelationClose(Relation relation)
1426 {
1427         /* Note: no locking manipulations needed */
1428         RelationDecrementReferenceCount(relation);
1429
1430 #ifdef RELCACHE_FORCE_RELEASE
1431         if (RelationHasReferenceCountZero(relation) &&
1432                 relation->rd_createSubid == InvalidSubTransactionId)
1433                 RelationClearRelation(relation, false);
1434 #endif
1435 }
1436
1437 /*
1438  * RelationReloadClassinfo - reload the pg_class row (only)
1439  *
1440  *      This function is used only for indexes.  We currently allow only the
1441  *      pg_class row of an existing index to change (to support changes of
1442  *      owner, tablespace, or relfilenode), not its pg_index row or other
1443  *      subsidiary index schema information.  Therefore it's sufficient to do
1444  *      this when we get an SI invalidation.  Furthermore, there are cases
1445  *      where it's necessary not to throw away the index information, especially
1446  *      for "nailed" indexes which we are unable to rebuild on-the-fly.
1447  *
1448  *      We can't necessarily reread the pg_class row right away; we might be
1449  *      in a failed transaction when we receive the SI notification.  If so,
1450  *      RelationClearRelation just marks the entry as invalid by setting
1451  *      rd_isvalid to false.  This routine is called to fix the entry when it
1452  *      is next needed.
1453  */
1454 static void
1455 RelationReloadClassinfo(Relation relation)
1456 {
1457         bool            indexOK;
1458         HeapTuple       pg_class_tuple;
1459         Form_pg_class relp;
1460
1461         /* Should be called only for invalidated indexes */
1462         Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
1463                    !relation->rd_isvalid);
1464         /* Should be closed at smgr level */
1465         Assert(relation->rd_smgr == NULL);
1466
1467         /*
1468          * Read the pg_class row
1469          *
1470          * Don't try to use an indexscan of pg_class_oid_index to reload the info
1471          * for pg_class_oid_index ...
1472          */
1473         indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
1474         pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
1475         if (!HeapTupleIsValid(pg_class_tuple))
1476                 elog(ERROR, "could not find pg_class tuple for index %u",
1477                          RelationGetRelid(relation));
1478         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1479         memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
1480         heap_freetuple(pg_class_tuple);
1481         /* We must recalculate physical address in case it changed */
1482         RelationInitPhysicalAddr(relation);
1483         /* Make sure targblock is reset in case rel was truncated */
1484         relation->rd_targblock = InvalidBlockNumber;
1485         /* Must free any AM cached data, too */
1486         if (relation->rd_amcache)
1487                 pfree(relation->rd_amcache);
1488         relation->rd_amcache = NULL;
1489         /* Okay, now it's valid again */
1490         relation->rd_isvalid = true;
1491 }
1492
1493 /*
1494  * RelationClearRelation
1495  *
1496  *       Physically blow away a relation cache entry, or reset it and rebuild
1497  *       it from scratch (that is, from catalog entries).  The latter path is
1498  *       usually used when we are notified of a change to an open relation
1499  *       (one with refcount > 0).  However, this routine just does whichever
1500  *       it's told to do; callers must determine which they want.
1501  */
1502 static void
1503 RelationClearRelation(Relation relation, bool rebuild)
1504 {
1505         Oid                     old_reltype = relation->rd_rel->reltype;
1506         MemoryContext oldcxt;
1507
1508         /*
1509          * Make sure smgr and lower levels close the relation's files, if they
1510          * weren't closed already.  If the relation is not getting deleted, the
1511          * next smgr access should reopen the files automatically.      This ensures
1512          * that the low-level file access state is updated after, say, a vacuum
1513          * truncation.
1514          */
1515         RelationCloseSmgr(relation);
1516
1517         /*
1518          * Never, never ever blow away a nailed-in system relation, because we'd
1519          * be unable to recover.  However, we must reset rd_targblock, in case we
1520          * got called because of a relation cache flush that was triggered by
1521          * VACUUM.
1522          *
1523          * If it's a nailed index, then we need to re-read the pg_class row to see
1524          * if its relfilenode changed.  We can't necessarily do that here, because
1525          * we might be in a failed transaction.  We assume it's okay to do it if
1526          * there are open references to the relcache entry (cf notes for
1527          * AtEOXact_RelationCache).  Otherwise just mark the entry as possibly
1528          * invalid, and it'll be fixed when next opened.
1529          */
1530         if (relation->rd_isnailed)
1531         {
1532                 relation->rd_targblock = InvalidBlockNumber;
1533                 if (relation->rd_rel->relkind == RELKIND_INDEX)
1534                 {
1535                         relation->rd_isvalid = false;           /* needs to be revalidated */
1536                         if (relation->rd_refcnt > 1)
1537                                 RelationReloadClassinfo(relation);
1538                 }
1539                 return;
1540         }
1541
1542         /*
1543          * Even non-system indexes should not be blown away if they are open and
1544          * have valid index support information.  This avoids problems with active
1545          * use of the index support information.  As with nailed indexes, we
1546          * re-read the pg_class row to handle possible physical relocation of
1547          * the index.
1548          */
1549         if (relation->rd_rel->relkind == RELKIND_INDEX &&
1550                 relation->rd_refcnt > 0 &&
1551                 relation->rd_indexcxt != NULL)
1552         {
1553                 relation->rd_isvalid = false;                   /* needs to be revalidated */
1554                 RelationReloadClassinfo(relation);
1555                 return;
1556         }
1557
1558         /*
1559          * Remove relation from hash tables
1560          *
1561          * Note: we might be reinserting it momentarily, but we must not have it
1562          * visible in the hash tables until it's valid again, so don't try to
1563          * optimize this away...
1564          */
1565         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
1566         RelationCacheDelete(relation);
1567         MemoryContextSwitchTo(oldcxt);
1568
1569         /* Clear out catcache's entries for this relation */
1570         CatalogCacheFlushRelation(RelationGetRelid(relation));
1571
1572         /*
1573          * Free all the subsidiary data structures of the relcache entry. We
1574          * cannot free rd_att if we are trying to rebuild the entry, however,
1575          * because pointers to it may be cached in various places. The rule
1576          * manager might also have pointers into the rewrite rules. So to begin
1577          * with, we can only get rid of these fields:
1578          */
1579         FreeTriggerDesc(relation->trigdesc);
1580         if (relation->rd_indextuple)
1581                 pfree(relation->rd_indextuple);
1582         if (relation->rd_am)
1583                 pfree(relation->rd_am);
1584         if (relation->rd_rel)
1585                 pfree(relation->rd_rel);
1586         list_free(relation->rd_indexlist);
1587         if (relation->rd_indexcxt)
1588                 MemoryContextDelete(relation->rd_indexcxt);
1589
1590         /*
1591          * If we're really done with the relcache entry, blow it away. But if
1592          * someone is still using it, reconstruct the whole deal without moving
1593          * the physical RelationData record (so that the someone's pointer is
1594          * still valid).
1595          */
1596         if (!rebuild)
1597         {
1598                 /* ok to zap remaining substructure */
1599                 flush_rowtype_cache(old_reltype);
1600                 FreeTupleDesc(relation->rd_att);
1601                 if (relation->rd_rulescxt)
1602                         MemoryContextDelete(relation->rd_rulescxt);
1603                 pfree(relation);
1604         }
1605         else
1606         {
1607                 /*
1608                  * When rebuilding an open relcache entry, must preserve ref count and
1609                  * rd_createSubid state.  Also attempt to preserve the tupledesc and
1610                  * rewrite-rule substructures in place.
1611                  *
1612                  * Note that this process does not touch CurrentResourceOwner; which
1613                  * is good because whatever ref counts the entry may have do not
1614                  * necessarily belong to that resource owner.
1615                  */
1616                 Oid                     save_relid = RelationGetRelid(relation);
1617                 int                     old_refcnt = relation->rd_refcnt;
1618                 SubTransactionId old_createSubid = relation->rd_createSubid;
1619                 TupleDesc       old_att = relation->rd_att;
1620                 RuleLock   *old_rules = relation->rd_rules;
1621                 MemoryContext old_rulescxt = relation->rd_rulescxt;
1622
1623                 if (RelationBuildDesc(save_relid, relation) != relation)
1624                 {
1625                         /* Should only get here if relation was deleted */
1626                         flush_rowtype_cache(old_reltype);
1627                         FreeTupleDesc(old_att);
1628                         if (old_rulescxt)
1629                                 MemoryContextDelete(old_rulescxt);
1630                         pfree(relation);
1631                         elog(ERROR, "relation %u deleted while still in use", save_relid);
1632                 }
1633                 relation->rd_refcnt = old_refcnt;
1634                 relation->rd_createSubid = old_createSubid;
1635                 if (equalTupleDescs(old_att, relation->rd_att))
1636                 {
1637                         /* needn't flush typcache here */
1638                         FreeTupleDesc(relation->rd_att);
1639                         relation->rd_att = old_att;
1640                 }
1641                 else
1642                 {
1643                         flush_rowtype_cache(old_reltype);
1644                         FreeTupleDesc(old_att);
1645                 }
1646                 if (equalRuleLocks(old_rules, relation->rd_rules))
1647                 {
1648                         if (relation->rd_rulescxt)
1649                                 MemoryContextDelete(relation->rd_rulescxt);
1650                         relation->rd_rules = old_rules;
1651                         relation->rd_rulescxt = old_rulescxt;
1652                 }
1653                 else
1654                 {
1655                         if (old_rulescxt)
1656                                 MemoryContextDelete(old_rulescxt);
1657                 }
1658         }
1659 }
1660
1661 /*
1662  * RelationFlushRelation
1663  *
1664  *       Rebuild the relation if it is open (refcount > 0), else blow it away.
1665  */
1666 static void
1667 RelationFlushRelation(Relation relation)
1668 {
1669         bool            rebuild;
1670
1671         if (relation->rd_createSubid != InvalidSubTransactionId)
1672         {
1673                 /*
1674                  * New relcache entries are always rebuilt, not flushed; else we'd
1675                  * forget the "new" status of the relation, which is a useful
1676                  * optimization to have.
1677                  */
1678                 rebuild = true;
1679         }
1680         else
1681         {
1682                 /*
1683                  * Pre-existing rels can be dropped from the relcache if not open.
1684                  */
1685                 rebuild = !RelationHasReferenceCountZero(relation);
1686         }
1687
1688         RelationClearRelation(relation, rebuild);
1689 }
1690
1691 /*
1692  * RelationForgetRelation - unconditionally remove a relcache entry
1693  *
1694  *                 External interface for destroying a relcache entry when we
1695  *                 drop the relation.
1696  */
1697 void
1698 RelationForgetRelation(Oid rid)
1699 {
1700         Relation        relation;
1701
1702         RelationIdCacheLookup(rid, relation);
1703
1704         if (!PointerIsValid(relation))
1705                 return;                                 /* not in cache, nothing to do */
1706
1707         if (!RelationHasReferenceCountZero(relation))
1708                 elog(ERROR, "relation %u is still open", rid);
1709
1710         /* Unconditionally destroy the relcache entry */
1711         RelationClearRelation(relation, false);
1712 }
1713
1714 /*
1715  *              RelationCacheInvalidateEntry
1716  *
1717  *              This routine is invoked for SI cache flush messages.
1718  *
1719  * Any relcache entry matching the relid must be flushed.  (Note: caller has
1720  * already determined that the relid belongs to our database or is a shared
1721  * relation.)
1722  *
1723  * We used to skip local relations, on the grounds that they could
1724  * not be targets of cross-backend SI update messages; but it seems
1725  * safer to process them, so that our *own* SI update messages will
1726  * have the same effects during CommandCounterIncrement for both
1727  * local and nonlocal relations.
1728  */
1729 void
1730 RelationCacheInvalidateEntry(Oid relationId)
1731 {
1732         Relation        relation;
1733
1734         RelationIdCacheLookup(relationId, relation);
1735
1736         if (PointerIsValid(relation))
1737         {
1738                 relcacheInvalsReceived++;
1739                 RelationFlushRelation(relation);
1740         }
1741 }
1742
1743 /*
1744  * RelationCacheInvalidate
1745  *       Blow away cached relation descriptors that have zero reference counts,
1746  *       and rebuild those with positive reference counts.      Also reset the smgr
1747  *       relation cache.
1748  *
1749  *       This is currently used only to recover from SI message buffer overflow,
1750  *       so we do not touch new-in-transaction relations; they cannot be targets
1751  *       of cross-backend SI updates (and our own updates now go through a
1752  *       separate linked list that isn't limited by the SI message buffer size).
1753  *
1754  *       We do this in two phases: the first pass deletes deletable items, and
1755  *       the second one rebuilds the rebuildable items.  This is essential for
1756  *       safety, because hash_seq_search only copes with concurrent deletion of
1757  *       the element it is currently visiting.  If a second SI overflow were to
1758  *       occur while we are walking the table, resulting in recursive entry to
1759  *       this routine, we could crash because the inner invocation blows away
1760  *       the entry next to be visited by the outer scan.  But this way is OK,
1761  *       because (a) during the first pass we won't process any more SI messages,
1762  *       so hash_seq_search will complete safely; (b) during the second pass we
1763  *       only hold onto pointers to nondeletable entries.
1764  *
1765  *       The two-phase approach also makes it easy to ensure that we process
1766  *       nailed-in-cache indexes before other nondeletable items, and that we
1767  *       process pg_class_oid_index first of all.  In scenarios where a nailed
1768  *       index has been given a new relfilenode, we have to detect that update
1769  *       before the nailed index is used in reloading any other relcache entry.
1770  */
1771 void
1772 RelationCacheInvalidate(void)
1773 {
1774         HASH_SEQ_STATUS status;
1775         RelIdCacheEnt *idhentry;
1776         Relation        relation;
1777         List       *rebuildFirstList = NIL;
1778         List       *rebuildList = NIL;
1779         ListCell   *l;
1780
1781         /* Phase 1 */
1782         hash_seq_init(&status, RelationIdCache);
1783
1784         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
1785         {
1786                 relation = idhentry->reldesc;
1787
1788                 /* Must close all smgr references to avoid leaving dangling ptrs */
1789                 RelationCloseSmgr(relation);
1790
1791                 /* Ignore new relations, since they are never SI targets */
1792                 if (relation->rd_createSubid != InvalidSubTransactionId)
1793                         continue;
1794
1795                 relcacheInvalsReceived++;
1796
1797                 if (RelationHasReferenceCountZero(relation))
1798                 {
1799                         /* Delete this entry immediately */
1800                         Assert(!relation->rd_isnailed);
1801                         RelationClearRelation(relation, false);
1802                 }
1803                 else
1804                 {
1805                         /*
1806                          * Add this entry to list of stuff to rebuild in second pass.
1807                          * pg_class_oid_index goes on the front of rebuildFirstList, other
1808                          * nailed indexes on the back, and everything else into
1809                          * rebuildList (in no particular order).
1810                          */
1811                         if (relation->rd_isnailed &&
1812                                 relation->rd_rel->relkind == RELKIND_INDEX)
1813                         {
1814                                 if (RelationGetRelid(relation) == ClassOidIndexId)
1815                                         rebuildFirstList = lcons(relation, rebuildFirstList);
1816                                 else
1817                                         rebuildFirstList = lappend(rebuildFirstList, relation);
1818                         }
1819                         else
1820                                 rebuildList = lcons(relation, rebuildList);
1821                 }
1822         }
1823
1824         /*
1825          * Now zap any remaining smgr cache entries.  This must happen before we
1826          * start to rebuild entries, since that may involve catalog fetches which
1827          * will re-open catalog files.
1828          */
1829         smgrcloseall();
1830
1831         /* Phase 2: rebuild the items found to need rebuild in phase 1 */
1832         foreach(l, rebuildFirstList)
1833         {
1834                 relation = (Relation) lfirst(l);
1835                 RelationClearRelation(relation, true);
1836         }
1837         list_free(rebuildFirstList);
1838         foreach(l, rebuildList)
1839         {
1840                 relation = (Relation) lfirst(l);
1841                 RelationClearRelation(relation, true);
1842         }
1843         list_free(rebuildList);
1844 }
1845
1846 /*
1847  * AtEOXact_RelationCache
1848  *
1849  *      Clean up the relcache at main-transaction commit or abort.
1850  *
1851  * Note: this must be called *before* processing invalidation messages.
1852  * In the case of abort, we don't want to try to rebuild any invalidated
1853  * cache entries (since we can't safely do database accesses).  Therefore
1854  * we must reset refcnts before handling pending invalidations.
1855  *
1856  * As of PostgreSQL 8.1, relcache refcnts should get released by the
1857  * ResourceOwner mechanism.  This routine just does a debugging
1858  * cross-check that no pins remain.  However, we also need to do special
1859  * cleanup when the current transaction created any relations or made use
1860  * of forced index lists.
1861  */
1862 void
1863 AtEOXact_RelationCache(bool isCommit)
1864 {
1865         HASH_SEQ_STATUS status;
1866         RelIdCacheEnt *idhentry;
1867
1868         /*
1869          * To speed up transaction exit, we want to avoid scanning the relcache
1870          * unless there is actually something for this routine to do.  Other than
1871          * the debug-only Assert checks, most transactions don't create any work
1872          * for us to do here, so we keep a static flag that gets set if there is
1873          * anything to do.      (Currently, this means either a relation is created in
1874          * the current xact, or an index list is forced.)  For simplicity, the
1875          * flag remains set till end of top-level transaction, even though we
1876          * could clear it at subtransaction end in some cases.
1877          */
1878         if (!need_eoxact_work
1879 #ifdef USE_ASSERT_CHECKING
1880                 && !assert_enabled
1881 #endif
1882                 )
1883                 return;
1884
1885         hash_seq_init(&status, RelationIdCache);
1886
1887         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
1888         {
1889                 Relation        relation = idhentry->reldesc;
1890
1891                 /*
1892                  * The relcache entry's ref count should be back to its normal
1893                  * not-in-a-transaction state: 0 unless it's nailed in cache.
1894                  *
1895                  * In bootstrap mode, this is NOT true, so don't check it --- the
1896                  * bootstrap code expects relations to stay open across start/commit
1897                  * transaction calls.  (That seems bogus, but it's not worth fixing.)
1898                  */
1899 #ifdef USE_ASSERT_CHECKING
1900                 if (!IsBootstrapProcessingMode())
1901                 {
1902                         int                     expected_refcnt;
1903
1904                         expected_refcnt = relation->rd_isnailed ? 1 : 0;
1905                         Assert(relation->rd_refcnt == expected_refcnt);
1906                 }
1907 #endif
1908
1909                 /*
1910                  * Is it a relation created in the current transaction?
1911                  *
1912                  * During commit, reset the flag to zero, since we are now out of the
1913                  * creating transaction.  During abort, simply delete the relcache
1914                  * entry --- it isn't interesting any longer.  (NOTE: if we have
1915                  * forgotten the new-ness of a new relation due to a forced cache
1916                  * flush, the entry will get deleted anyway by shared-cache-inval
1917                  * processing of the aborted pg_class insertion.)
1918                  */
1919                 if (relation->rd_createSubid != InvalidSubTransactionId)
1920                 {
1921                         if (isCommit)
1922                                 relation->rd_createSubid = InvalidSubTransactionId;
1923                         else
1924                         {
1925                                 RelationClearRelation(relation, false);
1926                                 continue;
1927                         }
1928                 }
1929
1930                 /*
1931                  * Flush any temporary index list.
1932                  */
1933                 if (relation->rd_indexvalid == 2)
1934                 {
1935                         list_free(relation->rd_indexlist);
1936                         relation->rd_indexlist = NIL;
1937                         relation->rd_oidindex = InvalidOid;
1938                         relation->rd_indexvalid = 0;
1939                 }
1940         }
1941
1942         /* Once done with the transaction, we can reset need_eoxact_work */
1943         need_eoxact_work = false;
1944 }
1945
1946 /*
1947  * AtEOSubXact_RelationCache
1948  *
1949  *      Clean up the relcache at sub-transaction commit or abort.
1950  *
1951  * Note: this must be called *before* processing invalidation messages.
1952  */
1953 void
1954 AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
1955                                                   SubTransactionId parentSubid)
1956 {
1957         HASH_SEQ_STATUS status;
1958         RelIdCacheEnt *idhentry;
1959
1960         /*
1961          * Skip the relcache scan if nothing to do --- see notes for
1962          * AtEOXact_RelationCache.
1963          */
1964         if (!need_eoxact_work)
1965                 return;
1966
1967         hash_seq_init(&status, RelationIdCache);
1968
1969         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
1970         {
1971                 Relation        relation = idhentry->reldesc;
1972
1973                 /*
1974                  * Is it a relation created in the current subtransaction?
1975                  *
1976                  * During subcommit, mark it as belonging to the parent, instead.
1977                  * During subabort, simply delete the relcache entry.
1978                  */
1979                 if (relation->rd_createSubid == mySubid)
1980                 {
1981                         if (isCommit)
1982                                 relation->rd_createSubid = parentSubid;
1983                         else
1984                         {
1985                                 Assert(RelationHasReferenceCountZero(relation));
1986                                 RelationClearRelation(relation, false);
1987                                 continue;
1988                         }
1989                 }
1990
1991                 /*
1992                  * Flush any temporary index list.
1993                  */
1994                 if (relation->rd_indexvalid == 2)
1995                 {
1996                         list_free(relation->rd_indexlist);
1997                         relation->rd_indexlist = NIL;
1998                         relation->rd_oidindex = InvalidOid;
1999                         relation->rd_indexvalid = 0;
2000                 }
2001         }
2002 }
2003
2004 /*
2005  *              RelationBuildLocalRelation
2006  *                      Build a relcache entry for an about-to-be-created relation,
2007  *                      and enter it into the relcache.
2008  */
2009 Relation
2010 RelationBuildLocalRelation(const char *relname,
2011                                                    Oid relnamespace,
2012                                                    TupleDesc tupDesc,
2013                                                    Oid relid,
2014                                                    Oid reltablespace,
2015                                                    bool shared_relation)
2016 {
2017         Relation        rel;
2018         MemoryContext oldcxt;
2019         int                     natts = tupDesc->natts;
2020         int                     i;
2021         bool            has_not_null;
2022         bool            nailit;
2023
2024         AssertArg(natts >= 0);
2025
2026         /*
2027          * check for creation of a rel that must be nailed in cache.
2028          *
2029          * XXX this list had better match RelationCacheInitialize's list.
2030          */
2031         switch (relid)
2032         {
2033                 case RelationRelationId:
2034                 case AttributeRelationId:
2035                 case ProcedureRelationId:
2036                 case TypeRelationId:
2037                         nailit = true;
2038                         break;
2039                 default:
2040                         nailit = false;
2041                         break;
2042         }
2043
2044         /*
2045          * switch to the cache context to create the relcache entry.
2046          */
2047         if (!CacheMemoryContext)
2048                 CreateCacheMemoryContext();
2049
2050         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2051
2052         /*
2053          * allocate a new relation descriptor and fill in basic state fields.
2054          */
2055         rel = (Relation) palloc0(sizeof(RelationData));
2056
2057         rel->rd_targblock = InvalidBlockNumber;
2058
2059         /* make sure relation is marked as having no open file yet */
2060         rel->rd_smgr = NULL;
2061
2062         /* mark it nailed if appropriate */
2063         rel->rd_isnailed = nailit;
2064
2065         rel->rd_refcnt = nailit ? 1 : 0;
2066
2067         /* it's being created in this transaction */
2068         rel->rd_createSubid = GetCurrentSubTransactionId();
2069
2070         /* must flag that we have rels created in this transaction */
2071         need_eoxact_work = true;
2072
2073         /* is it a temporary relation? */
2074         rel->rd_istemp = isTempNamespace(relnamespace);
2075
2076         /*
2077          * create a new tuple descriptor from the one passed in.  We do this
2078          * partly to copy it into the cache context, and partly because the new
2079          * relation can't have any defaults or constraints yet; they have to be
2080          * added in later steps, because they require additions to multiple system
2081          * catalogs.  We can copy attnotnull constraints here, however.
2082          */
2083         rel->rd_att = CreateTupleDescCopy(tupDesc);
2084         has_not_null = false;
2085         for (i = 0; i < natts; i++)
2086         {
2087                 rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
2088                 has_not_null |= tupDesc->attrs[i]->attnotnull;
2089         }
2090
2091         if (has_not_null)
2092         {
2093                 TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
2094
2095                 constr->has_not_null = true;
2096                 rel->rd_att->constr = constr;
2097         }
2098
2099         /*
2100          * initialize relation tuple form (caller may add/override data later)
2101          */
2102         rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
2103
2104         namestrcpy(&rel->rd_rel->relname, relname);
2105         rel->rd_rel->relnamespace = relnamespace;
2106
2107         rel->rd_rel->relkind = RELKIND_UNCATALOGED;
2108         rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
2109         rel->rd_rel->relnatts = natts;
2110         rel->rd_rel->reltype = InvalidOid;
2111         /* needed when bootstrapping: */
2112         rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
2113
2114         /*
2115          * Insert relation physical and logical identifiers (OIDs) into the right
2116          * places.      Note that the physical ID (relfilenode) is initially the same
2117          * as the logical ID (OID).
2118          */
2119         rel->rd_rel->relisshared = shared_relation;
2120
2121         RelationGetRelid(rel) = relid;
2122
2123         for (i = 0; i < natts; i++)
2124                 rel->rd_att->attrs[i]->attrelid = relid;
2125
2126         rel->rd_rel->relfilenode = relid;
2127         rel->rd_rel->reltablespace = reltablespace;
2128
2129         RelationInitLockInfo(rel);      /* see lmgr.c */
2130
2131         RelationInitPhysicalAddr(rel);
2132
2133         /*
2134          * Okay to insert into the relcache hash tables.
2135          */
2136         RelationCacheInsert(rel);
2137
2138         /*
2139          * done building relcache entry.
2140          */
2141         MemoryContextSwitchTo(oldcxt);
2142
2143         /* It's fully valid */
2144         rel->rd_isvalid = true;
2145
2146         /*
2147          * Caller expects us to pin the returned entry.
2148          */
2149         RelationIncrementReferenceCount(rel);
2150
2151         return rel;
2152 }
2153
2154 /*
2155  *              RelationCacheInitialize
2156  *
2157  *              This initializes the relation descriptor cache.  At the time
2158  *              that this is invoked, we can't do database access yet (mainly
2159  *              because the transaction subsystem is not up), so we can't get
2160  *              "real" info.  However it's okay to read the pg_internal.init
2161  *              cache file, if one is available.  Otherwise we make phony
2162  *              entries for the minimum set of nailed-in-cache relations.
2163  */
2164
2165 #define INITRELCACHESIZE                400
2166
2167 void
2168 RelationCacheInitialize(void)
2169 {
2170         MemoryContext oldcxt;
2171         HASHCTL         ctl;
2172
2173         /*
2174          * switch to cache memory context
2175          */
2176         if (!CacheMemoryContext)
2177                 CreateCacheMemoryContext();
2178
2179         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2180
2181         /*
2182          * create hashtables that index the relcache
2183          */
2184         MemSet(&ctl, 0, sizeof(ctl));
2185         ctl.keysize = sizeof(Oid);
2186         ctl.entrysize = sizeof(RelIdCacheEnt);
2187         ctl.hash = oid_hash;
2188         RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
2189                                                                   &ctl, HASH_ELEM | HASH_FUNCTION);
2190
2191         /*
2192          * Try to load the relcache cache file.  If successful, we're done for
2193          * now.  Otherwise, initialize the cache with pre-made descriptors for the
2194          * critical "nailed-in" system catalogs.
2195          */
2196         if (IsBootstrapProcessingMode() ||
2197                 !load_relcache_init_file())
2198         {
2199                 formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
2200                                   true, Natts_pg_class, Desc_pg_class);
2201                 formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
2202                                   false, Natts_pg_attribute, Desc_pg_attribute);
2203                 formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
2204                                   true, Natts_pg_proc, Desc_pg_proc);
2205                 formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
2206                                   true, Natts_pg_type, Desc_pg_type);
2207
2208 #define NUM_CRITICAL_RELS       4       /* fix if you change list above */
2209         }
2210
2211         MemoryContextSwitchTo(oldcxt);
2212 }
2213
2214 /*
2215  *              RelationCacheInitializePhase2
2216  *
2217  *              This is called as soon as the catcache and transaction system
2218  *              are functional.  At this point we can actually read data from
2219  *              the system catalogs.  Update the relcache entries made during
2220  *              RelationCacheInitialize, and make sure we have entries for the
2221  *              critical system indexes.
2222  */
2223 void
2224 RelationCacheInitializePhase2(void)
2225 {
2226         HASH_SEQ_STATUS status;
2227         RelIdCacheEnt *idhentry;
2228
2229         if (IsBootstrapProcessingMode())
2230                 return;
2231
2232         /*
2233          * If we didn't get the critical system indexes loaded into relcache, do
2234          * so now.      These are critical because the catcache depends on them for
2235          * catcache fetches that are done during relcache load.  Thus, we have an
2236          * infinite-recursion problem.  We can break the recursion by doing
2237          * heapscans instead of indexscans at certain key spots. To avoid hobbling
2238          * performance, we only want to do that until we have the critical indexes
2239          * loaded into relcache.  Thus, the flag criticalRelcachesBuilt is used to
2240          * decide whether to do heapscan or indexscan at the key spots, and we set
2241          * it true after we've loaded the critical indexes.
2242          *
2243          * The critical indexes are marked as "nailed in cache", partly to make it
2244          * easy for load_relcache_init_file to count them, but mainly because we
2245          * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
2246          * true.  (NOTE: perhaps it would be possible to reload them by
2247          * temporarily setting criticalRelcachesBuilt to false again.  For now,
2248          * though, we just nail 'em in.)
2249          *
2250          * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
2251          * in the same way as the others, because the critical catalogs don't
2252          * (currently) have any rules or triggers, and so these indexes can be
2253          * rebuilt without inducing recursion.  However they are used during
2254          * relcache load when a rel does have rules or triggers, so we choose to
2255          * nail them for performance reasons.
2256          */
2257         if (!criticalRelcachesBuilt)
2258         {
2259                 Relation        ird;
2260
2261 #define LOAD_CRIT_INDEX(indexoid) \
2262                 do { \
2263                         ird = RelationBuildDesc((indexoid), NULL); \
2264                         ird->rd_isnailed = true; \
2265                         ird->rd_refcnt = 1; \
2266                 } while (0)
2267
2268                 LOAD_CRIT_INDEX(ClassOidIndexId);
2269                 LOAD_CRIT_INDEX(AttributeRelidNumIndexId);
2270                 LOAD_CRIT_INDEX(IndexRelidIndexId);
2271                 LOAD_CRIT_INDEX(AccessMethodStrategyIndexId);
2272                 LOAD_CRIT_INDEX(AccessMethodProcedureIndexId);
2273                 LOAD_CRIT_INDEX(OperatorOidIndexId);
2274                 LOAD_CRIT_INDEX(RewriteRelRulenameIndexId);
2275                 LOAD_CRIT_INDEX(TriggerRelidNameIndexId);
2276
2277 #define NUM_CRITICAL_INDEXES    8               /* fix if you change list above */
2278
2279                 criticalRelcachesBuilt = true;
2280         }
2281
2282         /*
2283          * Now, scan all the relcache entries and update anything that might be
2284          * wrong in the results from formrdesc or the relcache cache file. If we
2285          * faked up relcache entries using formrdesc, then read the real pg_class
2286          * rows and replace the fake entries with them. Also, if any of the
2287          * relcache entries have rules or triggers, load that info the hard way
2288          * since it isn't recorded in the cache file.
2289          */
2290         hash_seq_init(&status, RelationIdCache);
2291
2292         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2293         {
2294                 Relation        relation = idhentry->reldesc;
2295
2296                 /*
2297                  * If it's a faked-up entry, read the real pg_class tuple.
2298                  */
2299                 if (needNewCacheFile && relation->rd_isnailed)
2300                 {
2301                         HeapTuple       htup;
2302                         Form_pg_class relp;
2303
2304                         htup = SearchSysCache(RELOID,
2305                                                                 ObjectIdGetDatum(RelationGetRelid(relation)),
2306                                                                   0, 0, 0);
2307                         if (!HeapTupleIsValid(htup))
2308                                 elog(FATAL, "cache lookup failed for relation %u",
2309                                          RelationGetRelid(relation));
2310                         relp = (Form_pg_class) GETSTRUCT(htup);
2311
2312                         /*
2313                          * Copy tuple to relation->rd_rel. (See notes in
2314                          * AllocateRelationDesc())
2315                          */
2316                         Assert(relation->rd_rel != NULL);
2317                         memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
2318
2319                         /*
2320                          * Also update the derived fields in rd_att.
2321                          */
2322                         relation->rd_att->tdtypeid = relp->reltype;
2323                         relation->rd_att->tdtypmod = -1;        /* unnecessary, but... */
2324                         relation->rd_att->tdhasoid = relp->relhasoids;
2325
2326                         ReleaseSysCache(htup);
2327                 }
2328
2329                 /*
2330                  * Fix data that isn't saved in relcache cache file.
2331                  */
2332                 if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
2333                         RelationBuildRuleLock(relation);
2334                 if (relation->rd_rel->reltriggers > 0 && relation->trigdesc == NULL)
2335                         RelationBuildTriggers(relation);
2336         }
2337 }
2338
2339 /*
2340  *              RelationCacheInitializePhase3
2341  *
2342  *              Final step of relcache initialization: write out a new relcache
2343  *              cache file if one is needed.
2344  */
2345 void
2346 RelationCacheInitializePhase3(void)
2347 {
2348         if (IsBootstrapProcessingMode())
2349                 return;
2350
2351         if (needNewCacheFile)
2352         {
2353                 /*
2354                  * Force all the catcaches to finish initializing and thereby open the
2355                  * catalogs and indexes they use.  This will preload the relcache with
2356                  * entries for all the most important system catalogs and indexes, so
2357                  * that the init file will be most useful for future backends.
2358                  */
2359                 InitCatalogCachePhase2();
2360
2361                 /* now write the file */
2362                 write_relcache_init_file();
2363         }
2364 }
2365
2366 /*
2367  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
2368  *
2369  * We need this kluge because we have to be able to access non-fixed-width
2370  * fields of pg_index before we have the standard catalog caches available.
2371  * We use predefined data that's set up in just the same way as the
2372  * bootstrapped reldescs used by formrdesc().  The resulting tupdesc is
2373  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid,
2374  * nor does it have a TupleConstr field.  But it's good enough for the
2375  * purpose of extracting fields.
2376  */
2377 static TupleDesc
2378 GetPgIndexDescriptor(void)
2379 {
2380         static TupleDesc pgindexdesc = NULL;
2381         MemoryContext oldcxt;
2382         int                     i;
2383
2384         /* Already done? */
2385         if (pgindexdesc)
2386                 return pgindexdesc;
2387
2388         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2389
2390         pgindexdesc = CreateTemplateTupleDesc(Natts_pg_index, false);
2391         pgindexdesc->tdtypeid = RECORDOID;      /* not right, but we don't care */
2392         pgindexdesc->tdtypmod = -1;
2393
2394         for (i = 0; i < Natts_pg_index; i++)
2395         {
2396                 memcpy(pgindexdesc->attrs[i],
2397                            &Desc_pg_index[i],
2398                            ATTRIBUTE_TUPLE_SIZE);
2399                 /* make sure attcacheoff is valid */
2400                 pgindexdesc->attrs[i]->attcacheoff = -1;
2401         }
2402
2403         /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
2404         pgindexdesc->attrs[0]->attcacheoff = 0;
2405
2406         /* Note: we don't bother to set up a TupleConstr entry */
2407
2408         MemoryContextSwitchTo(oldcxt);
2409
2410         return pgindexdesc;
2411 }
2412
2413 static void
2414 AttrDefaultFetch(Relation relation)
2415 {
2416         AttrDefault *attrdef = relation->rd_att->constr->defval;
2417         int                     ndef = relation->rd_att->constr->num_defval;
2418         Relation        adrel;
2419         SysScanDesc adscan;
2420         ScanKeyData skey;
2421         HeapTuple       htup;
2422         Datum           val;
2423         bool            isnull;
2424         int                     found;
2425         int                     i;
2426
2427         ScanKeyInit(&skey,
2428                                 Anum_pg_attrdef_adrelid,
2429                                 BTEqualStrategyNumber, F_OIDEQ,
2430                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2431
2432         adrel = heap_open(AttrDefaultRelationId, AccessShareLock);
2433         adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
2434                                                                 SnapshotNow, 1, &skey);
2435         found = 0;
2436
2437         while (HeapTupleIsValid(htup = systable_getnext(adscan)))
2438         {
2439                 Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
2440
2441                 for (i = 0; i < ndef; i++)
2442                 {
2443                         if (adform->adnum != attrdef[i].adnum)
2444                                 continue;
2445                         if (attrdef[i].adbin != NULL)
2446                                 elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
2447                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2448                                          RelationGetRelationName(relation));
2449                         else
2450                                 found++;
2451
2452                         val = fastgetattr(htup,
2453                                                           Anum_pg_attrdef_adbin,
2454                                                           adrel->rd_att, &isnull);
2455                         if (isnull)
2456                                 elog(WARNING, "null adbin for attr %s of rel %s",
2457                                 NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
2458                                          RelationGetRelationName(relation));
2459                         else
2460                                 attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext,
2461                                                                  DatumGetCString(DirectFunctionCall1(textout,
2462                                                                                                                                          val)));
2463                         break;
2464                 }
2465
2466                 if (i >= ndef)
2467                         elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
2468                                  adform->adnum, RelationGetRelationName(relation));
2469         }
2470
2471         systable_endscan(adscan);
2472         heap_close(adrel, AccessShareLock);
2473
2474         if (found != ndef)
2475                 elog(WARNING, "%d attrdef record(s) missing for rel %s",
2476                          ndef - found, RelationGetRelationName(relation));
2477 }
2478
2479 static void
2480 CheckConstraintFetch(Relation relation)
2481 {
2482         ConstrCheck *check = relation->rd_att->constr->check;
2483         int                     ncheck = relation->rd_att->constr->num_check;
2484         Relation        conrel;
2485         SysScanDesc conscan;
2486         ScanKeyData skey[1];
2487         HeapTuple       htup;
2488         Datum           val;
2489         bool            isnull;
2490         int                     found = 0;
2491
2492         ScanKeyInit(&skey[0],
2493                                 Anum_pg_constraint_conrelid,
2494                                 BTEqualStrategyNumber, F_OIDEQ,
2495                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2496
2497         conrel = heap_open(ConstraintRelationId, AccessShareLock);
2498         conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
2499                                                                  SnapshotNow, 1, skey);
2500
2501         while (HeapTupleIsValid(htup = systable_getnext(conscan)))
2502         {
2503                 Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup);
2504
2505                 /* We want check constraints only */
2506                 if (conform->contype != CONSTRAINT_CHECK)
2507                         continue;
2508
2509                 if (found >= ncheck)
2510                         elog(ERROR, "unexpected constraint record found for rel %s",
2511                                  RelationGetRelationName(relation));
2512
2513                 check[found].ccname = MemoryContextStrdup(CacheMemoryContext,
2514                                                                                                   NameStr(conform->conname));
2515
2516                 /* Grab and test conbin is actually set */
2517                 val = fastgetattr(htup,
2518                                                   Anum_pg_constraint_conbin,
2519                                                   conrel->rd_att, &isnull);
2520                 if (isnull)
2521                         elog(ERROR, "null conbin for rel %s",
2522                                  RelationGetRelationName(relation));
2523
2524                 check[found].ccbin = MemoryContextStrdup(CacheMemoryContext,
2525                                                                  DatumGetCString(DirectFunctionCall1(textout,
2526                                                                                                                                          val)));
2527                 found++;
2528         }
2529
2530         systable_endscan(conscan);
2531         heap_close(conrel, AccessShareLock);
2532
2533         if (found != ncheck)
2534                 elog(ERROR, "%d constraint record(s) missing for rel %s",
2535                          ncheck - found, RelationGetRelationName(relation));
2536 }
2537
2538 /*
2539  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
2540  *
2541  * The index list is created only if someone requests it.  We scan pg_index
2542  * to find relevant indexes, and add the list to the relcache entry so that
2543  * we won't have to compute it again.  Note that shared cache inval of a
2544  * relcache entry will delete the old list and set rd_indexvalid to 0,
2545  * so that we must recompute the index list on next request.  This handles
2546  * creation or deletion of an index.
2547  *
2548  * The returned list is guaranteed to be sorted in order by OID.  This is
2549  * needed by the executor, since for index types that we obtain exclusive
2550  * locks on when updating the index, all backends must lock the indexes in
2551  * the same order or we will get deadlocks (see ExecOpenIndices()).  Any
2552  * consistent ordering would do, but ordering by OID is easy.
2553  *
2554  * Since shared cache inval causes the relcache's copy of the list to go away,
2555  * we return a copy of the list palloc'd in the caller's context.  The caller
2556  * may list_free() the returned list after scanning it. This is necessary
2557  * since the caller will typically be doing syscache lookups on the relevant
2558  * indexes, and syscache lookup could cause SI messages to be processed!
2559  *
2560  * We also update rd_oidindex, which this module treats as effectively part
2561  * of the index list.  rd_oidindex is valid when rd_indexvalid isn't zero;
2562  * it is the pg_class OID of a unique index on OID when the relation has one,
2563  * and InvalidOid if there is no such index.
2564  */
2565 List *
2566 RelationGetIndexList(Relation relation)
2567 {
2568         Relation        indrel;
2569         SysScanDesc indscan;
2570         ScanKeyData skey;
2571         HeapTuple       htup;
2572         List       *result;
2573         Oid                     oidIndex;
2574         MemoryContext oldcxt;
2575
2576         /* Quick exit if we already computed the list. */
2577         if (relation->rd_indexvalid != 0)
2578                 return list_copy(relation->rd_indexlist);
2579
2580         /*
2581          * We build the list we intend to return (in the caller's context) while
2582          * doing the scan.      After successfully completing the scan, we copy that
2583          * list into the relcache entry.  This avoids cache-context memory leakage
2584          * if we get some sort of error partway through.
2585          */
2586         result = NIL;
2587         oidIndex = InvalidOid;
2588
2589         /* Prepare to scan pg_index for entries having indrelid = this rel. */
2590         ScanKeyInit(&skey,
2591                                 Anum_pg_index_indrelid,
2592                                 BTEqualStrategyNumber, F_OIDEQ,
2593                                 ObjectIdGetDatum(RelationGetRelid(relation)));
2594
2595         indrel = heap_open(IndexRelationId, AccessShareLock);
2596         indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
2597                                                                  SnapshotNow, 1, &skey);
2598
2599         while (HeapTupleIsValid(htup = systable_getnext(indscan)))
2600         {
2601                 Form_pg_index index = (Form_pg_index) GETSTRUCT(htup);
2602
2603                 /* Add index's OID to result list in the proper order */
2604                 result = insert_ordered_oid(result, index->indexrelid);
2605
2606                 /* Check to see if it is a unique, non-partial btree index on OID */
2607                 if (index->indnatts == 1 &&
2608                         index->indisunique &&
2609                         index->indkey.values[0] == ObjectIdAttributeNumber &&
2610                         index->indclass.values[0] == OID_BTREE_OPS_OID &&
2611                         heap_attisnull(htup, Anum_pg_index_indpred))
2612                         oidIndex = index->indexrelid;
2613         }
2614
2615         systable_endscan(indscan);
2616         heap_close(indrel, AccessShareLock);
2617
2618         /* Now save a copy of the completed list in the relcache entry. */
2619         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2620         relation->rd_indexlist = list_copy(result);
2621         relation->rd_oidindex = oidIndex;
2622         relation->rd_indexvalid = 1;
2623         MemoryContextSwitchTo(oldcxt);
2624
2625         return result;
2626 }
2627
2628 /*
2629  * insert_ordered_oid
2630  *              Insert a new Oid into a sorted list of Oids, preserving ordering
2631  *
2632  * Building the ordered list this way is O(N^2), but with a pretty small
2633  * constant, so for the number of entries we expect it will probably be
2634  * faster than trying to apply qsort().  Most tables don't have very many
2635  * indexes...
2636  */
2637 static List *
2638 insert_ordered_oid(List *list, Oid datum)
2639 {
2640         ListCell   *prev;
2641
2642         /* Does the datum belong at the front? */
2643         if (list == NIL || datum < linitial_oid(list))
2644                 return lcons_oid(datum, list);
2645         /* No, so find the entry it belongs after */
2646         prev = list_head(list);
2647         for (;;)
2648         {
2649                 ListCell   *curr = lnext(prev);
2650
2651                 if (curr == NULL || datum < lfirst_oid(curr))
2652                         break;                          /* it belongs after 'prev', before 'curr' */
2653
2654                 prev = curr;
2655         }
2656         /* Insert datum into list after 'prev' */
2657         lappend_cell_oid(list, prev, datum);
2658         return list;
2659 }
2660
2661 /*
2662  * RelationSetIndexList -- externally force the index list contents
2663  *
2664  * This is used to temporarily override what we think the set of valid
2665  * indexes is (including the presence or absence of an OID index).
2666  * The forcing will be valid only until transaction commit or abort.
2667  *
2668  * This should only be applied to nailed relations, because in a non-nailed
2669  * relation the hacked index list could be lost at any time due to SI
2670  * messages.  In practice it is only used on pg_class (see REINDEX).
2671  *
2672  * It is up to the caller to make sure the given list is correctly ordered.
2673  */
2674 void
2675 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
2676 {
2677         MemoryContext oldcxt;
2678
2679         Assert(relation->rd_isnailed);
2680         /* Copy the list into the cache context (could fail for lack of mem) */
2681         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2682         indexIds = list_copy(indexIds);
2683         MemoryContextSwitchTo(oldcxt);
2684         /* Okay to replace old list */
2685         list_free(relation->rd_indexlist);
2686         relation->rd_indexlist = indexIds;
2687         relation->rd_oidindex = oidIndex;
2688         relation->rd_indexvalid = 2;    /* mark list as forced */
2689         /* must flag that we have a forced index list */
2690         need_eoxact_work = true;
2691 }
2692
2693 /*
2694  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
2695  *
2696  * Returns InvalidOid if there is no such index.
2697  */
2698 Oid
2699 RelationGetOidIndex(Relation relation)
2700 {
2701         List       *ilist;
2702
2703         /*
2704          * If relation doesn't have OIDs at all, caller is probably confused. (We
2705          * could just silently return InvalidOid, but it seems better to throw an
2706          * assertion.)
2707          */
2708         Assert(relation->rd_rel->relhasoids);
2709
2710         if (relation->rd_indexvalid == 0)
2711         {
2712                 /* RelationGetIndexList does the heavy lifting. */
2713                 ilist = RelationGetIndexList(relation);
2714                 list_free(ilist);
2715                 Assert(relation->rd_indexvalid != 0);
2716         }
2717
2718         return relation->rd_oidindex;
2719 }
2720
2721 /*
2722  * RelationGetIndexExpressions -- get the index expressions for an index
2723  *
2724  * We cache the result of transforming pg_index.indexprs into a node tree.
2725  * If the rel is not an index or has no expressional columns, we return NIL.
2726  * Otherwise, the returned tree is copied into the caller's memory context.
2727  * (We don't want to return a pointer to the relcache copy, since it could
2728  * disappear due to relcache invalidation.)
2729  */
2730 List *
2731 RelationGetIndexExpressions(Relation relation)
2732 {
2733         List       *result;
2734         Datum           exprsDatum;
2735         bool            isnull;
2736         char       *exprsString;
2737         MemoryContext oldcxt;
2738
2739         /* Quick exit if we already computed the result. */
2740         if (relation->rd_indexprs)
2741                 return (List *) copyObject(relation->rd_indexprs);
2742
2743         /* Quick exit if there is nothing to do. */
2744         if (relation->rd_indextuple == NULL ||
2745                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs))
2746                 return NIL;
2747
2748         /*
2749          * We build the tree we intend to return in the caller's context. After
2750          * successfully completing the work, we copy it into the relcache entry.
2751          * This avoids problems if we get some sort of error partway through.
2752          */
2753         exprsDatum = heap_getattr(relation->rd_indextuple,
2754                                                           Anum_pg_index_indexprs,
2755                                                           GetPgIndexDescriptor(),
2756                                                           &isnull);
2757         Assert(!isnull);
2758         exprsString = DatumGetCString(DirectFunctionCall1(textout, exprsDatum));
2759         result = (List *) stringToNode(exprsString);
2760         pfree(exprsString);
2761
2762         /*
2763          * Run the expressions through eval_const_expressions. This is not just an
2764          * optimization, but is necessary, because the planner will be comparing
2765          * them to similarly-processed qual clauses, and may fail to detect valid
2766          * matches without this.  We don't bother with canonicalize_qual, however.
2767          */
2768         result = (List *) eval_const_expressions((Node *) result);
2769
2770         /*
2771          * Also mark any coercion format fields as "don't care", so that the
2772          * planner can match to both explicit and implicit coercions.
2773          */
2774         set_coercionform_dontcare((Node *) result);
2775
2776         /* May as well fix opfuncids too */
2777         fix_opfuncids((Node *) result);
2778
2779         /* Now save a copy of the completed tree in the relcache entry. */
2780         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2781         relation->rd_indexprs = (List *) copyObject(result);
2782         MemoryContextSwitchTo(oldcxt);
2783
2784         return result;
2785 }
2786
2787 /*
2788  * RelationGetIndexPredicate -- get the index predicate for an index
2789  *
2790  * We cache the result of transforming pg_index.indpred into an implicit-AND
2791  * node tree (suitable for ExecQual).
2792  * If the rel is not an index or has no predicate, we return NIL.
2793  * Otherwise, the returned tree is copied into the caller's memory context.
2794  * (We don't want to return a pointer to the relcache copy, since it could
2795  * disappear due to relcache invalidation.)
2796  */
2797 List *
2798 RelationGetIndexPredicate(Relation relation)
2799 {
2800         List       *result;
2801         Datum           predDatum;
2802         bool            isnull;
2803         char       *predString;
2804         MemoryContext oldcxt;
2805
2806         /* Quick exit if we already computed the result. */
2807         if (relation->rd_indpred)
2808                 return (List *) copyObject(relation->rd_indpred);
2809
2810         /* Quick exit if there is nothing to do. */
2811         if (relation->rd_indextuple == NULL ||
2812                 heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred))
2813                 return NIL;
2814
2815         /*
2816          * We build the tree we intend to return in the caller's context. After
2817          * successfully completing the work, we copy it into the relcache entry.
2818          * This avoids problems if we get some sort of error partway through.
2819          */
2820         predDatum = heap_getattr(relation->rd_indextuple,
2821                                                          Anum_pg_index_indpred,
2822                                                          GetPgIndexDescriptor(),
2823                                                          &isnull);
2824         Assert(!isnull);
2825         predString = DatumGetCString(DirectFunctionCall1(textout, predDatum));
2826         result = (List *) stringToNode(predString);
2827         pfree(predString);
2828
2829         /*
2830          * Run the expression through const-simplification and canonicalization.
2831          * This is not just an optimization, but is necessary, because the planner
2832          * will be comparing it to similarly-processed qual clauses, and may fail
2833          * to detect valid matches without this.  This must match the processing
2834          * done to qual clauses in preprocess_expression()!  (We can skip the
2835          * stuff involving subqueries, however, since we don't allow any in index
2836          * predicates.)
2837          */
2838         result = (List *) eval_const_expressions((Node *) result);
2839
2840         result = (List *) canonicalize_qual((Expr *) result);
2841
2842         /*
2843          * Also mark any coercion format fields as "don't care", so that the
2844          * planner can match to both explicit and implicit coercions.
2845          */
2846         set_coercionform_dontcare((Node *) result);
2847
2848         /* Also convert to implicit-AND format */
2849         result = make_ands_implicit((Expr *) result);
2850
2851         /* May as well fix opfuncids too */
2852         fix_opfuncids((Node *) result);
2853
2854         /* Now save a copy of the completed tree in the relcache entry. */
2855         oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
2856         relation->rd_indpred = (List *) copyObject(result);
2857         MemoryContextSwitchTo(oldcxt);
2858
2859         return result;
2860 }
2861
2862
2863 /*
2864  *      load_relcache_init_file, write_relcache_init_file
2865  *
2866  *              In late 1992, we started regularly having databases with more than
2867  *              a thousand classes in them.  With this number of classes, it became
2868  *              critical to do indexed lookups on the system catalogs.
2869  *
2870  *              Bootstrapping these lookups is very hard.  We want to be able to
2871  *              use an index on pg_attribute, for example, but in order to do so,
2872  *              we must have read pg_attribute for the attributes in the index,
2873  *              which implies that we need to use the index.
2874  *
2875  *              In order to get around the problem, we do the following:
2876  *
2877  *                 +  When the database system is initialized (at initdb time), we
2878  *                        don't use indexes.  We do sequential scans.
2879  *
2880  *                 +  When the backend is started up in normal mode, we load an image
2881  *                        of the appropriate relation descriptors, in internal format,
2882  *                        from an initialization file in the data/base/... directory.
2883  *
2884  *                 +  If the initialization file isn't there, then we create the
2885  *                        relation descriptors using sequential scans and write 'em to
2886  *                        the initialization file for use by subsequent backends.
2887  *
2888  *              We could dispense with the initialization file and just build the
2889  *              critical reldescs the hard way on every backend startup, but that
2890  *              slows down backend startup noticeably.
2891  *
2892  *              We can in fact go further, and save more relcache entries than
2893  *              just the ones that are absolutely critical; this allows us to speed
2894  *              up backend startup by not having to build such entries the hard way.
2895  *              Presently, all the catalog and index entries that are referred to
2896  *              by catcaches are stored in the initialization file.
2897  *
2898  *              The same mechanism that detects when catcache and relcache entries
2899  *              need to be invalidated (due to catalog updates) also arranges to
2900  *              unlink the initialization file when its contents may be out of date.
2901  *              The file will then be rebuilt during the next backend startup.
2902  */
2903
2904 /*
2905  * load_relcache_init_file -- attempt to load cache from the init file
2906  *
2907  * If successful, return TRUE and set criticalRelcachesBuilt to true.
2908  * If not successful, return FALSE and set needNewCacheFile to true.
2909  *
2910  * NOTE: we assume we are already switched into CacheMemoryContext.
2911  */
2912 static bool
2913 load_relcache_init_file(void)
2914 {
2915         FILE       *fp;
2916         char            initfilename[MAXPGPATH];
2917         Relation   *rels;
2918         int                     relno,
2919                                 num_rels,
2920                                 max_rels,
2921                                 nailed_rels,
2922                                 nailed_indexes,
2923                                 magic;
2924         int                     i;
2925
2926         snprintf(initfilename, sizeof(initfilename), "%s/%s",
2927                          DatabasePath, RELCACHE_INIT_FILENAME);
2928
2929         fp = AllocateFile(initfilename, PG_BINARY_R);
2930         if (fp == NULL)
2931         {
2932                 needNewCacheFile = true;
2933                 return false;
2934         }
2935
2936         /*
2937          * Read the index relcache entries from the file.  Note we will not enter
2938          * any of them into the cache if the read fails partway through; this
2939          * helps to guard against broken init files.
2940          */
2941         max_rels = 100;
2942         rels = (Relation *) palloc(max_rels * sizeof(Relation));
2943         num_rels = 0;
2944         nailed_rels = nailed_indexes = 0;
2945         initFileRelationIds = NIL;
2946
2947         /* check for correct magic number (compatible version) */
2948         if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
2949                 goto read_failed;
2950         if (magic != RELCACHE_INIT_FILEMAGIC)
2951                 goto read_failed;
2952
2953         for (relno = 0;; relno++)
2954         {
2955                 Size            len;
2956                 size_t          nread;
2957                 Relation        rel;
2958                 Form_pg_class relform;
2959                 bool            has_not_null;
2960                 Datum           indclassDatum;
2961                 bool            isnull;
2962
2963                 /* first read the relation descriptor length */
2964                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
2965                 {
2966                         if (nread == 0)
2967                                 break;                  /* end of file */
2968                         goto read_failed;
2969                 }
2970
2971                 /* safety check for incompatible relcache layout */
2972                 if (len != sizeof(RelationData))
2973                         goto read_failed;
2974
2975                 /* allocate another relcache header */
2976                 if (num_rels >= max_rels)
2977                 {
2978                         max_rels *= 2;
2979                         rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
2980                 }
2981
2982                 rel = rels[num_rels++] = (Relation) palloc(len);
2983
2984                 /* then, read the Relation structure */
2985                 if ((nread = fread(rel, 1, len, fp)) != len)
2986                         goto read_failed;
2987
2988                 /* next read the relation tuple form */
2989                 if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
2990                         goto read_failed;
2991
2992                 relform = (Form_pg_class) palloc(len);
2993                 if ((nread = fread(relform, 1, len, fp)) != len)
2994                         goto read_failed;
2995
2996                 rel->rd_rel = relform;
2997
2998                 /* initialize attribute tuple forms */
2999                 rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
3000                                                                                           relform->relhasoids);
3001                 rel->rd_att->tdtypeid = relform->reltype;
3002                 rel->rd_att->tdtypmod = -1;             /* unnecessary, but... */
3003
3004                 /* next read all the attribute tuple form data entries */
3005                 has_not_null = false;
3006                 for (i = 0; i < relform->relnatts; i++)
3007                 {
3008                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3009                                 goto read_failed;
3010                         if (len != ATTRIBUTE_TUPLE_SIZE)
3011                                 goto read_failed;
3012                         if ((nread = fread(rel->rd_att->attrs[i], 1, len, fp)) != len)
3013                                 goto read_failed;
3014
3015                         has_not_null |= rel->rd_att->attrs[i]->attnotnull;
3016                 }
3017
3018                 /* mark not-null status */
3019                 if (has_not_null)
3020                 {
3021                         TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3022
3023                         constr->has_not_null = true;
3024                         rel->rd_att->constr = constr;
3025                 }
3026
3027                 /* If it's an index, there's more to do */
3028                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3029                 {
3030                         Form_pg_am      am;
3031                         MemoryContext indexcxt;
3032                         Oid                *operator;
3033                         RegProcedure *support;
3034                         int                     nsupport;
3035
3036                         /* Count nailed indexes to ensure we have 'em all */
3037                         if (rel->rd_isnailed)
3038                                 nailed_indexes++;
3039
3040                         /* next, read the pg_index tuple */
3041                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3042                                 goto read_failed;
3043
3044                         rel->rd_indextuple = (HeapTuple) palloc(len);
3045                         if ((nread = fread(rel->rd_indextuple, 1, len, fp)) != len)
3046                                 goto read_failed;
3047
3048                         /* Fix up internal pointers in the tuple -- see heap_copytuple */
3049                         rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
3050                         rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple);
3051
3052                         /* fix up indclass pointer too */
3053                         indclassDatum = fastgetattr(rel->rd_indextuple,
3054                                                                                 Anum_pg_index_indclass,
3055                                                                                 GetPgIndexDescriptor(),
3056                                                                                 &isnull);
3057                         Assert(!isnull);
3058                         rel->rd_indclass = (oidvector *) DatumGetPointer(indclassDatum);
3059
3060                         /* next, read the access method tuple form */
3061                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3062                                 goto read_failed;
3063
3064                         am = (Form_pg_am) palloc(len);
3065                         if ((nread = fread(am, 1, len, fp)) != len)
3066                                 goto read_failed;
3067                         rel->rd_am = am;
3068
3069                         /*
3070                          * prepare index info context --- parameters should match
3071                          * RelationInitIndexAccessInfo
3072                          */
3073                         indexcxt = AllocSetContextCreate(CacheMemoryContext,
3074                                                                                          RelationGetRelationName(rel),
3075                                                                                          ALLOCSET_SMALL_MINSIZE,
3076                                                                                          ALLOCSET_SMALL_INITSIZE,
3077                                                                                          ALLOCSET_SMALL_MAXSIZE);
3078                         rel->rd_indexcxt = indexcxt;
3079
3080                         /* next, read the vector of operator OIDs */
3081                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3082                                 goto read_failed;
3083
3084                         operator = (Oid *) MemoryContextAlloc(indexcxt, len);
3085                         if ((nread = fread(operator, 1, len, fp)) != len)
3086                                 goto read_failed;
3087
3088                         rel->rd_operator = operator;
3089
3090                         /* finally, read the vector of support procedures */
3091                         if ((nread = fread(&len, 1, sizeof(len), fp)) != sizeof(len))
3092                                 goto read_failed;
3093                         support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
3094                         if ((nread = fread(support, 1, len, fp)) != len)
3095                                 goto read_failed;
3096
3097                         rel->rd_support = support;
3098
3099                         /* set up zeroed fmgr-info vectors */
3100                         rel->rd_aminfo = (RelationAmInfo *)
3101                                 MemoryContextAllocZero(indexcxt, sizeof(RelationAmInfo));
3102                         nsupport = relform->relnatts * am->amsupport;
3103                         rel->rd_supportinfo = (FmgrInfo *)
3104                                 MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
3105                 }
3106                 else
3107                 {
3108                         /* Count nailed rels to ensure we have 'em all */
3109                         if (rel->rd_isnailed)
3110                                 nailed_rels++;
3111
3112                         Assert(rel->rd_index == NULL);
3113                         Assert(rel->rd_indextuple == NULL);
3114                         Assert(rel->rd_indclass == NULL);
3115                         Assert(rel->rd_am == NULL);
3116                         Assert(rel->rd_indexcxt == NULL);
3117                         Assert(rel->rd_aminfo == NULL);
3118                         Assert(rel->rd_operator == NULL);
3119                         Assert(rel->rd_support == NULL);
3120                         Assert(rel->rd_supportinfo == NULL);
3121                 }
3122
3123                 /*
3124                  * Rules and triggers are not saved (mainly because the internal
3125                  * format is complex and subject to change).  They must be rebuilt if
3126                  * needed by RelationCacheInitializePhase2.  This is not expected to
3127                  * be a big performance hit since few system catalogs have such. Ditto
3128                  * for index expressions and predicates.
3129                  */
3130                 rel->rd_rules = NULL;
3131                 rel->rd_rulescxt = NULL;
3132                 rel->trigdesc = NULL;
3133                 rel->rd_indexprs = NIL;
3134                 rel->rd_indpred = NIL;
3135
3136                 /*
3137                  * Reset transient-state fields in the relcache entry
3138                  */
3139                 rel->rd_smgr = NULL;
3140                 rel->rd_targblock = InvalidBlockNumber;
3141                 if (rel->rd_isnailed)
3142                         rel->rd_refcnt = 1;
3143                 else
3144                         rel->rd_refcnt = 0;
3145                 rel->rd_indexvalid = 0;
3146                 rel->rd_indexlist = NIL;
3147                 rel->rd_oidindex = InvalidOid;
3148                 rel->rd_createSubid = InvalidSubTransactionId;
3149                 rel->rd_amcache = NULL;
3150                 MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
3151
3152                 /*
3153                  * Recompute lock and physical addressing info.  This is needed in
3154                  * case the pg_internal.init file was copied from some other database
3155                  * by CREATE DATABASE.
3156                  */
3157                 RelationInitLockInfo(rel);
3158                 RelationInitPhysicalAddr(rel);
3159         }
3160
3161         /*
3162          * We reached the end of the init file without apparent problem. Did we
3163          * get the right number of nailed items?  (This is a useful crosscheck in
3164          * case the set of critical rels or indexes changes.)
3165          */
3166         if (nailed_rels != NUM_CRITICAL_RELS ||
3167                 nailed_indexes != NUM_CRITICAL_INDEXES)
3168                 goto read_failed;
3169
3170         /*
3171          * OK, all appears well.
3172          *
3173          * Now insert all the new relcache entries into the cache.
3174          */
3175         for (relno = 0; relno < num_rels; relno++)
3176         {
3177                 RelationCacheInsert(rels[relno]);
3178                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3179                 initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
3180                                                                                 initFileRelationIds);
3181         }
3182
3183         pfree(rels);
3184         FreeFile(fp);
3185
3186         criticalRelcachesBuilt = true;
3187         return true;
3188
3189         /*
3190          * init file is broken, so do it the hard way.  We don't bother trying to
3191          * free the clutter we just allocated; it's not in the relcache so it
3192          * won't hurt.
3193          */
3194 read_failed:
3195         pfree(rels);
3196         FreeFile(fp);
3197
3198         needNewCacheFile = true;
3199         return false;
3200 }
3201
3202 /*
3203  * Write out a new initialization file with the current contents
3204  * of the relcache.
3205  */
3206 static void
3207 write_relcache_init_file(void)
3208 {
3209         FILE       *fp;
3210         char            tempfilename[MAXPGPATH];
3211         char            finalfilename[MAXPGPATH];
3212         int                     magic;
3213         HASH_SEQ_STATUS status;
3214         RelIdCacheEnt *idhentry;
3215         MemoryContext oldcxt;
3216         int                     i;
3217
3218         /*
3219          * We must write a temporary file and rename it into place. Otherwise,
3220          * another backend starting at about the same time might crash trying to
3221          * read the partially-complete file.
3222          */
3223         snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
3224                          DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
3225         snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
3226                          DatabasePath, RELCACHE_INIT_FILENAME);
3227
3228         unlink(tempfilename);           /* in case it exists w/wrong permissions */
3229
3230         fp = AllocateFile(tempfilename, PG_BINARY_W);
3231         if (fp == NULL)
3232         {
3233                 /*
3234                  * We used to consider this a fatal error, but we might as well
3235                  * continue with backend startup ...
3236                  */
3237                 ereport(WARNING,
3238                                 (errcode_for_file_access(),
3239                                  errmsg("could not create relation-cache initialization file \"%s\": %m",
3240                                                 tempfilename),
3241                           errdetail("Continuing anyway, but there's something wrong.")));
3242                 return;
3243         }
3244
3245         /*
3246          * Write a magic number to serve as a file version identifier.  We can
3247          * change the magic number whenever the relcache layout changes.
3248          */
3249         magic = RELCACHE_INIT_FILEMAGIC;
3250         if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
3251                 elog(FATAL, "could not write init file");
3252
3253         /*
3254          * Write all the reldescs (in no particular order).
3255          */
3256         hash_seq_init(&status, RelationIdCache);
3257
3258         initFileRelationIds = NIL;
3259
3260         while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3261         {
3262                 Relation        rel = idhentry->reldesc;
3263                 Form_pg_class relform = rel->rd_rel;
3264                 Size            len;
3265
3266                 /*
3267                  * first write the relcache entry proper
3268                  */
3269                 len = sizeof(RelationData);
3270
3271                 /* first, write the relation descriptor length */
3272                 if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3273                         elog(FATAL, "could not write init file");
3274
3275                 /* next, write out the Relation structure */
3276                 if (fwrite(rel, 1, len, fp) != len)
3277                         elog(FATAL, "could not write init file");
3278
3279                 /* next write the relation tuple form */
3280                 len = sizeof(FormData_pg_class);
3281                 if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3282                         elog(FATAL, "could not write init file");
3283
3284                 if (fwrite(relform, 1, len, fp) != len)
3285                         elog(FATAL, "could not write init file");
3286
3287                 /* next, do all the attribute tuple form data entries */
3288                 for (i = 0; i < relform->relnatts; i++)
3289                 {
3290                         len = ATTRIBUTE_TUPLE_SIZE;
3291                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3292                                 elog(FATAL, "could not write init file");
3293                         if (fwrite(rel->rd_att->attrs[i], 1, len, fp) != len)
3294                                 elog(FATAL, "could not write init file");
3295                 }
3296
3297                 /* If it's an index, there's more to do */
3298                 if (rel->rd_rel->relkind == RELKIND_INDEX)
3299                 {
3300                         Form_pg_am      am = rel->rd_am;
3301
3302                         /* write the pg_index tuple */
3303                         /* we assume this was created by heap_copytuple! */
3304                         len = HEAPTUPLESIZE + rel->rd_indextuple->t_len;
3305                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3306                                 elog(FATAL, "could not write init file");
3307
3308                         if (fwrite(rel->rd_indextuple, 1, len, fp) != len)
3309                                 elog(FATAL, "could not write init file");
3310
3311                         /* next, write the access method tuple form */
3312                         len = sizeof(FormData_pg_am);
3313                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3314                                 elog(FATAL, "could not write init file");
3315
3316                         if (fwrite(am, 1, len, fp) != len)
3317                                 elog(FATAL, "could not write init file");
3318
3319                         /* next, write the vector of operator OIDs */
3320                         len = relform->relnatts * (am->amstrategies * sizeof(Oid));
3321                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3322                                 elog(FATAL, "could not write init file");
3323
3324                         if (fwrite(rel->rd_operator, 1, len, fp) != len)
3325                                 elog(FATAL, "could not write init file");
3326
3327                         /* finally, write the vector of support procedures */
3328                         len = relform->relnatts * (am->amsupport * sizeof(RegProcedure));
3329                         if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
3330                                 elog(FATAL, "could not write init file");
3331
3332                         if (fwrite(rel->rd_support, 1, len, fp) != len)
3333                                 elog(FATAL, "could not write init file");
3334                 }
3335
3336                 /* also make a list of their OIDs, for RelationIdIsInInitFile */
3337                 oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
3338                 initFileRelationIds = lcons_oid(RelationGetRelid(rel),
3339                                                                                 initFileRelationIds);
3340                 MemoryContextSwitchTo(oldcxt);
3341         }
3342
3343         if (FreeFile(fp))
3344                 elog(FATAL, "could not write init file");
3345
3346         /*
3347          * Now we have to check whether the data we've so painstakingly
3348          * accumulated is already obsolete due to someone else's just-committed
3349          * catalog changes.  If so, we just delete the temp file and leave it to
3350          * the next backend to try again.  (Our own relcache entries will be
3351          * updated by SI message processing, but we can't be sure whether what we
3352          * wrote out was up-to-date.)
3353          *
3354          * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
3355          * grab a serialization lock for the duration.
3356          */
3357         LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3358
3359         /* Make sure we have seen all incoming SI messages */
3360         AcceptInvalidationMessages();
3361
3362         /*
3363          * If we have received any SI relcache invals since backend start, assume
3364          * we may have written out-of-date data.
3365          */
3366         if (relcacheInvalsReceived == 0L)
3367         {
3368                 /*
3369                  * OK, rename the temp file to its final name, deleting any
3370                  * previously-existing init file.
3371                  *
3372                  * Note: a failure here is possible under Cygwin, if some other
3373                  * backend is holding open an unlinked-but-not-yet-gone init file. So
3374                  * treat this as a noncritical failure; just remove the useless temp
3375                  * file on failure.
3376                  */
3377                 if (rename(tempfilename, finalfilename) < 0)
3378                         unlink(tempfilename);
3379         }
3380         else
3381         {
3382                 /* Delete the already-obsolete temp file */
3383                 unlink(tempfilename);
3384         }
3385
3386         LWLockRelease(RelCacheInitLock);
3387 }
3388
3389 /*
3390  * Detect whether a given relation (identified by OID) is one of the ones
3391  * we store in the init file.
3392  *
3393  * Note that we effectively assume that all backends running in a database
3394  * would choose to store the same set of relations in the init file;
3395  * otherwise there are cases where we'd fail to detect the need for an init
3396  * file invalidation.  This does not seem likely to be a problem in practice.
3397  */
3398 bool
3399 RelationIdIsInInitFile(Oid relationId)
3400 {
3401         return list_member_oid(initFileRelationIds, relationId);
3402 }
3403
3404 /*
3405  * Invalidate (remove) the init file during commit of a transaction that
3406  * changed one or more of the relation cache entries that are kept in the
3407  * init file.
3408  *
3409  * We actually need to remove the init file twice: once just before sending
3410  * the SI messages that include relcache inval for such relations, and once
3411  * just after sending them.  The unlink before ensures that a backend that's
3412  * currently starting cannot read the now-obsolete init file and then miss
3413  * the SI messages that will force it to update its relcache entries.  (This
3414  * works because the backend startup sequence gets into the PGPROC array before
3415  * trying to load the init file.)  The unlink after is to synchronize with a
3416  * backend that may currently be trying to write an init file based on data
3417  * that we've just rendered invalid.  Such a backend will see the SI messages,
3418  * but we can't leave the init file sitting around to fool later backends.
3419  *
3420  * Ignore any failure to unlink the file, since it might not be there if
3421  * no backend has been started since the last removal.
3422  */
3423 void
3424 RelationCacheInitFileInvalidate(bool beforeSend)
3425 {
3426         char            initfilename[MAXPGPATH];
3427
3428         snprintf(initfilename, sizeof(initfilename), "%s/%s",
3429                          DatabasePath, RELCACHE_INIT_FILENAME);
3430
3431         if (beforeSend)
3432         {
3433                 /* no interlock needed here */
3434                 unlink(initfilename);
3435         }
3436         else
3437         {
3438                 /*
3439                  * We need to interlock this against write_relcache_init_file, to
3440                  * guard against possibility that someone renames a new-but-
3441                  * already-obsolete init file into place just after we unlink. With
3442                  * the interlock, it's certain that write_relcache_init_file will
3443                  * notice our SI inval message before renaming into place, or else
3444                  * that we will execute second and successfully unlink the file.
3445                  */
3446                 LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
3447                 unlink(initfilename);
3448                 LWLockRelease(RelCacheInitLock);
3449         }
3450 }