OSDN Git Service

Refactor broken CREATE TABLE IF NOT EXISTS support.
[pg-rex/syncrep.git] / src / backend / executor / execMain.c
1 /*-------------------------------------------------------------------------
2  *
3  * execMain.c
4  *        top level executor interface routines
5  *
6  * INTERFACE ROUTINES
7  *      ExecutorStart()
8  *      ExecutorRun()
9  *      ExecutorFinish()
10  *      ExecutorEnd()
11  *
12  *      These four procedures are the external interface to the executor.
13  *      In each case, the query descriptor is required as an argument.
14  *
15  *      ExecutorStart must be called at the beginning of execution of any
16  *      query plan and ExecutorEnd must always be called at the end of
17  *      execution of a plan (unless it is aborted due to error).
18  *
19  *      ExecutorRun accepts direction and count arguments that specify whether
20  *      the plan is to be executed forwards, backwards, and for how many tuples.
21  *      In some cases ExecutorRun may be called multiple times to process all
22  *      the tuples for a plan.  It is also acceptable to stop short of executing
23  *      the whole plan (but only if it is a SELECT).
24  *
25  *      ExecutorFinish must be called after the final ExecutorRun call and
26  *      before ExecutorEnd.  This can be omitted only in case of EXPLAIN,
27  *      which should also omit ExecutorRun.
28  *
29  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
30  * Portions Copyright (c) 1994, Regents of the University of California
31  *
32  *
33  * IDENTIFICATION
34  *        src/backend/executor/execMain.c
35  *
36  *-------------------------------------------------------------------------
37  */
38 #include "postgres.h"
39
40 #include "access/reloptions.h"
41 #include "access/sysattr.h"
42 #include "access/transam.h"
43 #include "access/xact.h"
44 #include "catalog/heap.h"
45 #include "catalog/namespace.h"
46 #include "catalog/toasting.h"
47 #include "commands/tablespace.h"
48 #include "commands/trigger.h"
49 #include "executor/execdebug.h"
50 #include "executor/instrument.h"
51 #include "miscadmin.h"
52 #include "optimizer/clauses.h"
53 #include "parser/parse_clause.h"
54 #include "parser/parsetree.h"
55 #include "storage/bufmgr.h"
56 #include "storage/lmgr.h"
57 #include "storage/smgr.h"
58 #include "tcop/utility.h"
59 #include "utils/acl.h"
60 #include "utils/lsyscache.h"
61 #include "utils/memutils.h"
62 #include "utils/snapmgr.h"
63 #include "utils/tqual.h"
64
65
/*
 * Hooks for plugins to get control in ExecutorStart/Run/Finish/End.
 * A plugin that installs one of these is expected to call the matching
 * standard_Executor*() function itself (possibly wrapping it).
 */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
ExecutorRun_hook_type ExecutorRun_hook = NULL;
ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
ExecutorEnd_hook_type ExecutorEnd_hook = NULL;

/*
 * Hook for plugin to get control in ExecCheckRTPerms(); lets an extension
 * impose additional permission checks beyond the built-in ACL tests.
 */
ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void ExecPostprocessPlan(EState *estate);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static void ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			bool sendTuples,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
				  Plan *planTree);
/* SELECT INTO support: create/close the target rel and receive tuples */
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */
97
98
99 /* ----------------------------------------------------------------
100  *              ExecutorStart
101  *
102  *              This routine must be called at the beginning of any execution of any
103  *              query plan
104  *
105  * Takes a QueryDesc previously created by CreateQueryDesc (which is separate
106  * only because some places use QueryDescs for utility commands).  The tupDesc
107  * field of the QueryDesc is filled in to describe the tuples that will be
108  * returned, and the internal fields (estate and planstate) are set up.
109  *
110  * eflags contains flag bits as described in executor.h.
111  *
112  * NB: the CurrentMemoryContext when this is called will become the parent
113  * of the per-query context used for this Executor invocation.
114  *
115  * We provide a function hook variable that lets loadable plugins
116  * get control when ExecutorStart is called.  Such a plugin would
117  * normally call standard_ExecutorStart().
118  *
119  * ----------------------------------------------------------------
120  */
121 void
122 ExecutorStart(QueryDesc *queryDesc, int eflags)
123 {
124         if (ExecutorStart_hook)
125                 (*ExecutorStart_hook) (queryDesc, eflags);
126         else
127                 standard_ExecutorStart(queryDesc, eflags);
128 }
129
/*
 * standard_ExecutorStart
 *
 * Default implementation of ExecutorStart: builds the per-query EState,
 * performs read-only-transaction checks, assigns the output command ID
 * where needed, and initializes the plan state tree via InitPlan.
 */
void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 * Everything allocated below lives in es_query_cxt and is released
	 * as a whole by FreeExecutorState in ExecutorEnd.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:

			/*
			 * SELECT INTO, SELECT FOR UPDATE/SHARE and modifying CTEs need to
			 * mark tuples
			 */
			if (queryDesc->plannedstmt->intoClause != NULL ||
				queryDesc->plannedstmt->rowMarks != NIL ||
				queryDesc->plannedstmt->hasModifyingCTE)
				estate->es_output_cid = GetCurrentCommandId(true);

			/*
			 * A SELECT without modifying CTEs can't possibly queue triggers,
			 * so force skip-triggers mode. This is just a marginal efficiency
			 * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
			 * all that expensive, but we might as well do it.
			 */
			if (!queryDesc->plannedstmt->hasModifyingCTE)
				eflags |= EXEC_FLAG_SKIP_TRIGGERS;
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			/* writes always need a current command ID to stamp new tuples */
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState.  Register the
	 * snapshots so they survive for the life of the query even if the
	 * caller's references go away.
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_top_eflags = eflags;
	estate->es_instrument = queryDesc->instrument_options;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	/*
	 * Set up an AFTER-trigger statement context, unless told not to, or
	 * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
	 */
	if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
		AfterTriggerBeginQuery();

	MemoryContextSwitchTo(oldcontext);
}
225
226 /* ----------------------------------------------------------------
227  *              ExecutorRun
228  *
229  *              This is the main routine of the executor module. It accepts
230  *              the query descriptor from the traffic cop and executes the
231  *              query plan.
232  *
233  *              ExecutorStart must have been called already.
234  *
235  *              If direction is NoMovementScanDirection then nothing is done
236  *              except to start up/shut down the destination.  Otherwise,
237  *              we retrieve up to 'count' tuples in the specified direction.
238  *
239  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
240  *              completion.
241  *
242  *              There is no return value, but output tuples (if any) are sent to
243  *              the destination receiver specified in the QueryDesc; and the number
244  *              of tuples processed at the top level can be found in
245  *              estate->es_processed.
246  *
247  *              We provide a function hook variable that lets loadable plugins
248  *              get control when ExecutorRun is called.  Such a plugin would
249  *              normally call standard_ExecutorRun().
250  *
251  * ----------------------------------------------------------------
252  */
253 void
254 ExecutorRun(QueryDesc *queryDesc,
255                         ScanDirection direction, long count)
256 {
257         if (ExecutorRun_hook)
258                 (*ExecutorRun_hook) (queryDesc, direction, count);
259         else
260                 standard_ExecutorRun(queryDesc, direction, count);
261 }
262
263 void
264 standard_ExecutorRun(QueryDesc *queryDesc,
265                                          ScanDirection direction, long count)
266 {
267         EState     *estate;
268         CmdType         operation;
269         DestReceiver *dest;
270         bool            sendTuples;
271         MemoryContext oldcontext;
272
273         /* sanity checks */
274         Assert(queryDesc != NULL);
275
276         estate = queryDesc->estate;
277
278         Assert(estate != NULL);
279         Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
280
281         /*
282          * Switch into per-query memory context
283          */
284         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
285
286         /* Allow instrumentation of Executor overall runtime */
287         if (queryDesc->totaltime)
288                 InstrStartNode(queryDesc->totaltime);
289
290         /*
291          * extract information from the query descriptor and the query feature.
292          */
293         operation = queryDesc->operation;
294         dest = queryDesc->dest;
295
296         /*
297          * startup tuple receiver, if we will be emitting tuples
298          */
299         estate->es_processed = 0;
300         estate->es_lastoid = InvalidOid;
301
302         sendTuples = (operation == CMD_SELECT ||
303                                   queryDesc->plannedstmt->hasReturning);
304
305         if (sendTuples)
306                 (*dest->rStartup) (dest, operation, queryDesc->tupDesc);
307
308         /*
309          * run plan
310          */
311         if (!ScanDirectionIsNoMovement(direction))
312                 ExecutePlan(estate,
313                                         queryDesc->planstate,
314                                         operation,
315                                         sendTuples,
316                                         count,
317                                         direction,
318                                         dest);
319
320         /*
321          * shutdown tuple receiver, if we started it
322          */
323         if (sendTuples)
324                 (*dest->rShutdown) (dest);
325
326         if (queryDesc->totaltime)
327                 InstrStopNode(queryDesc->totaltime, estate->es_processed);
328
329         MemoryContextSwitchTo(oldcontext);
330 }
331
332 /* ----------------------------------------------------------------
333  *              ExecutorFinish
334  *
335  *              This routine must be called after the last ExecutorRun call.
336  *              It performs cleanup such as firing AFTER triggers.      It is
337  *              separate from ExecutorEnd because EXPLAIN ANALYZE needs to
338  *              include these actions in the total runtime.
339  *
340  *              We provide a function hook variable that lets loadable plugins
341  *              get control when ExecutorFinish is called.      Such a plugin would
342  *              normally call standard_ExecutorFinish().
343  *
344  * ----------------------------------------------------------------
345  */
346 void
347 ExecutorFinish(QueryDesc *queryDesc)
348 {
349         if (ExecutorFinish_hook)
350                 (*ExecutorFinish_hook) (queryDesc);
351         else
352                 standard_ExecutorFinish(queryDesc);
353 }
354
355 void
356 standard_ExecutorFinish(QueryDesc *queryDesc)
357 {
358         EState     *estate;
359         MemoryContext oldcontext;
360
361         /* sanity checks */
362         Assert(queryDesc != NULL);
363
364         estate = queryDesc->estate;
365
366         Assert(estate != NULL);
367         Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
368
369         /* This should be run once and only once per Executor instance */
370         Assert(!estate->es_finished);
371
372         /* Switch into per-query memory context */
373         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
374
375         /* Allow instrumentation of Executor overall runtime */
376         if (queryDesc->totaltime)
377                 InstrStartNode(queryDesc->totaltime);
378
379         /* Run ModifyTable nodes to completion */
380         ExecPostprocessPlan(estate);
381
382         /* Execute queued AFTER triggers, unless told not to */
383         if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
384                 AfterTriggerEndQuery(estate);
385
386         if (queryDesc->totaltime)
387                 InstrStopNode(queryDesc->totaltime, 0);
388
389         MemoryContextSwitchTo(oldcontext);
390
391         estate->es_finished = true;
392 }
393
394 /* ----------------------------------------------------------------
395  *              ExecutorEnd
396  *
397  *              This routine must be called at the end of execution of any
398  *              query plan
399  *
400  *              We provide a function hook variable that lets loadable plugins
401  *              get control when ExecutorEnd is called.  Such a plugin would
402  *              normally call standard_ExecutorEnd().
403  *
404  * ----------------------------------------------------------------
405  */
406 void
407 ExecutorEnd(QueryDesc *queryDesc)
408 {
409         if (ExecutorEnd_hook)
410                 (*ExecutorEnd_hook) (queryDesc);
411         else
412                 standard_ExecutorEnd(queryDesc);
413 }
414
/*
 * standard_ExecutorEnd
 *
 * Default implementation of ExecutorEnd: shuts down the plan state tree,
 * closes any SELECT INTO target, releases registered snapshots, and frees
 * the per-query EState and memory context.
 */
void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
	 * might forget to call it.
	 */
	Assert(estate->es_finished ||
		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Close the SELECT INTO relation if any
	 */
	if (estate->es_select_into)
		CloseIntoRel(queryDesc);

	/* do away with our snapshots (registered in standard_ExecutorStart) */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}
470
471 /* ----------------------------------------------------------------
472  *              ExecutorRewind
473  *
474  *              This routine may be called on an open queryDesc to rewind it
475  *              to the start.
476  * ----------------------------------------------------------------
477  */
478 void
479 ExecutorRewind(QueryDesc *queryDesc)
480 {
481         EState     *estate;
482         MemoryContext oldcontext;
483
484         /* sanity checks */
485         Assert(queryDesc != NULL);
486
487         estate = queryDesc->estate;
488
489         Assert(estate != NULL);
490
491         /* It's probably not sensible to rescan updating queries */
492         Assert(queryDesc->operation == CMD_SELECT);
493
494         /*
495          * Switch into per-query memory context
496          */
497         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
498
499         /*
500          * rescan plan
501          */
502         ExecReScan(queryDesc->planstate);
503
504         MemoryContextSwitchTo(oldcontext);
505 }
506
507
508 /*
509  * ExecCheckRTPerms
510  *              Check access permissions for all relations listed in a range table.
511  *
512  * Returns true if permissions are adequate.  Otherwise, throws an appropriate
513  * error if ereport_on_violation is true, or simply returns false otherwise.
514  */
515 bool
516 ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
517 {
518         ListCell   *l;
519         bool            result = true;
520
521         foreach(l, rangeTable)
522         {
523                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
524
525                 result = ExecCheckRTEPerms(rte);
526                 if (!result)
527                 {
528                         Assert(rte->rtekind == RTE_RELATION);
529                         if (ereport_on_violation)
530                                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
531                                                            get_rel_name(rte->relid));
532                         return false;
533                 }
534         }
535
536         if (ExecutorCheckPerms_hook)
537                 result = (*ExecutorCheckPerms_hook) (rangeTable,
538                                                                                          ereport_on_violation);
539         return result;
540 }
541
542 /*
543  * ExecCheckRTEPerms
544  *              Check access permissions for a single RTE.
545  */
/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 *
 * Returns true if the current (or check-as) user holds all permissions in
 * rte->requiredPerms, counting both relation-level and column-level grants.
 */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;
	Bitmapset  *tmpset;
	int			col;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join, subquery, and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		/*
		 * If we lack any permissions that exist only as relation permissions
		 * (i.e. anything besides SELECT/INSERT/UPDATE, which have column
		 * variants), we can fail straight away.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			/* check every explicitly referenced column */
			tmpset = bms_copy(rte->selectedCols);
			while ((col = bms_first_member(tmpset)) >= 0)
			{
				/* remove the column number offset */
				col += FirstLowInvalidHeapAttributeNumber;
				if (col == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, col, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
			bms_free(tmpset);
		}

		/*
		 * Basically the same for the mod columns, with either INSERT or
		 * UPDATE privilege as specified by remainingPerms.
		 */
		remainingPerms &= ~ACL_SELECT;
		if (remainingPerms != 0)
		{
			/*
			 * When the query doesn't explicitly change any columns, allow the
			 * query if we have permission on any column of the rel.  This is
			 * to handle SELECT FOR UPDATE as well as possible corner cases in
			 * INSERT and UPDATE.
			 */
			if (bms_is_empty(rte->modifiedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, remainingPerms,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			/* check every explicitly modified column */
			tmpset = bms_copy(rte->modifiedCols);
			while ((col = bms_first_member(tmpset)) >= 0)
			{
				/* remove the column number offset */
				col += FirstLowInvalidHeapAttributeNumber;
				if (col == InvalidAttrNumber)
				{
					/* whole-row reference can't happen here */
					elog(ERROR, "whole-row update is not implemented");
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, col, userid,
											  remainingPerms) != ACLCHECK_OK)
						return false;
				}
			}
			bms_free(tmpset);
		}
	}
	return true;
}
685
686 /*
687  * Check that the query does not imply any writes to non-temp tables.
688  *
689  * Note: in a Hot Standby slave this would need to reject writes to temp
690  * tables as well; but an HS slave can't have created any temp tables
691  * in the first place, so no need to check that.
692  */
693 static void
694 ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
695 {
696         ListCell   *l;
697
698         /*
699          * CREATE TABLE AS or SELECT INTO?
700          *
701          * XXX should we allow this if the destination is temp?  Considering that
702          * it would still require catalog changes, probably not.
703          */
704         if (plannedstmt->intoClause != NULL)
705                 PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
706
707         /* Fail if write permissions are requested on any non-temp table */
708         foreach(l, plannedstmt->rtable)
709         {
710                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
711
712                 if (rte->rtekind != RTE_RELATION)
713                         continue;
714
715                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
716                         continue;
717
718                 if (isTempNamespace(get_rel_namespace(rte->relid)))
719                         continue;
720
721                 PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
722         }
723 }
724
725
726 /* ----------------------------------------------------------------
727  *              InitPlan
728  *
729  *              Initializes the query plan: open files, allocate storage
730  *              and start up the rule manager
731  * ----------------------------------------------------------------
732  */
733 static void
734 InitPlan(QueryDesc *queryDesc, int eflags)
735 {
736         CmdType         operation = queryDesc->operation;
737         PlannedStmt *plannedstmt = queryDesc->plannedstmt;
738         Plan       *plan = plannedstmt->planTree;
739         List       *rangeTable = plannedstmt->rtable;
740         EState     *estate = queryDesc->estate;
741         PlanState  *planstate;
742         TupleDesc       tupType;
743         ListCell   *l;
744         int                     i;
745
746         /*
747          * Do permissions checks
748          */
749         ExecCheckRTPerms(rangeTable, true);
750
751         /*
752          * initialize the node's execution state
753          */
754         estate->es_range_table = rangeTable;
755         estate->es_plannedstmt = plannedstmt;
756
757         /*
758          * initialize result relation stuff, and open/lock the result rels.
759          *
760          * We must do this before initializing the plan tree, else we might try to
761          * do a lock upgrade if a result rel is also a source rel.
762          */
763         if (plannedstmt->resultRelations)
764         {
765                 List       *resultRelations = plannedstmt->resultRelations;
766                 int                     numResultRelations = list_length(resultRelations);
767                 ResultRelInfo *resultRelInfos;
768                 ResultRelInfo *resultRelInfo;
769
770                 resultRelInfos = (ResultRelInfo *)
771                         palloc(numResultRelations * sizeof(ResultRelInfo));
772                 resultRelInfo = resultRelInfos;
773                 foreach(l, resultRelations)
774                 {
775                         Index           resultRelationIndex = lfirst_int(l);
776                         Oid                     resultRelationOid;
777                         Relation        resultRelation;
778
779                         resultRelationOid = getrelid(resultRelationIndex, rangeTable);
780                         resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
781                         InitResultRelInfo(resultRelInfo,
782                                                           resultRelation,
783                                                           resultRelationIndex,
784                                                           estate->es_instrument);
785                         resultRelInfo++;
786                 }
787                 estate->es_result_relations = resultRelInfos;
788                 estate->es_num_result_relations = numResultRelations;
789                 /* es_result_relation_info is NULL except when within ModifyTable */
790                 estate->es_result_relation_info = NULL;
791         }
792         else
793         {
794                 /*
795                  * if no result relation, then set state appropriately
796                  */
797                 estate->es_result_relations = NULL;
798                 estate->es_num_result_relations = 0;
799                 estate->es_result_relation_info = NULL;
800         }
801
802         /*
803          * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE
804          * before we initialize the plan tree, else we'd be risking lock upgrades.
805          * While we are at it, build the ExecRowMark list.
806          */
807         estate->es_rowMarks = NIL;
808         foreach(l, plannedstmt->rowMarks)
809         {
810                 PlanRowMark *rc = (PlanRowMark *) lfirst(l);
811                 Oid                     relid;
812                 Relation        relation;
813                 ExecRowMark *erm;
814
815                 /* ignore "parent" rowmarks; they are irrelevant at runtime */
816                 if (rc->isParent)
817                         continue;
818
819                 switch (rc->markType)
820                 {
821                         case ROW_MARK_EXCLUSIVE:
822                         case ROW_MARK_SHARE:
823                                 relid = getrelid(rc->rti, rangeTable);
824                                 relation = heap_open(relid, RowShareLock);
825                                 break;
826                         case ROW_MARK_REFERENCE:
827                                 relid = getrelid(rc->rti, rangeTable);
828                                 relation = heap_open(relid, AccessShareLock);
829                                 break;
830                         case ROW_MARK_COPY:
831                                 /* there's no real table here ... */
832                                 relation = NULL;
833                                 break;
834                         default:
835                                 elog(ERROR, "unrecognized markType: %d", rc->markType);
836                                 relation = NULL;        /* keep compiler quiet */
837                                 break;
838                 }
839
840                 /* if foreign table, tuples can't be locked */
841                 if (relation && relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
842                         ereport(ERROR,
843                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
844                                          errmsg("SELECT FOR UPDATE/SHARE cannot be used with foreign table \"%s\"",
845                                                         RelationGetRelationName(relation))));
846
847                 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
848                 erm->relation = relation;
849                 erm->rti = rc->rti;
850                 erm->prti = rc->prti;
851                 erm->rowmarkId = rc->rowmarkId;
852                 erm->markType = rc->markType;
853                 erm->noWait = rc->noWait;
854                 ItemPointerSetInvalid(&(erm->curCtid));
855                 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
856         }
857
858         /*
859          * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
860          * flag appropriately so that the plan tree will be initialized with the
861          * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
862          */
863         estate->es_select_into = false;
864         if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
865         {
866                 estate->es_select_into = true;
867                 estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
868         }
869
870         /*
871          * Initialize the executor's tuple table to empty.
872          */
873         estate->es_tupleTable = NIL;
874         estate->es_trig_tuple_slot = NULL;
875         estate->es_trig_oldtup_slot = NULL;
876
877         /* mark EvalPlanQual not active */
878         estate->es_epqTuple = NULL;
879         estate->es_epqTupleSet = NULL;
880         estate->es_epqScanDone = NULL;
881
882         /*
883          * Initialize private state information for each SubPlan.  We must do this
884          * before running ExecInitNode on the main query tree, since
885          * ExecInitSubPlan expects to be able to find these entries.
886          */
887         Assert(estate->es_subplanstates == NIL);
888         i = 1;                                          /* subplan indices count from 1 */
889         foreach(l, plannedstmt->subplans)
890         {
891                 Plan       *subplan = (Plan *) lfirst(l);
892                 PlanState  *subplanstate;
893                 int                     sp_eflags;
894
895                 /*
896                  * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
897                  * it is a parameterless subplan (not initplan), we suggest that it be
898                  * prepared to handle REWIND efficiently; otherwise there is no need.
899                  */
900                 sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
901                 if (bms_is_member(i, plannedstmt->rewindPlanIDs))
902                         sp_eflags |= EXEC_FLAG_REWIND;
903
904                 subplanstate = ExecInitNode(subplan, estate, sp_eflags);
905
906                 estate->es_subplanstates = lappend(estate->es_subplanstates,
907                                                                                    subplanstate);
908
909                 i++;
910         }
911
912         /*
913          * Initialize the private state information for all the nodes in the query
914          * tree.  This opens files, allocates storage and leaves us ready to start
915          * processing tuples.
916          */
917         planstate = ExecInitNode(plan, estate, eflags);
918
919         /*
920          * Get the tuple descriptor describing the type of tuples to return. (this
921          * is especially important if we are creating a relation with "SELECT
922          * INTO")
923          */
924         tupType = ExecGetResultType(planstate);
925
926         /*
927          * Initialize the junk filter if needed.  SELECT queries need a filter if
928          * there are any junk attrs in the top-level tlist.
929          */
930         if (operation == CMD_SELECT)
931         {
932                 bool            junk_filter_needed = false;
933                 ListCell   *tlist;
934
935                 foreach(tlist, plan->targetlist)
936                 {
937                         TargetEntry *tle = (TargetEntry *) lfirst(tlist);
938
939                         if (tle->resjunk)
940                         {
941                                 junk_filter_needed = true;
942                                 break;
943                         }
944                 }
945
946                 if (junk_filter_needed)
947                 {
948                         JunkFilter *j;
949
950                         j = ExecInitJunkFilter(planstate->plan->targetlist,
951                                                                    tupType->tdhasoid,
952                                                                    ExecInitExtraTupleSlot(estate));
953                         estate->es_junkFilter = j;
954
955                         /* Want to return the cleaned tuple type */
956                         tupType = j->jf_cleanTupType;
957                 }
958         }
959
960         queryDesc->tupDesc = tupType;
961         queryDesc->planstate = planstate;
962
963         /*
964          * If doing SELECT INTO, initialize the "into" relation.  We must wait
965          * till now so we have the "clean" result tuple type to create the new
966          * table from.
967          *
968          * If EXPLAIN, skip creating the "into" relation.
969          */
970         if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
971                 OpenIntoRel(queryDesc);
972 }
973
974 /*
975  * Check that a proposed result relation is a legal target for the operation
976  *
977  * In most cases parser and/or planner should have noticed this already, but
978  * let's make sure.  In the view case we do need a test here, because if the
979  * view wasn't rewritten by a rule, it had better have an INSTEAD trigger.
980  */
981 void
982 CheckValidResultRel(Relation resultRel, CmdType operation)
983 {
984         TriggerDesc *trigDesc = resultRel->trigdesc;
985
986         switch (resultRel->rd_rel->relkind)
987         {
988                 case RELKIND_RELATION:
989                         /* OK */
990                         break;
991                 case RELKIND_SEQUENCE:
992                         ereport(ERROR,
993                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
994                                          errmsg("cannot change sequence \"%s\"",
995                                                         RelationGetRelationName(resultRel))));
996                         break;
997                 case RELKIND_TOASTVALUE:
998                         ereport(ERROR,
999                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1000                                          errmsg("cannot change TOAST relation \"%s\"",
1001                                                         RelationGetRelationName(resultRel))));
1002                         break;
1003                 case RELKIND_VIEW:
1004                         switch (operation)
1005                         {
1006                                 case CMD_INSERT:
1007                                         if (!trigDesc || !trigDesc->trig_insert_instead_row)
1008                                                 ereport(ERROR,
1009                                                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1010                                                    errmsg("cannot insert into view \"%s\"",
1011                                                                   RelationGetRelationName(resultRel)),
1012                                                    errhint("You need an unconditional ON INSERT DO INSTEAD rule or an INSTEAD OF INSERT trigger.")));
1013                                         break;
1014                                 case CMD_UPDATE:
1015                                         if (!trigDesc || !trigDesc->trig_update_instead_row)
1016                                                 ereport(ERROR,
1017                                                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1018                                                    errmsg("cannot update view \"%s\"",
1019                                                                   RelationGetRelationName(resultRel)),
1020                                                    errhint("You need an unconditional ON UPDATE DO INSTEAD rule or an INSTEAD OF UPDATE trigger.")));
1021                                         break;
1022                                 case CMD_DELETE:
1023                                         if (!trigDesc || !trigDesc->trig_delete_instead_row)
1024                                                 ereport(ERROR,
1025                                                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1026                                                    errmsg("cannot delete from view \"%s\"",
1027                                                                   RelationGetRelationName(resultRel)),
1028                                                    errhint("You need an unconditional ON DELETE DO INSTEAD rule or an INSTEAD OF DELETE trigger.")));
1029                                         break;
1030                                 default:
1031                                         elog(ERROR, "unrecognized CmdType: %d", (int) operation);
1032                                         break;
1033                         }
1034                         break;
1035                 case RELKIND_FOREIGN_TABLE:
1036                         ereport(ERROR,
1037                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1038                                          errmsg("cannot change foreign table \"%s\"",
1039                                                         RelationGetRelationName(resultRel))));
1040                         break;
1041                 default:
1042                         ereport(ERROR,
1043                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1044                                          errmsg("cannot change relation \"%s\"",
1045                                                         RelationGetRelationName(resultRel))));
1046                         break;
1047         }
1048 }
1049
1050 /*
1051  * Initialize ResultRelInfo data for one result relation
1052  *
1053  * Caution: before Postgres 9.1, this function included the relkind checking
1054  * that's now in CheckValidResultRel, and it also did ExecOpenIndices if
1055  * appropriate.  Be sure callers cover those needs.
1056  */
1057 void
1058 InitResultRelInfo(ResultRelInfo *resultRelInfo,
1059                                   Relation resultRelationDesc,
1060                                   Index resultRelationIndex,
1061                                   int instrument_options)
1062 {
1063         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
1064         resultRelInfo->type = T_ResultRelInfo;
1065         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
1066         resultRelInfo->ri_RelationDesc = resultRelationDesc;
1067         resultRelInfo->ri_NumIndices = 0;
1068         resultRelInfo->ri_IndexRelationDescs = NULL;
1069         resultRelInfo->ri_IndexRelationInfo = NULL;
1070         /* make a copy so as not to depend on relcache info not changing... */
1071         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
1072         if (resultRelInfo->ri_TrigDesc)
1073         {
1074                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
1075
1076                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1077                         palloc0(n * sizeof(FmgrInfo));
1078                 resultRelInfo->ri_TrigWhenExprs = (List **)
1079                         palloc0(n * sizeof(List *));
1080                 if (instrument_options)
1081                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
1082         }
1083         else
1084         {
1085                 resultRelInfo->ri_TrigFunctions = NULL;
1086                 resultRelInfo->ri_TrigWhenExprs = NULL;
1087                 resultRelInfo->ri_TrigInstrument = NULL;
1088         }
1089         resultRelInfo->ri_ConstraintExprs = NULL;
1090         resultRelInfo->ri_junkFilter = NULL;
1091         resultRelInfo->ri_projectReturning = NULL;
1092 }
1093
1094 /*
1095  *              ExecGetTriggerResultRel
1096  *
1097  * Get a ResultRelInfo for a trigger target relation.  Most of the time,
1098  * triggers are fired on one of the result relations of the query, and so
1099  * we can just return a member of the es_result_relations array.  (Note: in
1100  * self-join situations there might be multiple members with the same OID;
1101  * if so it doesn't matter which one we pick.)  However, it is sometimes
1102  * necessary to fire triggers on other relations; this happens mainly when an
1103  * RI update trigger queues additional triggers on other relations, which will
1104  * be processed in the context of the outer query.      For efficiency's sake,
1105  * we want to have a ResultRelInfo for those triggers too; that can avoid
1106  * repeated re-opening of the relation.  (It also provides a way for EXPLAIN
1107  * ANALYZE to report the runtimes of such triggers.)  So we make additional
1108  * ResultRelInfo's as needed, and save them in es_trig_target_relations.
1109  */
1110 ResultRelInfo *
1111 ExecGetTriggerResultRel(EState *estate, Oid relid)
1112 {
1113         ResultRelInfo *rInfo;
1114         int                     nr;
1115         ListCell   *l;
1116         Relation        rel;
1117         MemoryContext oldcontext;
1118
1119         /* First, search through the query result relations */
1120         rInfo = estate->es_result_relations;
1121         nr = estate->es_num_result_relations;
1122         while (nr > 0)
1123         {
1124                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1125                         return rInfo;
1126                 rInfo++;
1127                 nr--;
1128         }
1129         /* Nope, but maybe we already made an extra ResultRelInfo for it */
1130         foreach(l, estate->es_trig_target_relations)
1131         {
1132                 rInfo = (ResultRelInfo *) lfirst(l);
1133                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1134                         return rInfo;
1135         }
1136         /* Nope, so we need a new one */
1137
1138         /*
1139          * Open the target relation's relcache entry.  We assume that an
1140          * appropriate lock is still held by the backend from whenever the trigger
1141          * event got queued, so we need take no new lock here.  Also, we need not
1142          * recheck the relkind, so no need for CheckValidResultRel.
1143          */
1144         rel = heap_open(relid, NoLock);
1145
1146         /*
1147          * Make the new entry in the right context.
1148          */
1149         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
1150         rInfo = makeNode(ResultRelInfo);
1151         InitResultRelInfo(rInfo,
1152                                           rel,
1153                                           0,            /* dummy rangetable index */
1154                                           estate->es_instrument);
1155         estate->es_trig_target_relations =
1156                 lappend(estate->es_trig_target_relations, rInfo);
1157         MemoryContextSwitchTo(oldcontext);
1158
1159         /*
1160          * Currently, we don't need any index information in ResultRelInfos used
1161          * only for triggers, so no need to call ExecOpenIndices.
1162          */
1163
1164         return rInfo;
1165 }
1166
1167 /*
1168  *              ExecContextForcesOids
1169  *
1170  * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
1171  * we need to ensure that result tuples have space for an OID iff they are
1172  * going to be stored into a relation that has OIDs.  In other contexts
1173  * we are free to choose whether to leave space for OIDs in result tuples
1174  * (we generally don't want to, but we do if a physical-tlist optimization
1175  * is possible).  This routine checks the plan context and returns TRUE if the
1176  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
1177  * *hasoids is set to the required value.
1178  *
1179  * One reason this is ugly is that all plan nodes in the plan tree will emit
1180  * tuples with space for an OID, though we really only need the topmost node
1181  * to do so.  However, node types like Sort don't project new tuples but just
1182  * return their inputs, and in those cases the requirement propagates down
1183  * to the input node.  Eventually we might make this code smart enough to
1184  * recognize how far down the requirement really goes, but for now we just
1185  * make all plan nodes do the same thing if the top level forces the choice.
1186  *
1187  * We assume that if we are generating tuples for INSERT or UPDATE,
1188  * estate->es_result_relation_info is already set up to describe the target
1189  * relation.  Note that in an UPDATE that spans an inheritance tree, some of
1190  * the target relations may have OIDs and some not.  We have to make the
1191  * decisions on a per-relation basis as we initialize each of the subplans of
1192  * the ModifyTable node, so ModifyTable has to set es_result_relation_info
1193  * while initializing each subplan.
1194  *
1195  * SELECT INTO is even uglier, because we don't have the INTO relation's
1196  * descriptor available when this code runs; we have to look aside at a
1197  * flag set by InitPlan().
1198  */
1199 bool
1200 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
1201 {
1202         ResultRelInfo *ri = planstate->state->es_result_relation_info;
1203
1204         if (ri != NULL)
1205         {
1206                 Relation        rel = ri->ri_RelationDesc;
1207
1208                 if (rel != NULL)
1209                 {
1210                         *hasoids = rel->rd_rel->relhasoids;
1211                         return true;
1212                 }
1213         }
1214
1215         if (planstate->state->es_select_into)
1216         {
1217                 *hasoids = planstate->state->es_into_oids;
1218                 return true;
1219         }
1220
1221         return false;
1222 }
1223
1224 /* ----------------------------------------------------------------
1225  *              ExecPostprocessPlan
1226  *
1227  *              Give plan nodes a final chance to execute before shutdown
1228  * ----------------------------------------------------------------
1229  */
1230 static void
1231 ExecPostprocessPlan(EState *estate)
1232 {
1233         ListCell   *lc;
1234
1235         /*
1236          * Make sure nodes run forward.
1237          */
1238         estate->es_direction = ForwardScanDirection;
1239
1240         /*
1241          * Run any secondary ModifyTable nodes to completion, in case the main
1242          * query did not fetch all rows from them.      (We do this to ensure that
1243          * such nodes have predictable results.)
1244          */
1245         foreach(lc, estate->es_auxmodifytables)
1246         {
1247                 PlanState  *ps = (PlanState *) lfirst(lc);
1248
1249                 for (;;)
1250                 {
1251                         TupleTableSlot *slot;
1252
1253                         /* Reset the per-output-tuple exprcontext each time */
1254                         ResetPerTupleExprContext(estate);
1255
1256                         slot = ExecProcNode(ps);
1257
1258                         if (TupIsNull(slot))
1259                                 break;
1260                 }
1261         }
1262 }
1263
1264 /* ----------------------------------------------------------------
1265  *              ExecEndPlan
1266  *
1267  *              Cleans up the query plan -- closes files and frees up storage
1268  *
1269  * NOTE: we are no longer very worried about freeing storage per se
1270  * in this code; FreeExecutorState should be guaranteed to release all
1271  * memory that needs to be released.  What we are worried about doing
1272  * is closing relations and dropping buffer pins.  Thus, for example,
1273  * tuple tables must be cleared or dropped to ensure pins are released.
1274  * ----------------------------------------------------------------
1275  */
1276 static void
1277 ExecEndPlan(PlanState *planstate, EState *estate)
1278 {
1279         ResultRelInfo *resultRelInfo;
1280         int                     i;
1281         ListCell   *l;
1282
1283         /*
1284          * shut down the node-type-specific query processing
1285          */
1286         ExecEndNode(planstate);
1287
1288         /*
1289          * for subplans too
1290          */
1291         foreach(l, estate->es_subplanstates)
1292         {
1293                 PlanState  *subplanstate = (PlanState *) lfirst(l);
1294
1295                 ExecEndNode(subplanstate);
1296         }
1297
1298         /*
1299          * destroy the executor's tuple table.  Actually we only care about
1300          * releasing buffer pins and tupdesc refcounts; there's no need to pfree
1301          * the TupleTableSlots, since the containing memory context is about to go
1302          * away anyway.
1303          */
1304         ExecResetTupleTable(estate->es_tupleTable, false);
1305
1306         /*
1307          * close the result relation(s) if any, but hold locks until xact commit.
1308          */
1309         resultRelInfo = estate->es_result_relations;
1310         for (i = estate->es_num_result_relations; i > 0; i--)
1311         {
1312                 /* Close indices and then the relation itself */
1313                 ExecCloseIndices(resultRelInfo);
1314                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1315                 resultRelInfo++;
1316         }
1317
1318         /*
1319          * likewise close any trigger target relations
1320          */
1321         foreach(l, estate->es_trig_target_relations)
1322         {
1323                 resultRelInfo = (ResultRelInfo *) lfirst(l);
1324                 /* Close indices and then the relation itself */
1325                 ExecCloseIndices(resultRelInfo);
1326                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1327         }
1328
1329         /*
1330          * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1331          */
1332         foreach(l, estate->es_rowMarks)
1333         {
1334                 ExecRowMark *erm = (ExecRowMark *) lfirst(l);
1335
1336                 if (erm->relation)
1337                         heap_close(erm->relation, NoLock);
1338         }
1339 }
1340
1341 /* ----------------------------------------------------------------
1342  *              ExecutePlan
1343  *
1344  *              Processes the query plan until we have processed 'numberTuples' tuples,
1345  *              moving in the specified direction.
1346  *
1347  *              Runs to completion if numberTuples is 0
1348  *
1349  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1350  * user can see it
1351  * ----------------------------------------------------------------
1352  */
1353 static void
1354 ExecutePlan(EState *estate,
1355                         PlanState *planstate,
1356                         CmdType operation,
1357                         bool sendTuples,
1358                         long numberTuples,
1359                         ScanDirection direction,
1360                         DestReceiver *dest)
1361 {
1362         TupleTableSlot *slot;
1363         long            current_tuple_count;
1364
1365         /*
1366          * initialize local variables
1367          */
1368         current_tuple_count = 0;
1369
1370         /*
1371          * Set the direction.
1372          */
1373         estate->es_direction = direction;
1374
1375         /*
1376          * Loop until we've processed the proper number of tuples from the plan.
1377          */
1378         for (;;)
1379         {
1380                 /* Reset the per-output-tuple exprcontext */
1381                 ResetPerTupleExprContext(estate);
1382
1383                 /*
1384                  * Execute the plan and obtain a tuple
1385                  */
1386                 slot = ExecProcNode(planstate);
1387
1388                 /*
1389                  * if the tuple is null, then we assume there is nothing more to
1390                  * process so we just end the loop...
1391                  */
1392                 if (TupIsNull(slot))
1393                         break;
1394
1395                 /*
1396                  * If we have a junk filter, then project a new tuple with the junk
1397                  * removed.
1398                  *
1399                  * Store this new "clean" tuple in the junkfilter's resultSlot.
1400                  * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1401                  * because that tuple slot has the wrong descriptor.)
1402                  */
1403                 if (estate->es_junkFilter != NULL)
1404                         slot = ExecFilterJunk(estate->es_junkFilter, slot);
1405
1406                 /*
1407                  * If we are supposed to send the tuple somewhere, do so. (In
1408                  * practice, this is probably always the case at this point.)
1409                  */
1410                 if (sendTuples)
1411                         (*dest->receiveSlot) (slot, dest);
1412
1413                 /*
1414                  * Count tuples processed, if this is a SELECT.  (For other operation
1415                  * types, the ModifyTable plan node must count the appropriate
1416                  * events.)
1417                  */
1418                 if (operation == CMD_SELECT)
1419                         (estate->es_processed)++;
1420
1421                 /*
1422                  * check our tuple count.. if we've processed the proper number then
1423                  * quit, else loop again and process more tuples.  Zero numberTuples
1424                  * means no limit.
1425                  */
1426                 current_tuple_count++;
1427                 if (numberTuples && numberTuples == current_tuple_count)
1428                         break;
1429         }
1430 }
1431
1432
1433 /*
1434  * ExecRelCheck --- check that tuple meets constraints for result relation
1435  */
1436 static const char *
1437 ExecRelCheck(ResultRelInfo *resultRelInfo,
1438                          TupleTableSlot *slot, EState *estate)
1439 {
1440         Relation        rel = resultRelInfo->ri_RelationDesc;
1441         int                     ncheck = rel->rd_att->constr->num_check;
1442         ConstrCheck *check = rel->rd_att->constr->check;
1443         ExprContext *econtext;
1444         MemoryContext oldContext;
1445         List       *qual;
1446         int                     i;
1447
1448         /*
1449          * If first time through for this result relation, build expression
1450          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1451          * memory context so they'll survive throughout the query.
1452          */
1453         if (resultRelInfo->ri_ConstraintExprs == NULL)
1454         {
1455                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1456                 resultRelInfo->ri_ConstraintExprs =
1457                         (List **) palloc(ncheck * sizeof(List *));
1458                 for (i = 0; i < ncheck; i++)
1459                 {
1460                         /* ExecQual wants implicit-AND form */
1461                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1462                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1463                                 ExecPrepareExpr((Expr *) qual, estate);
1464                 }
1465                 MemoryContextSwitchTo(oldContext);
1466         }
1467
1468         /*
1469          * We will use the EState's per-tuple context for evaluating constraint
1470          * expressions (creating it if it's not already there).
1471          */
1472         econtext = GetPerTupleExprContext(estate);
1473
1474         /* Arrange for econtext's scan tuple to be the tuple under test */
1475         econtext->ecxt_scantuple = slot;
1476
1477         /* And evaluate the constraints */
1478         for (i = 0; i < ncheck; i++)
1479         {
1480                 qual = resultRelInfo->ri_ConstraintExprs[i];
1481
1482                 /*
1483                  * NOTE: SQL92 specifies that a NULL result from a constraint
1484                  * expression is not to be treated as a failure.  Therefore, tell
1485                  * ExecQual to return TRUE for NULL.
1486                  */
1487                 if (!ExecQual(qual, econtext, true))
1488                         return check[i].ccname;
1489         }
1490
1491         /* NULL result means no error */
1492         return NULL;
1493 }
1494
1495 void
1496 ExecConstraints(ResultRelInfo *resultRelInfo,
1497                                 TupleTableSlot *slot, EState *estate)
1498 {
1499         Relation        rel = resultRelInfo->ri_RelationDesc;
1500         TupleConstr *constr = rel->rd_att->constr;
1501
1502         Assert(constr);
1503
1504         if (constr->has_not_null)
1505         {
1506                 int                     natts = rel->rd_att->natts;
1507                 int                     attrChk;
1508
1509                 for (attrChk = 1; attrChk <= natts; attrChk++)
1510                 {
1511                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1512                                 slot_attisnull(slot, attrChk))
1513                                 ereport(ERROR,
1514                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1515                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1516                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1517                 }
1518         }
1519
1520         if (constr->num_check > 0)
1521         {
1522                 const char *failed;
1523
1524                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1525                         ereport(ERROR,
1526                                         (errcode(ERRCODE_CHECK_VIOLATION),
1527                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1528                                                         RelationGetRelationName(rel), failed)));
1529         }
1530 }
1531
1532
1533 /*
1534  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
1535  */
1536 ExecRowMark *
1537 ExecFindRowMark(EState *estate, Index rti)
1538 {
1539         ListCell   *lc;
1540
1541         foreach(lc, estate->es_rowMarks)
1542         {
1543                 ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
1544
1545                 if (erm->rti == rti)
1546                         return erm;
1547         }
1548         elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
1549         return NULL;                            /* keep compiler quiet */
1550 }
1551
1552 /*
1553  * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
1554  *
1555  * Inputs are the underlying ExecRowMark struct and the targetlist of the
1556  * input plan node (not planstate node!).  We need the latter to find out
1557  * the column numbers of the resjunk columns.
1558  */
1559 ExecAuxRowMark *
1560 ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
1561 {
1562         ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
1563         char            resname[32];
1564
1565         aerm->rowmark = erm;
1566
1567         /* Look up the resjunk columns associated with this rowmark */
1568         if (erm->relation)
1569         {
1570                 Assert(erm->markType != ROW_MARK_COPY);
1571
1572                 /* if child rel, need tableoid */
1573                 if (erm->rti != erm->prti)
1574                 {
1575                         snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
1576                         aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
1577                                                                                                                    resname);
1578                         if (!AttributeNumberIsValid(aerm->toidAttNo))
1579                                 elog(ERROR, "could not find junk %s column", resname);
1580                 }
1581
1582                 /* always need ctid for real relations */
1583                 snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
1584                 aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
1585                                                                                                            resname);
1586                 if (!AttributeNumberIsValid(aerm->ctidAttNo))
1587                         elog(ERROR, "could not find junk %s column", resname);
1588         }
1589         else
1590         {
1591                 Assert(erm->markType == ROW_MARK_COPY);
1592
1593                 snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
1594                 aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
1595                                                                                                                 resname);
1596                 if (!AttributeNumberIsValid(aerm->wholeAttNo))
1597                         elog(ERROR, "could not find junk %s column", resname);
1598         }
1599
1600         return aerm;
1601 }
1602
1603
1604 /*
1605  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
1606  * process the updated version under READ COMMITTED rules.
1607  *
1608  * See backend/executor/README for some info about how this works.
1609  */
1610
1611
1612 /*
1613  * Check a modified tuple to see if we want to process its updated version
1614  * under READ COMMITTED rules.
1615  *
1616  *      estate - outer executor state data
1617  *      epqstate - state for EvalPlanQual rechecking
1618  *      relation - table containing tuple
1619  *      rti - rangetable index of table containing tuple
1620  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
1621  *      priorXmax - t_xmax from the outdated tuple
1622  *
1623  * *tid is also an output parameter: it's modified to hold the TID of the
1624  * latest version of the tuple (note this may be changed even on failure)
1625  *
1626  * Returns a slot containing the new candidate update/delete tuple, or
1627  * NULL if we determine we shouldn't process the row.
1628  */
1629 TupleTableSlot *
1630 EvalPlanQual(EState *estate, EPQState *epqstate,
1631                          Relation relation, Index rti,
1632                          ItemPointer tid, TransactionId priorXmax)
1633 {
1634         TupleTableSlot *slot;
1635         HeapTuple       copyTuple;
1636
1637         Assert(rti > 0);
1638
1639         /*
1640          * Get and lock the updated version of the row; if fail, return NULL.
1641          */
1642         copyTuple = EvalPlanQualFetch(estate, relation, LockTupleExclusive,
1643                                                                   tid, priorXmax);
1644
1645         if (copyTuple == NULL)
1646                 return NULL;
1647
1648         /*
1649          * For UPDATE/DELETE we have to return tid of actual row we're executing
1650          * PQ for.
1651          */
1652         *tid = copyTuple->t_self;
1653
1654         /*
1655          * Need to run a recheck subquery.      Initialize or reinitialize EPQ state.
1656          */
1657         EvalPlanQualBegin(epqstate, estate);
1658
1659         /*
1660          * Free old test tuple, if any, and store new tuple where relation's scan
1661          * node will see it
1662          */
1663         EvalPlanQualSetTuple(epqstate, rti, copyTuple);
1664
1665         /*
1666          * Fetch any non-locked source rows
1667          */
1668         EvalPlanQualFetchRowMarks(epqstate);
1669
1670         /*
1671          * Run the EPQ query.  We assume it will return at most one tuple.
1672          */
1673         slot = EvalPlanQualNext(epqstate);
1674
1675         /*
1676          * If we got a tuple, force the slot to materialize the tuple so that it
1677          * is not dependent on any local state in the EPQ query (in particular,
1678          * it's highly likely that the slot contains references to any pass-by-ref
1679          * datums that may be present in copyTuple).  As with the next step, this
1680          * is to guard against early re-use of the EPQ query.
1681          */
1682         if (!TupIsNull(slot))
1683                 (void) ExecMaterializeSlot(slot);
1684
1685         /*
1686          * Clear out the test tuple.  This is needed in case the EPQ query is
1687          * re-used to test a tuple for a different relation.  (Not clear that can
1688          * really happen, but let's be safe.)
1689          */
1690         EvalPlanQualSetTuple(epqstate, rti, NULL);
1691
1692         return slot;
1693 }
1694
/*
 * Fetch a copy of the newest version of an outdated tuple
 *
 *	estate - executor state data
 *	relation - table containing tuple
 *	lockmode - requested tuple lock mode
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * Returns a palloc'd copy of the newest tuple version, or NULL if we find
 * that there is no newest version (ie, the row was deleted not updated).
 * If successful, we have locked the newest tuple version, so caller does not
 * need to worry about it changing anymore.
 *
 * Note: properly, lockmode should be declared as enum LockTupleMode,
 * but we use "int" to avoid having to include heapam.h in executor.h.
 */
HeapTuple
EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
				  ItemPointer tid, TransactionId priorXmax)
{
	HeapTuple	copyTuple = NULL;
	HeapTupleData tuple;
	SnapshotData SnapshotDirty;

	/*
	 * fetch target tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	InitDirtySnapshot(SnapshotDirty);
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		/* keep_buf = true: buffer stays pinned even if the fetch fails */
		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			HTSU_Result test;
			ItemPointerData update_ctid;
			TransactionId update_xmax;

			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.  This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty.xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty.xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty.xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against es_output_cid: cmin >= current CID means our
			 * command cannot see the tuple, so we should ignore it.  Without
			 * this we are open to the "Halloween problem" of indefinitely
			 * re-updating the same tuple. (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
			 * transaction dead, regardless of cmax.)  We just checked that
			 * priorXmax == xmin, so we can test that variable instead of
			 * doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * This is a live tuple, so now try to lock it.
			 */
			test = heap_lock_tuple(relation, &tuple, &buffer,
								   &update_ctid, &update_xmax,
								   estate->es_output_cid,
								   lockmode, false);
			/* We now have two pins on the buffer, get rid of one */
			ReleaseBuffer(buffer);

			switch (test)
			{
				case HeapTupleSelfUpdated:
					/* treat it as deleted; do not process */
					ReleaseBuffer(buffer);
					return NULL;

				case HeapTupleMayBeUpdated:
					/* successfully locked */
					break;

				case HeapTupleUpdated:
					ReleaseBuffer(buffer);
					/* a concurrent update is fatal under snapshot isolation */
					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));
					if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
					{
						/* it was updated, so look at the updated version */
						tuple.t_self = update_ctid;
						/* updated row should have xmin matching this xmax */
						priorXmax = update_xmax;
						continue;
					}
					/* tuple was deleted, so give up */
					return NULL;

				default:
					ReleaseBuffer(buffer);
					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
						 test);
					return NULL;	/* keep compiler quiet */
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * process.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* t_ctid pointing at itself means deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * Return the copied tuple
	 */
	return copyTuple;
}
1890
1891 /*
1892  * EvalPlanQualInit -- initialize during creation of a plan state node
1893  * that might need to invoke EPQ processing.
1894  *
1895  * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
1896  * with EvalPlanQualSetPlan.
1897  */
1898 void
1899 EvalPlanQualInit(EPQState *epqstate, EState *estate,
1900                                  Plan *subplan, List *auxrowmarks, int epqParam)
1901 {
1902         /* Mark the EPQ state inactive */
1903         epqstate->estate = NULL;
1904         epqstate->planstate = NULL;
1905         epqstate->origslot = NULL;
1906         /* ... and remember data that EvalPlanQualBegin will need */
1907         epqstate->plan = subplan;
1908         epqstate->arowMarks = auxrowmarks;
1909         epqstate->epqParam = epqParam;
1910 }
1911
1912 /*
1913  * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
1914  *
1915  * We need this so that ModifyTuple can deal with multiple subplans.
1916  */
1917 void
1918 EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
1919 {
1920         /* If we have a live EPQ query, shut it down */
1921         EvalPlanQualEnd(epqstate);
1922         /* And set/change the plan pointer */
1923         epqstate->plan = subplan;
1924         /* The rowmarks depend on the plan, too */
1925         epqstate->arowMarks = auxrowmarks;
1926 }
1927
1928 /*
1929  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
1930  *
1931  * NB: passed tuple must be palloc'd; it may get freed later
1932  */
1933 void
1934 EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
1935 {
1936         EState     *estate = epqstate->estate;
1937
1938         Assert(rti > 0);
1939
1940         /*
1941          * free old test tuple, if any, and store new tuple where relation's scan
1942          * node will see it
1943          */
1944         if (estate->es_epqTuple[rti - 1] != NULL)
1945                 heap_freetuple(estate->es_epqTuple[rti - 1]);
1946         estate->es_epqTuple[rti - 1] = tuple;
1947         estate->es_epqTupleSet[rti - 1] = true;
1948 }
1949
1950 /*
1951  * Fetch back the current test tuple (if any) for the specified RTI
1952  */
1953 HeapTuple
1954 EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
1955 {
1956         EState     *estate = epqstate->estate;
1957
1958         Assert(rti > 0);
1959
1960         return estate->es_epqTuple[rti - 1];
1961 }
1962
/*
 * Fetch the current row values for any non-locked relations that need
 * to be scanned by an EvalPlanQual operation.  origslot must have been set
 * to contain the current result row (top-level row) that we need to recheck.
 */
void
EvalPlanQualFetchRowMarks(EPQState *epqstate)
{
	ListCell   *l;

	Assert(epqstate->origslot != NULL);

	foreach(l, epqstate->arowMarks)
	{
		ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
		ExecRowMark *erm = aerm->rowmark;
		Datum		datum;
		bool		isNull;
		HeapTupleData tuple;

		/* rowmarks that take row-share locks are handled elsewhere, not here */
		if (RowMarkRequiresRowShareLock(erm->markType))
			elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");

		/* clear any leftover test tuple for this rel */
		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);

		if (erm->relation)
		{
			Buffer		buffer;

			Assert(erm->markType == ROW_MARK_REFERENCE);

			/* if child rel, must check whether it produced this row */
			if (erm->rti != erm->prti)
			{
				Oid			tableoid;

				datum = ExecGetJunkAttribute(epqstate->origslot,
											 aerm->toidAttNo,
											 &isNull);
				/* non-locked rels could be on the inside of outer joins */
				if (isNull)
					continue;
				tableoid = DatumGetObjectId(datum);

				if (tableoid != RelationGetRelid(erm->relation))
				{
					/* this child is inactive right now */
					continue;
				}
			}

			/* fetch the tuple's ctid */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->ctidAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

			/* okay, fetch the tuple (SnapshotAny: the ctid is authoritative) */
			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
							false, NULL))
				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");

			/* successful, copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
			ReleaseBuffer(buffer);
		}
		else
		{
			HeapTupleHeader td;

			Assert(erm->markType == ROW_MARK_COPY);

			/* fetch the whole-row Var for the relation */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->wholeAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			td = DatumGetHeapTupleHeader(datum);

			/* build a temporary HeapTuple control structure */
			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
			ItemPointerSetInvalid(&(tuple.t_self));
			tuple.t_tableOid = InvalidOid;
			tuple.t_data = td;

			/* copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
		}
	}
}
2061
2062 /*
2063  * Fetch the next row (if any) from EvalPlanQual testing
2064  *
2065  * (In practice, there should never be more than one row...)
2066  */
2067 TupleTableSlot *
2068 EvalPlanQualNext(EPQState *epqstate)
2069 {
2070         MemoryContext oldcontext;
2071         TupleTableSlot *slot;
2072
2073         oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
2074         slot = ExecProcNode(epqstate->planstate);
2075         MemoryContextSwitchTo(oldcontext);
2076
2077         return slot;
2078 }
2079
2080 /*
2081  * Initialize or reset an EvalPlanQual state tree
2082  */
2083 void
2084 EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
2085 {
2086         EState     *estate = epqstate->estate;
2087
2088         if (estate == NULL)
2089         {
2090                 /* First time through, so create a child EState */
2091                 EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
2092         }
2093         else
2094         {
2095                 /*
2096                  * We already have a suitable child EPQ tree, so just reset it.
2097                  */
2098                 int                     rtsize = list_length(parentestate->es_range_table);
2099                 PlanState  *planstate = epqstate->planstate;
2100
2101                 MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
2102
2103                 /* Recopy current values of parent parameters */
2104                 if (parentestate->es_plannedstmt->nParamExec > 0)
2105                 {
2106                         int                     i = parentestate->es_plannedstmt->nParamExec;
2107
2108                         while (--i >= 0)
2109                         {
2110                                 /* copy value if any, but not execPlan link */
2111                                 estate->es_param_exec_vals[i].value =
2112                                         parentestate->es_param_exec_vals[i].value;
2113                                 estate->es_param_exec_vals[i].isnull =
2114                                         parentestate->es_param_exec_vals[i].isnull;
2115                         }
2116                 }
2117
2118                 /*
2119                  * Mark child plan tree as needing rescan at all scan nodes.  The
2120                  * first ExecProcNode will take care of actually doing the rescan.
2121                  */
2122                 planstate->chgParam = bms_add_member(planstate->chgParam,
2123                                                                                          epqstate->epqParam);
2124         }
2125 }
2126
2127 /*
2128  * Start execution of an EvalPlanQual plan tree.
2129  *
2130  * This is a cut-down version of ExecutorStart(): we copy some state from
2131  * the top-level estate rather than initializing it fresh.
2132  */
2133 static void
2134 EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
2135 {
2136         EState     *estate;
2137         int                     rtsize;
2138         MemoryContext oldcontext;
2139         ListCell   *l;
2140
2141         rtsize = list_length(parentestate->es_range_table);
2142
2143         epqstate->estate = estate = CreateExecutorState();
2144
2145         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
2146
2147         /*
2148          * Child EPQ EStates share the parent's copy of unchanging state such as
2149          * the snapshot, rangetable, result-rel info, and external Param info.
2150          * They need their own copies of local state, including a tuple table,
2151          * es_param_exec_vals, etc.
2152          */
2153         estate->es_direction = ForwardScanDirection;
2154         estate->es_snapshot = parentestate->es_snapshot;
2155         estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
2156         estate->es_range_table = parentestate->es_range_table;
2157         estate->es_plannedstmt = parentestate->es_plannedstmt;
2158         estate->es_junkFilter = parentestate->es_junkFilter;
2159         estate->es_output_cid = parentestate->es_output_cid;
2160         estate->es_result_relations = parentestate->es_result_relations;
2161         estate->es_num_result_relations = parentestate->es_num_result_relations;
2162         estate->es_result_relation_info = parentestate->es_result_relation_info;
2163         /* es_trig_target_relations must NOT be copied */
2164         estate->es_rowMarks = parentestate->es_rowMarks;
2165         estate->es_top_eflags = parentestate->es_top_eflags;
2166         estate->es_instrument = parentestate->es_instrument;
2167         estate->es_select_into = parentestate->es_select_into;
2168         estate->es_into_oids = parentestate->es_into_oids;
2169         /* es_auxmodifytables must NOT be copied */
2170
2171         /*
2172          * The external param list is simply shared from parent.  The internal
2173          * param workspace has to be local state, but we copy the initial values
2174          * from the parent, so as to have access to any param values that were
2175          * already set from other parts of the parent's plan tree.
2176          */
2177         estate->es_param_list_info = parentestate->es_param_list_info;
2178         if (parentestate->es_plannedstmt->nParamExec > 0)
2179         {
2180                 int                     i = parentestate->es_plannedstmt->nParamExec;
2181
2182                 estate->es_param_exec_vals = (ParamExecData *)
2183                         palloc0(i * sizeof(ParamExecData));
2184                 while (--i >= 0)
2185                 {
2186                         /* copy value if any, but not execPlan link */
2187                         estate->es_param_exec_vals[i].value =
2188                                 parentestate->es_param_exec_vals[i].value;
2189                         estate->es_param_exec_vals[i].isnull =
2190                                 parentestate->es_param_exec_vals[i].isnull;
2191                 }
2192         }
2193
2194         /*
2195          * Each EState must have its own es_epqScanDone state, but if we have
2196          * nested EPQ checks they should share es_epqTuple arrays.      This allows
2197          * sub-rechecks to inherit the values being examined by an outer recheck.
2198          */
2199         estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
2200         if (parentestate->es_epqTuple != NULL)
2201         {
2202                 estate->es_epqTuple = parentestate->es_epqTuple;
2203                 estate->es_epqTupleSet = parentestate->es_epqTupleSet;
2204         }
2205         else
2206         {
2207                 estate->es_epqTuple = (HeapTuple *)
2208                         palloc0(rtsize * sizeof(HeapTuple));
2209                 estate->es_epqTupleSet = (bool *)
2210                         palloc0(rtsize * sizeof(bool));
2211         }
2212
2213         /*
2214          * Each estate also has its own tuple table.
2215          */
2216         estate->es_tupleTable = NIL;
2217
2218         /*
2219          * Initialize private state information for each SubPlan.  We must do this
2220          * before running ExecInitNode on the main query tree, since
2221          * ExecInitSubPlan expects to be able to find these entries. Some of the
2222          * SubPlans might not be used in the part of the plan tree we intend to
2223          * run, but since it's not easy to tell which, we just initialize them
2224          * all.  (However, if the subplan is headed by a ModifyTable node, then it
2225          * must be a data-modifying CTE, which we will certainly not need to
2226          * re-run, so we can skip initializing it.      This is just an efficiency
2227          * hack; it won't skip data-modifying CTEs for which the ModifyTable node
2228          * is not at the top.)
2229          */
2230         Assert(estate->es_subplanstates == NIL);
2231         foreach(l, parentestate->es_plannedstmt->subplans)
2232         {
2233                 Plan       *subplan = (Plan *) lfirst(l);
2234                 PlanState  *subplanstate;
2235
2236                 /* Don't initialize ModifyTable subplans, per comment above */
2237                 if (IsA(subplan, ModifyTable))
2238                         subplanstate = NULL;
2239                 else
2240                         subplanstate = ExecInitNode(subplan, estate, 0);
2241
2242                 estate->es_subplanstates = lappend(estate->es_subplanstates,
2243                                                                                    subplanstate);
2244         }
2245
2246         /*
2247          * Initialize the private state information for all the nodes in the part
2248          * of the plan tree we need to run.  This opens files, allocates storage
2249          * and leaves us ready to start processing tuples.
2250          */
2251         epqstate->planstate = ExecInitNode(planTree, estate, 0);
2252
2253         MemoryContextSwitchTo(oldcontext);
2254 }
2255
/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 *
 * Safe to call on an idle EPQState (one whose estate is NULL); it then
 * does nothing.  On return the EPQState is marked idle again.
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	if (estate == NULL)
		return;					/* idle, so nothing to do */

	/* All cleanup is done in the EPQ estate's own query context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Shut down the plan tree that EvalPlanQualStart initialized */
	ExecEndNode(epqstate->planstate);

	/*
	 * Shut down the per-SubPlan state nodes.  Entries may be NULL for
	 * ModifyTable-headed subplans that EvalPlanQualStart skipped;
	 * presumably ExecEndNode tolerates that — confirm in execProcnode.c.
	 */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/*
	 * Close any trigger target relations attached to this EState.  These
	 * are private to the EPQ estate (es_trig_target_relations is never
	 * copied from the parent), so they must be closed here, unlike the
	 * shared result relations.
	 */
	foreach(l, estate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		/* NoLock: locks are held until transaction end */
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	MemoryContextSwitchTo(oldcontext);

	/* Releases the query context and everything allocated in it */
	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}
2309
2310
/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 */

typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers; must be
								 * first so DestReceiver* casts to DR_intorel* */
	EState	   *estate;			/* EState we are working with; source of
								 * es_output_cid for heap_insert */
	Relation	rel;			/* Relation to write to; NULL until
								 * OpenIntoRel runs, reset by CloseIntoRel */
	int			hi_options;		/* heap_insert performance options
								 * (HEAP_INSERT_SKIP_FSM / _SKIP_WAL) */
	BulkInsertState bistate;	/* bulk insert state, freed in CloseIntoRel */
} DR_intorel;
2326
/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 *
 * On exit the new relation is created (with TOAST table if needed), opened
 * with AccessExclusiveLock, and wired into a DR_intorel receiver that
 * intorel_receive will use for each output tuple.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->intoClause;
	EState	   *estate = queryDesc->estate;
	Relation	intoRelationDesc;
	char	   *intoName;
	Oid			namespaceId;
	Oid			tablespaceId;
	Datum		reloptions;
	Oid			intoRelationId;
	TupleDesc	tupdesc;
	DR_intorel *myState;
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;

	Assert(into);

	/*
	 * XXX This code needs to be kept in sync with DefineRelation(). Maybe we
	 * should try to use that function instead.
	 */

	/*
	 * Check consistency of arguments: ON COMMIT actions only make sense for
	 * temp tables.
	 */
	if (into->onCommit != ONCOMMIT_NOOP
		&& into->rel->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Security check: disallow creating temp tables from security-restricted
	 * code.  This is needed because calling code might not expect untrusted
	 * tables to appear in pg_temp at the front of its search path.
	 */
	if (into->rel->relpersistence == RELPERSISTENCE_TEMP
		&& InSecurityRestrictedOperation())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("cannot create temporary table within security-restricted operation")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetAndCheckCreationNamespace(into->rel);

	/*
	 * Select tablespace to use.  If not specified, use default tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		/* false: error out if the named tablespace doesn't exist */
		tablespaceId = get_tablespace_oid(into->tableSpaceName, false);
	}
	else
	{
		tablespaceId = GetDefaultTablespace(into->rel->relpersistence);
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* Parse and validate any reloptions (for the heap, not toast, here) */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 NULL,
									 validnsps,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* Copy the tupdesc because heap_create_with_catalog modifies it */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,
											  InvalidOid,
											  InvalidOid,
											  GetUserId(),
											  tupdesc,
											  NIL,
											  RELKIND_RELATION,
											  into->rel->relpersistence,
											  false,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  true,
											  allowSystemTableMods);
	Assert(intoRelationId != InvalidOid);

	/* the copy was consumed only for catalog construction */
	FreeTupleDesc(tupdesc);

	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation. Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
	 * the TOAST table will be visible for insertion.
	 */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 "toast",
									 validnsps,
									 true,
									 false);

	(void) heap_reloptions(RELKIND_TOASTVALUE, reloptions, true);

	AlterTableCreateToastTable(intoRelationId, reloptions);

	/*
	 * And open the constructed table for writing.  AccessExclusiveLock is
	 * appropriate since no one else can see the table yet anyway.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
	myState->rel = intoRelationDesc;

	/*
	 * We can skip WAL-logging the insertions, unless PITR or streaming
	 * replication is in use. We can skip the FSM in any case.
	 */
	myState->hi_options = HEAP_INSERT_SKIP_FSM |
		(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
	myState->bistate = GetBulkInsertState();

	/* Not using WAL requires smgr_targblock be initially invalid */
	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}
2492
2493 /*
2494  * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
2495  */
2496 static void
2497 CloseIntoRel(QueryDesc *queryDesc)
2498 {
2499         DR_intorel *myState = (DR_intorel *) queryDesc->dest;
2500
2501         /* OpenIntoRel might never have gotten called */
2502         if (myState && myState->pub.mydest == DestIntoRel && myState->rel)
2503         {
2504                 FreeBulkInsertState(myState->bistate);
2505
2506                 /* If we skipped using WAL, must heap_sync before commit */
2507                 if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
2508                         heap_sync(myState->rel);
2509
2510                 /* close rel, but keep lock until commit */
2511                 heap_close(myState->rel, NoLock);
2512
2513                 myState->rel = NULL;
2514         }
2515 }
2516
2517 /*
2518  * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
2519  */
2520 DestReceiver *
2521 CreateIntoRelDestReceiver(void)
2522 {
2523         DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
2524
2525         self->pub.receiveSlot = intorel_receive;
2526         self->pub.rStartup = intorel_startup;
2527         self->pub.rShutdown = intorel_shutdown;
2528         self->pub.rDestroy = intorel_destroy;
2529         self->pub.mydest = DestIntoRel;
2530
2531         /* private fields will be set by OpenIntoRel */
2532
2533         return (DestReceiver *) self;
2534 }
2535
2536 /*
2537  * intorel_startup --- executor startup
2538  */
2539 static void
2540 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
2541 {
2542         /* no-op */
2543 }
2544
2545 /*
2546  * intorel_receive --- receive one tuple
2547  */
2548 static void
2549 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
2550 {
2551         DR_intorel *myState = (DR_intorel *) self;
2552         HeapTuple       tuple;
2553
2554         /*
2555          * get the heap tuple out of the tuple table slot, making sure we have a
2556          * writable copy
2557          */
2558         tuple = ExecMaterializeSlot(slot);
2559
2560         /*
2561          * force assignment of new OID (see comments in ExecInsert)
2562          */
2563         if (myState->rel->rd_rel->relhasoids)
2564                 HeapTupleSetOid(tuple, InvalidOid);
2565
2566         heap_insert(myState->rel,
2567                                 tuple,
2568                                 myState->estate->es_output_cid,
2569                                 myState->hi_options,
2570                                 myState->bistate);
2571
2572         /* We know this is a newly created relation, so there are no indexes */
2573 }
2574
2575 /*
2576  * intorel_shutdown --- executor end
2577  */
2578 static void
2579 intorel_shutdown(DestReceiver *self)
2580 {
2581         /* no-op */
2582 }
2583
2584 /*
2585  * intorel_destroy --- release DestReceiver object
2586  */
2587 static void
2588 intorel_destroy(DestReceiver *self)
2589 {
2590         pfree(self);
2591 }