src/backend/executor/execMain.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * execMain.c
   4  *        top level executor interface routines
   5  *
   6  * INTERFACE ROUTINES
   7  *      ExecutorStart()
   8  *      ExecutorRun()
   9  *      ExecutorEnd()
  10  *
  11  *      The old ExecutorMain() has been replaced by ExecutorStart(),
  12  *      ExecutorRun() and ExecutorEnd()
  13  *
  14  *      These three procedures are the external interfaces to the executor.
  15  *      In each case, the query descriptor is required as an argument.
  16  *
  17  *      ExecutorStart() must be called at the beginning of execution of any
  18  *      query plan and ExecutorEnd() should always be called at the end of
  19  *      execution of a plan.
  20  *
  21  *      ExecutorRun accepts direction and count arguments that specify whether
  22  *      the plan is to be executed forwards, backwards, and for how many tuples.
  23  *
  24  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  25  * Portions Copyright (c) 1994, Regents of the University of California
  26  *
  27  *
  28  * IDENTIFICATION
  29  *        $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.333 2009/10/12 18:10:41 tgl Exp $
  30  *
  31  *-------------------------------------------------------------------------
  32  */
  33 #include "postgres.h"
  34
  35 #include "access/reloptions.h"
  36 #include "access/sysattr.h"
  37 #include "access/transam.h"
  38 #include "access/xact.h"
  39 #include "catalog/heap.h"
  40 #include "catalog/namespace.h"
  41 #include "catalog/toasting.h"
  42 #include "commands/tablespace.h"
  43 #include "commands/trigger.h"
  44 #include "executor/execdebug.h"
  45 #include "executor/instrument.h"
  46 #include "miscadmin.h"
  47 #include "optimizer/clauses.h"
  48 #include "parser/parse_clause.h"
  49 #include "parser/parsetree.h"
  50 #include "storage/bufmgr.h"
  51 #include "storage/lmgr.h"
  52 #include "utils/acl.h"
  53 #include "utils/lsyscache.h"
  54 #include "utils/memutils.h"
  55 #include "utils/snapmgr.h"
  56 #include "utils/tqual.h"
  57
  58
   59 /* Hooks for plugins to get control in ExecutorStart/Run/End() */
   60 ExecutorStart_hook_type ExecutorStart_hook = NULL;     /* when NULL, ExecutorStart() calls standard_ExecutorStart() */
   61 ExecutorRun_hook_type ExecutorRun_hook = NULL; /* when NULL, ExecutorRun() calls standard_ExecutorRun() */
   62 ExecutorEnd_hook_type ExecutorEnd_hook = NULL; /* when NULL, ExecutorEnd() calls standard_ExecutorEnd() */
  63
   64 typedef struct evalPlanQual     /* one EvalPlanQual bookkeeping entry; entries are chained through next/free */
   65 {
   66         Index           rti;    /* NOTE(review): presumably the range-table index this entry applies to -- confirm in EvalPlanQual code */
   67         EState     *estate;     /* NOTE(review): presumably the executor state used for this entry's plan -- confirm */
   68         PlanState  *planstate;  /* NOTE(review): presumably the plan state tree run for this entry -- confirm */
   69         PlanState  *origplanstate;      /* NOTE(review): relationship to planstate is not visible here -- confirm */
   70         TupleTableSlot *resultslot;     /* NOTE(review): presumably holds this entry's result tuple -- confirm */
   71         struct evalPlanQual *next;      /* stack of active PlanQual plans */
   72         struct evalPlanQual *free;      /* list of free PlanQual plans */
   73 } evalPlanQual;
  74
   75 /* decls for local routines only used within this module */
   76 static void InitPlan(QueryDesc *queryDesc, int eflags);        /* called by standard_ExecutorStart() */
   77 static void ExecEndPlan(PlanState *planstate, EState *estate); /* called by standard_ExecutorEnd() */
   78 static void ExecutePlan(EState *estate, PlanState *planstate,
   79                         CmdType operation,
   80                         bool sendTuples,
   81                         long numberTuples,
   82                         ScanDirection direction,
   83                         DestReceiver *dest);    /* called by standard_ExecutorRun() unless direction is "no movement" */
   84 static void EndEvalPlanQual(EState *estate);
   85 static void ExecCheckRTPerms(List *rangeTable);        /* called by InitPlan(); checks every range-table entry */
   86 static void ExecCheckRTEPerms(RangeTblEntry *rte);     /* per-entry worker for ExecCheckRTPerms() */
   87 static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);   /* called by standard_ExecutorStart() when XactReadOnly is set */
   88 static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
   89                                                           Plan *planTree, evalPlanQual *priorepq);
   90 static void EvalPlanQualStop(evalPlanQual *epq);
   91 static void OpenIntoRel(QueryDesc *queryDesc);
   92 static void CloseIntoRel(QueryDesc *queryDesc);        /* called by standard_ExecutorEnd() when es_select_into is set */
   93 static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
   94 static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
   95 static void intorel_shutdown(DestReceiver *self);
   96 static void intorel_destroy(DestReceiver *self);
  97
  98 /* end of local decls */
  99
 100
 101 /* ----------------------------------------------------------------
 102  *              ExecutorStart
 103  *
 104  *              This routine must be called at the beginning of any execution of any
 105  *              query plan
 106  *
 107  * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
 108  * clear why we bother to separate the two functions, but...).  The tupDesc
 109  * field of the QueryDesc is filled in to describe the tuples that will be
 110  * returned, and the internal fields (estate and planstate) are set up.
 111  *
 112  * eflags contains flag bits as described in executor.h.
 113  *
 114  * NB: the CurrentMemoryContext when this is called will become the parent
 115  * of the per-query context used for this Executor invocation.
 116  *
 117  * We provide a function hook variable that lets loadable plugins
 118  * get control when ExecutorStart is called.  Such a plugin would
 119  * normally call standard_ExecutorStart().
 120  *
 121  * ----------------------------------------------------------------
 122  */
 123 void
 124 ExecutorStart(QueryDesc *queryDesc, int eflags)
 125 {
 126         if (ExecutorStart_hook)
 127                 (*ExecutorStart_hook) (queryDesc, eflags);
 128         else
 129                 standard_ExecutorStart(queryDesc, eflags);
 130 }
 131
 132 void
 133 standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
 134 {
 135         EState     *estate;
 136         MemoryContext oldcontext;
 137
 138         /* sanity checks: queryDesc must not be started already */
 139         Assert(queryDesc != NULL);
 140         Assert(queryDesc->estate == NULL);
 141
 142         /*
 143          * If the transaction is read-only, we need to check if any writes are
 144          * planned to non-temporary tables.  EXPLAIN is considered read-only.
 145          */
 146         if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
 147                 ExecCheckXactReadOnly(queryDesc->plannedstmt);
 148
 149         /*
 150          * Build EState, switch into per-query memory context for startup.
 151          */
 152         estate = CreateExecutorState();
 153         queryDesc->estate = estate;
 154
 155         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 156
 157         /*
 158          * Fill in parameters, if any, from queryDesc
 159          */
 160         estate->es_param_list_info = queryDesc->params;
 161
 162         if (queryDesc->plannedstmt->nParamExec > 0)
 163                 estate->es_param_exec_vals = (ParamExecData *)
 164                         palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));
 165
 166         /*
 167          * If non-read-only query, set the command ID to mark output tuples with
 168          */
 169         switch (queryDesc->operation)
 170         {
 171                 case CMD_SELECT:
 172                         /* SELECT INTO and SELECT FOR UPDATE/SHARE need to mark tuples */
 173                         if (queryDesc->plannedstmt->intoClause != NULL ||
 174                                 queryDesc->plannedstmt->rowMarks != NIL)
 175                                 estate->es_output_cid = GetCurrentCommandId(true);
 176                         break;
 177
 178                 case CMD_INSERT:
 179                 case CMD_DELETE:
 180                 case CMD_UPDATE:
 181                         estate->es_output_cid = GetCurrentCommandId(true);
 182                         break;
 183
 184                 default:
 185                         elog(ERROR, "unrecognized operation code: %d",
 186                                  (int) queryDesc->operation);
 187                         break;
 188         }
 189
 190         /*
 191          * Copy other important information into the EState
 192          */
 193         estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
 194         estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
 195         estate->es_instrument = queryDesc->doInstrument;
 196
 197         /*
 198          * Initialize the plan state tree
 199          */
 200         InitPlan(queryDesc, eflags);
 201
 202         MemoryContextSwitchTo(oldcontext);
 203 }
 204
 205 /* ----------------------------------------------------------------
 206  *              ExecutorRun
 207  *
 208  *              This is the main routine of the executor module. It accepts
 209  *              the query descriptor from the traffic cop and executes the
 210  *              query plan.
 211  *
 212  *              ExecutorStart must have been called already.
 213  *
 214  *              If direction is NoMovementScanDirection then nothing is done
 215  *              except to start up/shut down the destination.  Otherwise,
 216  *              we retrieve up to 'count' tuples in the specified direction.
 217  *
 218  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
 219  *              completion.
 220  *
 221  *              There is no return value, but output tuples (if any) are sent to
 222  *              the destination receiver specified in the QueryDesc; and the number
 223  *              of tuples processed at the top level can be found in
 224  *              estate->es_processed.
 225  *
 226  *              We provide a function hook variable that lets loadable plugins
 227  *              get control when ExecutorRun is called.  Such a plugin would
 228  *              normally call standard_ExecutorRun().
 229  *
 230  * ----------------------------------------------------------------
 231  */
 232 void
 233 ExecutorRun(QueryDesc *queryDesc,
 234                         ScanDirection direction, long count)
 235 {
 236         if (ExecutorRun_hook)
 237                 (*ExecutorRun_hook) (queryDesc, direction, count);
 238         else
 239                 standard_ExecutorRun(queryDesc, direction, count);
 240 }
 241
 242 void
 243 standard_ExecutorRun(QueryDesc *queryDesc,
 244                                          ScanDirection direction, long count)
 245 {
 246         EState     *estate;
 247         CmdType         operation;
 248         DestReceiver *dest;
 249         bool            sendTuples;
 250         MemoryContext oldcontext;
 251
 252         /* sanity checks */
 253         Assert(queryDesc != NULL);
 254
 255         estate = queryDesc->estate;
 256
 257         Assert(estate != NULL);
 258
 259         /*
 260          * Switch into per-query memory context
 261          */
 262         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 263
 264         /* Allow instrumentation of ExecutorRun overall runtime */
 265         if (queryDesc->totaltime)
 266                 InstrStartNode(queryDesc->totaltime);
 267
 268         /*
 269          * extract information from the query descriptor and the query feature.
 270          */
 271         operation = queryDesc->operation;
 272         dest = queryDesc->dest;
 273
 274         /*
 275          * startup tuple receiver, if we will be emitting tuples
 276          */
 277         estate->es_processed = 0;
 278         estate->es_lastoid = InvalidOid;
 279
 280         sendTuples = (operation == CMD_SELECT ||
 281                                   queryDesc->plannedstmt->hasReturning);
 282
 283         if (sendTuples)
 284                 (*dest->rStartup) (dest, operation, queryDesc->tupDesc);
 285
 286         /*
 287          * run plan
 288          */
 289         if (!ScanDirectionIsNoMovement(direction))
 290                 ExecutePlan(estate,
 291                                         queryDesc->planstate,
 292                                         operation,
 293                                         sendTuples,
 294                                         count,
 295                                         direction,
 296                                         dest);
 297
 298         /*
 299          * shutdown tuple receiver, if we started it
 300          */
 301         if (sendTuples)
 302                 (*dest->rShutdown) (dest);
 303
 304         if (queryDesc->totaltime)
 305                 InstrStopNode(queryDesc->totaltime, estate->es_processed);
 306
 307         MemoryContextSwitchTo(oldcontext);
 308 }
 309
 310 /* ----------------------------------------------------------------
 311  *              ExecutorEnd
 312  *
 313  *              This routine must be called at the end of execution of any
 314  *              query plan
 315  *
 316  *              We provide a function hook variable that lets loadable plugins
 317  *              get control when ExecutorEnd is called.  Such a plugin would
 318  *              normally call standard_ExecutorEnd().
 319  *
 320  * ----------------------------------------------------------------
 321  */
 322 void
 323 ExecutorEnd(QueryDesc *queryDesc)
 324 {
 325         if (ExecutorEnd_hook)
 326                 (*ExecutorEnd_hook) (queryDesc);
 327         else
 328                 standard_ExecutorEnd(queryDesc);
 329 }
 330
 331 void
 332 standard_ExecutorEnd(QueryDesc *queryDesc)
 333 {
 334         EState     *estate;
 335         MemoryContext oldcontext;
 336
 337         /* sanity checks */
 338         Assert(queryDesc != NULL);
 339
 340         estate = queryDesc->estate;
 341
 342         Assert(estate != NULL);
 343
 344         /*
 345          * Switch into per-query memory context to run ExecEndPlan
 346          */
 347         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 348
 349         ExecEndPlan(queryDesc->planstate, estate);
 350
 351         /*
 352          * Close the SELECT INTO relation if any
 353          */
 354         if (estate->es_select_into)
 355                 CloseIntoRel(queryDesc);
 356
 357         /* do away with our snapshots */
 358         UnregisterSnapshot(estate->es_snapshot);
 359         UnregisterSnapshot(estate->es_crosscheck_snapshot);
 360
 361         /*
 362          * Must switch out of context before destroying it
 363          */
 364         MemoryContextSwitchTo(oldcontext);
 365
 366         /*
 367          * Release EState and per-query memory context.  This should release
 368          * everything the executor has allocated.
 369          */
 370         FreeExecutorState(estate);
 371
 372         /* Reset queryDesc fields that no longer point to anything */
 373         queryDesc->tupDesc = NULL;
 374         queryDesc->estate = NULL;
 375         queryDesc->planstate = NULL;
 376         queryDesc->totaltime = NULL;
 377 }
 378
 379 /* ----------------------------------------------------------------
 380  *              ExecutorRewind
 381  *
 382  *              This routine may be called on an open queryDesc to rewind it
 383  *              to the start.
 384  * ----------------------------------------------------------------
 385  */
 386 void
 387 ExecutorRewind(QueryDesc *queryDesc)
 388 {
 389         EState     *estate;
 390         MemoryContext oldcontext;
 391
 392         /* sanity checks */
 393         Assert(queryDesc != NULL);
 394
 395         estate = queryDesc->estate;
 396
 397         Assert(estate != NULL);
 398
 399         /* It's probably not sensible to rescan updating queries */
 400         Assert(queryDesc->operation == CMD_SELECT);
 401
 402         /*
 403          * Switch into per-query memory context
 404          */
 405         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 406
 407         /*
 408          * rescan plan
 409          */
 410         ExecReScan(queryDesc->planstate, NULL);
 411
 412         MemoryContextSwitchTo(oldcontext);
 413 }
 414
 415
 416 /*
 417  * ExecCheckRTPerms
 418  *              Check access permissions for all relations listed in a range table.
 419  */
 420 static void
 421 ExecCheckRTPerms(List *rangeTable)
 422 {
 423         ListCell   *l;
 424
 425         foreach(l, rangeTable)
 426         {
 427                 ExecCheckRTEPerms((RangeTblEntry *) lfirst(l));
 428         }
 429 }
 430
 431 /*
 432  * ExecCheckRTEPerms
 433  *              Check access permissions for a single RTE.
 434  */
 435 static void
 436 ExecCheckRTEPerms(RangeTblEntry *rte)
 437 {
 438         AclMode         requiredPerms;
 439         AclMode         relPerms;
 440         AclMode         remainingPerms;
 441         Oid                     relOid;
 442         Oid                     userid;
 443         Bitmapset  *tmpset;
 444         int                     col;
 445
 446         /*
 447          * Only plain-relation RTEs need to be checked here.  Function RTEs are
 448          * checked by init_fcache when the function is prepared for execution.
 449          * Join, subquery, and special RTEs need no checks.
 450          */
 451         if (rte->rtekind != RTE_RELATION)
 452                 return;
 453
 454         /*
 455          * No work if requiredPerms is empty.
 456          */
 457         requiredPerms = rte->requiredPerms;
 458         if (requiredPerms == 0)
 459                 return;
 460
 461         relOid = rte->relid;
 462
 463         /*
 464          * userid to check as: current user unless we have a setuid indication.
 465          *
 466          * Note: GetUserId() is presently fast enough that there's no harm in
 467          * calling it separately for each RTE.  If that stops being true, we could
 468          * call it once in ExecCheckRTPerms and pass the userid down from there.
 469          * But for now, no need for the extra clutter.
 470          */
 471         userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
 472
 473         /*
 474          * We must have *all* the requiredPerms bits, but some of the bits can be
 475          * satisfied from column-level rather than relation-level permissions.
 476          * First, remove any bits that are satisfied by relation permissions.
 477          */
 478         relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
 479         remainingPerms = requiredPerms & ~relPerms;
 480         if (remainingPerms != 0)
 481         {
 482                 /*
 483                  * If we lack any permissions that exist only as relation permissions,
 484                  * we can fail straight away.
 485                  */
 486                 if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
 487                         aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
 488                                                    get_rel_name(relOid));
 489
 490                 /*
 491                  * Check to see if we have the needed privileges at column level.
 492                  *
 493                  * Note: failures just report a table-level error; it would be nicer
 494                  * to report a column-level error if we have some but not all of the
 495                  * column privileges.
 496                  */
 497                 if (remainingPerms & ACL_SELECT)
 498                 {
 499                         /*
 500                          * When the query doesn't explicitly reference any columns (for
 501                          * example, SELECT COUNT(*) FROM table), allow the query if we
 502                          * have SELECT on any column of the rel, as per SQL spec.
 503                          */
 504                         if (bms_is_empty(rte->selectedCols))
 505                         {
 506                                 if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
 507                                                                                           ACLMASK_ANY) != ACLCHECK_OK)
 508                                         aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
 509                                                                    get_rel_name(relOid));
 510                         }
 511
 512                         tmpset = bms_copy(rte->selectedCols);
 513                         while ((col = bms_first_member(tmpset)) >= 0)
 514                         {
 515                                 /* remove the column number offset */
 516                                 col += FirstLowInvalidHeapAttributeNumber;
 517                                 if (col == InvalidAttrNumber)
 518                                 {
 519                                         /* Whole-row reference, must have priv on all cols */
 520                                         if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
 521                                                                                                   ACLMASK_ALL) != ACLCHECK_OK)
 522                                                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
 523                                                                            get_rel_name(relOid));
 524                                 }
 525                                 else
 526                                 {
 527                                         if (pg_attribute_aclcheck(relOid, col, userid, ACL_SELECT)
 528                                                 != ACLCHECK_OK)
 529                                                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
 530                                                                            get_rel_name(relOid));
 531                                 }
 532                         }
 533                         bms_free(tmpset);
 534                 }
 535
 536                 /*
 537                  * Basically the same for the mod columns, with either INSERT or
 538                  * UPDATE privilege as specified by remainingPerms.
 539                  */
 540                 remainingPerms &= ~ACL_SELECT;
 541                 if (remainingPerms != 0)
 542                 {
 543                         /*
 544                          * When the query doesn't explicitly change any columns, allow the
 545                          * query if we have permission on any column of the rel.  This is
 546                          * to handle SELECT FOR UPDATE as well as possible corner cases in
 547                          * INSERT and UPDATE.
 548                          */
 549                         if (bms_is_empty(rte->modifiedCols))
 550                         {
 551                                 if (pg_attribute_aclcheck_all(relOid, userid, remainingPerms,
 552                                                                                           ACLMASK_ANY) != ACLCHECK_OK)
 553                                         aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
 554                                                                    get_rel_name(relOid));
 555                         }
 556
 557                         tmpset = bms_copy(rte->modifiedCols);
 558                         while ((col = bms_first_member(tmpset)) >= 0)
 559                         {
 560                                 /* remove the column number offset */
 561                                 col += FirstLowInvalidHeapAttributeNumber;
 562                                 if (col == InvalidAttrNumber)
 563                                 {
 564                                         /* whole-row reference can't happen here */
 565                                         elog(ERROR, "whole-row update is not implemented");
 566                                 }
 567                                 else
 568                                 {
 569                                         if (pg_attribute_aclcheck(relOid, col, userid, remainingPerms)
 570                                                 != ACLCHECK_OK)
 571                                                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
 572                                                                            get_rel_name(relOid));
 573                                 }
 574                         }
 575                         bms_free(tmpset);
 576                 }
 577         }
 578 }
 579
 580 /*
 581  * Check that the query does not imply any writes to non-temp tables.
 582  */
 583 static void
 584 ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
 585 {
 586         ListCell   *l;
 587
 588         /*
 589          * CREATE TABLE AS or SELECT INTO?
 590          *
 591          * XXX should we allow this if the destination is temp?
 592          */
 593         if (plannedstmt->intoClause != NULL)
 594                 goto fail;
 595
 596         /* Fail if write permissions are requested on any non-temp table */
 597         foreach(l, plannedstmt->rtable)
 598         {
 599                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
 600
 601                 if (rte->rtekind != RTE_RELATION)
 602                         continue;
 603
 604                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
 605                         continue;
 606
 607                 if (isTempNamespace(get_rel_namespace(rte->relid)))
 608                         continue;
 609
 610                 goto fail;
 611         }
 612
 613         return;
 614
 615 fail:
 616         ereport(ERROR,
 617                         (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
 618                          errmsg("transaction is read-only")));
 619 }
 620
 621
 622 /* ----------------------------------------------------------------
 623  *              InitPlan
 624  *
 625  *              Initializes the query plan: open files, allocate storage
 626  *              and start up the rule manager
 627  * ----------------------------------------------------------------
 628  */
 629 static void
 630 InitPlan(QueryDesc *queryDesc, int eflags)
 631 {
 632         CmdType         operation = queryDesc->operation;
 633         PlannedStmt *plannedstmt = queryDesc->plannedstmt;
 634         Plan       *plan = plannedstmt->planTree;
 635         List       *rangeTable = plannedstmt->rtable;
 636         EState     *estate = queryDesc->estate;
 637         PlanState  *planstate;
 638         TupleDesc       tupType;
 639         ListCell   *l;
 640         int                     i;
 641
 642         /*
 643          * Do permissions checks
 644          */
 645         ExecCheckRTPerms(rangeTable);
 646
 647         /*
 648          * initialize the node's execution state
 649          */
 650         estate->es_range_table = rangeTable;
 651
 652         /*
 653          * initialize result relation stuff, and open/lock the result rels.
 654          *
 655          * We must do this before initializing the plan tree, else we might
 656          * try to do a lock upgrade if a result rel is also a source rel.
 657          */
 658         if (plannedstmt->resultRelations)
 659         {
 660                 List       *resultRelations = plannedstmt->resultRelations;
 661                 int                     numResultRelations = list_length(resultRelations);
 662                 ResultRelInfo *resultRelInfos;
 663                 ResultRelInfo *resultRelInfo;
 664
 665                 resultRelInfos = (ResultRelInfo *)
 666                         palloc(numResultRelations * sizeof(ResultRelInfo));
 667                 resultRelInfo = resultRelInfos;
 668                 foreach(l, resultRelations)
 669                 {
 670                         Index           resultRelationIndex = lfirst_int(l);
 671                         Oid                     resultRelationOid;
 672                         Relation        resultRelation;
 673
 674                         resultRelationOid = getrelid(resultRelationIndex, rangeTable);
 675                         resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
 676                         InitResultRelInfo(resultRelInfo,
 677                                                           resultRelation,
 678                                                           resultRelationIndex,
 679                                                           operation,
 680                                                           estate->es_instrument);
 681                         resultRelInfo++;
 682                 }
 683                 estate->es_result_relations = resultRelInfos;
 684                 estate->es_num_result_relations = numResultRelations;
 685                 /* es_result_relation_info is NULL except when within ModifyTable */
 686                 estate->es_result_relation_info = NULL;
 687         }
 688         else
 689         {
 690                 /*
 691                  * if no result relation, then set state appropriately
 692                  */
 693                 estate->es_result_relations = NULL;
 694                 estate->es_num_result_relations = 0;
 695                 estate->es_result_relation_info = NULL;
 696         }
 697
 698         /*
 699          * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE
 700          * before we initialize the plan tree, else we'd be risking lock
 701          * upgrades.  While we are at it, build the ExecRowMark list.
 702          */
 703         estate->es_rowMarks = NIL;
 704         foreach(l, plannedstmt->rowMarks)
 705         {
 706                 RowMarkClause *rc = (RowMarkClause *) lfirst(l);
 707                 Oid                     relid;
 708                 Relation        relation;
 709                 ExecRowMark *erm;
 710
 711                 /* ignore "parent" rowmarks; they are irrelevant at runtime */
 712                 if (rc->isParent)
 713                         continue;
 714
 715                 relid = getrelid(rc->rti, rangeTable);
 716                 relation = heap_open(relid, RowShareLock);
 717                 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
 718                 erm->relation = relation;
 719                 erm->rti = rc->rti;
 720                 erm->prti = rc->prti;
 721                 erm->rowmarkId = rc->rowmarkId;
 722                 erm->forUpdate = rc->forUpdate;
 723                 erm->noWait = rc->noWait;
 724                 /* remaining fields are filled during LockRows plan node init */
 725                 erm->ctidAttNo = InvalidAttrNumber;
 726                 erm->toidAttNo = InvalidAttrNumber;
 727                 ItemPointerSetInvalid(&(erm->curCtid));
 728                 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
 729         }
 730
 731         /*
 732          * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
 733          * flag appropriately so that the plan tree will be initialized with the
 734          * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
 735          */
 736         estate->es_select_into = false;
 737         if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
 738         {
 739                 estate->es_select_into = true;
 740                 estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
 741         }
 742
 743         /*
 744          * Initialize the executor's tuple table to empty.
 745          */
 746         estate->es_tupleTable = NIL;
 747         estate->es_trig_tuple_slot = NULL;
 748
 749         /* mark EvalPlanQual not active */
 750         estate->es_plannedstmt = plannedstmt;
 751         estate->es_evalPlanQual = NULL;
 752         estate->es_evTupleNull = NULL;
 753         estate->es_evTuple = NULL;
 754
 755         /*
 756          * Initialize private state information for each SubPlan.  We must do this
 757          * before running ExecInitNode on the main query tree, since
 758          * ExecInitSubPlan expects to be able to find these entries.
 759          */
 760         Assert(estate->es_subplanstates == NIL);
 761         i = 1;                                          /* subplan indices count from 1 */
 762         foreach(l, plannedstmt->subplans)
 763         {
 764                 Plan       *subplan = (Plan *) lfirst(l);
 765                 PlanState  *subplanstate;
 766                 int                     sp_eflags;
 767
 768                 /*
 769                  * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
 770                  * it is a parameterless subplan (not initplan), we suggest that it be
 771                  * prepared to handle REWIND efficiently; otherwise there is no need.
 772                  */
 773                 sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
 774                 if (bms_is_member(i, plannedstmt->rewindPlanIDs))
 775                         sp_eflags |= EXEC_FLAG_REWIND;
 776
 777                 subplanstate = ExecInitNode(subplan, estate, sp_eflags);
 778
 779                 estate->es_subplanstates = lappend(estate->es_subplanstates,
 780                                                                                    subplanstate);
 781
 782                 i++;
 783         }
 784
 785         /*
 786          * Initialize the private state information for all the nodes in the query
 787          * tree.  This opens files, allocates storage and leaves us ready to start
 788          * processing tuples.
 789          */
 790         planstate = ExecInitNode(plan, estate, eflags);
 791
 792         /*
 793          * Get the tuple descriptor describing the type of tuples to return. (this
 794          * is especially important if we are creating a relation with "SELECT
 795          * INTO")
 796          */
 797         tupType = ExecGetResultType(planstate);
 798
 799         /*
 800          * Initialize the junk filter if needed.  SELECT queries need a
 801          * filter if there are any junk attrs in the top-level tlist.
 802          */
 803         if (operation == CMD_SELECT)
 804         {
 805                 bool            junk_filter_needed = false;
 806                 ListCell   *tlist;
 807
 808                 foreach(tlist, plan->targetlist)
 809                 {
 810                         TargetEntry *tle = (TargetEntry *) lfirst(tlist);
 811
 812                         if (tle->resjunk)
 813                         {
 814                                 junk_filter_needed = true;
 815                                 break;
 816                         }
 817                 }
 818
 819                 if (junk_filter_needed)
 820                 {
 821                         JunkFilter *j;
 822
 823                         j = ExecInitJunkFilter(planstate->plan->targetlist,
 824                                                                    tupType->tdhasoid,
 825                                                                    ExecInitExtraTupleSlot(estate));
 826                         estate->es_junkFilter = j;
 827
 828                         /* Want to return the cleaned tuple type */
 829                         tupType = j->jf_cleanTupType;
 830                 }
 831         }
 832
 833         queryDesc->tupDesc = tupType;
 834         queryDesc->planstate = planstate;
 835
 836         /*
 837          * If doing SELECT INTO, initialize the "into" relation.  We must wait
 838          * till now so we have the "clean" result tuple type to create the new
 839          * table from.
 840          *
 841          * If EXPLAIN, skip creating the "into" relation.
 842          */
 843         if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
 844                 OpenIntoRel(queryDesc);
 845 }
 846
 847 /*
 848  * Initialize ResultRelInfo data for one result relation
 849  */
 850 void
 851 InitResultRelInfo(ResultRelInfo *resultRelInfo,
 852                                   Relation resultRelationDesc,
 853                                   Index resultRelationIndex,
 854                                   CmdType operation,
 855                                   bool doInstrument)
 856 {
 857         /*
 858          * Check valid relkind ... parser and/or planner should have noticed this
 859          * already, but let's make sure.
 860          */
 861         switch (resultRelationDesc->rd_rel->relkind)
 862         {
 863                 case RELKIND_RELATION:
 864                         /* OK */
 865                         break;
 866                 case RELKIND_SEQUENCE:
 867                         ereport(ERROR,
 868                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
 869                                          errmsg("cannot change sequence \"%s\"",
 870                                                         RelationGetRelationName(resultRelationDesc))));
 871                         break;
 872                 case RELKIND_TOASTVALUE:
 873                         ereport(ERROR,
 874                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
 875                                          errmsg("cannot change TOAST relation \"%s\"",
 876                                                         RelationGetRelationName(resultRelationDesc))));
 877                         break;
 878                 case RELKIND_VIEW:
 879                         ereport(ERROR,
 880                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
 881                                          errmsg("cannot change view \"%s\"",
 882                                                         RelationGetRelationName(resultRelationDesc))));
 883                         break;
 884                 default:
 885                         ereport(ERROR,
 886                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
 887                                          errmsg("cannot change relation \"%s\"",
 888                                                         RelationGetRelationName(resultRelationDesc))));
 889                         break;
 890         }
 891
 892         /* OK, fill in the node */
 893         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
 894         resultRelInfo->type = T_ResultRelInfo;
 895         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
 896         resultRelInfo->ri_RelationDesc = resultRelationDesc;
 897         resultRelInfo->ri_NumIndices = 0;
 898         resultRelInfo->ri_IndexRelationDescs = NULL;
 899         resultRelInfo->ri_IndexRelationInfo = NULL;
 900         /* make a copy so as not to depend on relcache info not changing... */
 901         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
 902         if (resultRelInfo->ri_TrigDesc)
 903         {
 904                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
 905
 906                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
 907                         palloc0(n * sizeof(FmgrInfo));
 908                 if (doInstrument)
 909                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
 910                 else
 911                         resultRelInfo->ri_TrigInstrument = NULL;
 912         }
 913         else
 914         {
 915                 resultRelInfo->ri_TrigFunctions = NULL;
 916                 resultRelInfo->ri_TrigInstrument = NULL;
 917         }
 918         resultRelInfo->ri_ConstraintExprs = NULL;
 919         resultRelInfo->ri_junkFilter = NULL;
 920         resultRelInfo->ri_projectReturning = NULL;
 921
 922         /*
 923          * If there are indices on the result relation, open them and save
 924          * descriptors in the result relation info, so that we can add new index
 925          * entries for the tuples we add/update.  We need not do this for a
 926          * DELETE, however, since deletion doesn't affect indexes.
 927          */
 928         if (resultRelationDesc->rd_rel->relhasindex &&
 929                 operation != CMD_DELETE)
 930                 ExecOpenIndices(resultRelInfo);
 931 }
 932
 933 /*
 934  *              ExecGetTriggerResultRel
 935  *
 936  * Get a ResultRelInfo for a trigger target relation.  Most of the time,
 937  * triggers are fired on one of the result relations of the query, and so
 938  * we can just return a member of the es_result_relations array.  (Note: in
 939  * self-join situations there might be multiple members with the same OID;
 940  * if so it doesn't matter which one we pick.)  However, it is sometimes
 941  * necessary to fire triggers on other relations; this happens mainly when an
 942  * RI update trigger queues additional triggers on other relations, which will
 943  * be processed in the context of the outer query.      For efficiency's sake,
 944  * we want to have a ResultRelInfo for those triggers too; that can avoid
 945  * repeated re-opening of the relation.  (It also provides a way for EXPLAIN
 946  * ANALYZE to report the runtimes of such triggers.)  So we make additional
 947  * ResultRelInfo's as needed, and save them in es_trig_target_relations.
 948  */
 949 ResultRelInfo *
 950 ExecGetTriggerResultRel(EState *estate, Oid relid)
 951 {
 952         ResultRelInfo *rInfo;
 953         int                     nr;
 954         ListCell   *l;
 955         Relation        rel;
 956         MemoryContext oldcontext;
 957
 958         /* First, search through the query result relations */
 959         rInfo = estate->es_result_relations;
 960         nr = estate->es_num_result_relations;
 961         while (nr > 0)
 962         {
 963                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
 964                         return rInfo;
 965                 rInfo++;
 966                 nr--;
 967         }
 968         /* Nope, but maybe we already made an extra ResultRelInfo for it */
 969         foreach(l, estate->es_trig_target_relations)
 970         {
 971                 rInfo = (ResultRelInfo *) lfirst(l);
 972                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
 973                         return rInfo;
 974         }
 975         /* Nope, so we need a new one */
 976
 977         /*
 978          * Open the target relation's relcache entry.  We assume that an
 979          * appropriate lock is still held by the backend from whenever the trigger
 980          * event got queued, so we need take no new lock here.
 981          */
 982         rel = heap_open(relid, NoLock);
 983
 984         /*
 985          * Make the new entry in the right context.  Currently, we don't need any
 986          * index information in ResultRelInfos used only for triggers, so tell
 987          * InitResultRelInfo it's a DELETE.
 988          */
 989         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 990         rInfo = makeNode(ResultRelInfo);
 991         InitResultRelInfo(rInfo,
 992                                           rel,
 993                                           0,            /* dummy rangetable index */
 994                                           CMD_DELETE,
 995                                           estate->es_instrument);
 996         estate->es_trig_target_relations =
 997                 lappend(estate->es_trig_target_relations, rInfo);
 998         MemoryContextSwitchTo(oldcontext);
 999
1000         return rInfo;
1001 }
1002
1003 /*
1004  *              ExecContextForcesOids
1005  *
1006  * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
1007  * we need to ensure that result tuples have space for an OID iff they are
1008  * going to be stored into a relation that has OIDs.  In other contexts
1009  * we are free to choose whether to leave space for OIDs in result tuples
1010  * (we generally don't want to, but we do if a physical-tlist optimization
1011  * is possible).  This routine checks the plan context and returns TRUE if the
1012  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
1013  * *hasoids is set to the required value.
1014  *
1015  * One reason this is ugly is that all plan nodes in the plan tree will emit
1016  * tuples with space for an OID, though we really only need the topmost node
1017  * to do so.  However, node types like Sort don't project new tuples but just
1018  * return their inputs, and in those cases the requirement propagates down
1019  * to the input node.  Eventually we might make this code smart enough to
1020  * recognize how far down the requirement really goes, but for now we just
1021  * make all plan nodes do the same thing if the top level forces the choice.
1022  *
1023  * We assume that if we are generating tuples for INSERT or UPDATE,
1024  * estate->es_result_relation_info is already set up to describe the target
1025  * relation.  Note that in an UPDATE that spans an inheritance tree, some of
1026  * the target relations may have OIDs and some not.  We have to make the
1027  * decisions on a per-relation basis as we initialize each of the subplans of
1028  * the ModifyTable node, so ModifyTable has to set es_result_relation_info
1029  * while initializing each subplan.
1030  *
1031  * SELECT INTO is even uglier, because we don't have the INTO relation's
1032  * descriptor available when this code runs; we have to look aside at a
1033  * flag set by InitPlan().
1034  */
1035 bool
1036 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
1037 {
1038         ResultRelInfo *ri = planstate->state->es_result_relation_info;
1039
1040         if (ri != NULL)
1041         {
1042                 Relation        rel = ri->ri_RelationDesc;
1043
1044                 if (rel != NULL)
1045                 {
1046                         *hasoids = rel->rd_rel->relhasoids;
1047                         return true;
1048                 }
1049         }
1050
1051         if (planstate->state->es_select_into)
1052         {
1053                 *hasoids = planstate->state->es_into_oids;
1054                 return true;
1055         }
1056
1057         return false;
1058 }
1059
1060 /* ----------------------------------------------------------------
1061  *              ExecEndPlan
1062  *
1063  *              Cleans up the query plan -- closes files and frees up storage
1064  *
1065  * NOTE: we are no longer very worried about freeing storage per se
1066  * in this code; FreeExecutorState should be guaranteed to release all
1067  * memory that needs to be released.  What we are worried about doing
1068  * is closing relations and dropping buffer pins.  Thus, for example,
1069  * tuple tables must be cleared or dropped to ensure pins are released.
1070  * ----------------------------------------------------------------
1071  */
1072 static void
1073 ExecEndPlan(PlanState *planstate, EState *estate)
1074 {
1075         ResultRelInfo *resultRelInfo;
1076         int                     i;
1077         ListCell   *l;
1078
1079         /*
1080          * shut down any PlanQual processing we were doing
1081          */
1082         if (estate->es_evalPlanQual != NULL)
1083                 EndEvalPlanQual(estate);
1084
1085         /*
1086          * shut down the node-type-specific query processing
1087          */
1088         ExecEndNode(planstate);
1089
1090         /*
1091          * for subplans too
1092          */
1093         foreach(l, estate->es_subplanstates)
1094         {
1095                 PlanState  *subplanstate = (PlanState *) lfirst(l);
1096
1097                 ExecEndNode(subplanstate);
1098         }
1099
1100         /*
1101          * destroy the executor's tuple table.  Actually we only care about
1102          * releasing buffer pins and tupdesc refcounts; there's no need to
1103          * pfree the TupleTableSlots, since the containing memory context
1104          * is about to go away anyway.
1105          */
1106         ExecResetTupleTable(estate->es_tupleTable, false);
1107
1108         /*
1109          * close the result relation(s) if any, but hold locks until xact commit.
1110          */
1111         resultRelInfo = estate->es_result_relations;
1112         for (i = estate->es_num_result_relations; i > 0; i--)
1113         {
1114                 /* Close indices and then the relation itself */
1115                 ExecCloseIndices(resultRelInfo);
1116                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1117                 resultRelInfo++;
1118         }
1119
1120         /*
1121          * likewise close any trigger target relations
1122          */
1123         foreach(l, estate->es_trig_target_relations)
1124         {
1125                 resultRelInfo = (ResultRelInfo *) lfirst(l);
1126                 /* Close indices and then the relation itself */
1127                 ExecCloseIndices(resultRelInfo);
1128                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1129         }
1130
1131         /*
1132          * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1133          */
1134         foreach(l, estate->es_rowMarks)
1135         {
1136                 ExecRowMark *erm = lfirst(l);
1137
1138                 heap_close(erm->relation, NoLock);
1139         }
1140 }
1141
1142 /* ----------------------------------------------------------------
1143  *              ExecutePlan
1144  *
1145  *              Processes the query plan until we have processed 'numberTuples' tuples,
1146  *              moving in the specified direction.
1147  *
1148  *              Runs to completion if numberTuples is 0
1149  *
1150  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1151  * user can see it
1152  * ----------------------------------------------------------------
1153  */
1154 static void
1155 ExecutePlan(EState *estate,
1156                         PlanState *planstate,
1157                         CmdType operation,
1158                         bool sendTuples,
1159                         long numberTuples,
1160                         ScanDirection direction,
1161                         DestReceiver *dest)
1162 {
1163         TupleTableSlot *slot;
1164         long            current_tuple_count;
1165
1166         /*
1167          * initialize local variables
1168          */
1169         current_tuple_count = 0;
1170
1171         /*
1172          * Set the direction.
1173          */
1174         estate->es_direction = direction;
1175
1176         /*
1177          * Loop until we've processed the proper number of tuples from the plan.
1178          */
1179         for (;;)
1180         {
1181                 /* Reset the per-output-tuple exprcontext */
1182                 ResetPerTupleExprContext(estate);
1183
1184                 /*
1185                  * Execute the plan and obtain a tuple
1186                  */
1187                 slot = ExecProcNode(planstate);
1188
1189                 /*
1190                  * if the tuple is null, then we assume there is nothing more to
1191                  * process so we just end the loop...
1192                  */
1193                 if (TupIsNull(slot))
1194                         break;
1195
1196                 /*
1197                  * If we have a junk filter, then project a new tuple with the junk
1198                  * removed.
1199                  *
1200                  * Store this new "clean" tuple in the junkfilter's resultSlot.
1201                  * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1202                  * because that tuple slot has the wrong descriptor.)
1203                  */
1204                 if (estate->es_junkFilter != NULL)
1205                         slot = ExecFilterJunk(estate->es_junkFilter, slot);
1206
1207                 /*
1208                  * If we are supposed to send the tuple somewhere, do so.
1209                  * (In practice, this is probably always the case at this point.)
1210                  */
1211                 if (sendTuples)
1212                         (*dest->receiveSlot) (slot, dest);
1213
1214                 /*
1215                  * Count tuples processed, if this is a SELECT.  (For other operation
1216                  * types, the ModifyTable plan node must count the appropriate
1217                  * events.)
1218                  */
1219                 if (operation == CMD_SELECT)
1220                         (estate->es_processed)++;
1221
1222                 /*
1223                  * check our tuple count.. if we've processed the proper number then
1224                  * quit, else loop again and process more tuples.  Zero numberTuples
1225                  * means no limit.
1226                  */
1227                 current_tuple_count++;
1228                 if (numberTuples && numberTuples == current_tuple_count)
1229                         break;
1230         }
1231 }
1232
1233
1234 /*
1235  * ExecRelCheck --- check that tuple meets constraints for result relation
1236  */
1237 static const char *
1238 ExecRelCheck(ResultRelInfo *resultRelInfo,
1239                          TupleTableSlot *slot, EState *estate)
1240 {
1241         Relation        rel = resultRelInfo->ri_RelationDesc;
1242         int                     ncheck = rel->rd_att->constr->num_check;
1243         ConstrCheck *check = rel->rd_att->constr->check;
1244         ExprContext *econtext;
1245         MemoryContext oldContext;
1246         List       *qual;
1247         int                     i;
1248
1249         /*
1250          * If first time through for this result relation, build expression
1251          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1252          * memory context so they'll survive throughout the query.
1253          */
1254         if (resultRelInfo->ri_ConstraintExprs == NULL)
1255         {
1256                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1257                 resultRelInfo->ri_ConstraintExprs =
1258                         (List **) palloc(ncheck * sizeof(List *));
1259                 for (i = 0; i < ncheck; i++)
1260                 {
1261                         /* ExecQual wants implicit-AND form */
1262                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1263                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1264                                 ExecPrepareExpr((Expr *) qual, estate);
1265                 }
1266                 MemoryContextSwitchTo(oldContext);
1267         }
1268
1269         /*
1270          * We will use the EState's per-tuple context for evaluating constraint
1271          * expressions (creating it if it's not already there).
1272          */
1273         econtext = GetPerTupleExprContext(estate);
1274
1275         /* Arrange for econtext's scan tuple to be the tuple under test */
1276         econtext->ecxt_scantuple = slot;
1277
1278         /* And evaluate the constraints */
1279         for (i = 0; i < ncheck; i++)
1280         {
1281                 qual = resultRelInfo->ri_ConstraintExprs[i];
1282
1283                 /*
1284                  * NOTE: SQL92 specifies that a NULL result from a constraint
1285                  * expression is not to be treated as a failure.  Therefore, tell
1286                  * ExecQual to return TRUE for NULL.
1287                  */
1288                 if (!ExecQual(qual, econtext, true))
1289                         return check[i].ccname;
1290         }
1291
1292         /* NULL result means no error */
1293         return NULL;
1294 }
1295
1296 void
1297 ExecConstraints(ResultRelInfo *resultRelInfo,
1298                                 TupleTableSlot *slot, EState *estate)
1299 {
1300         Relation        rel = resultRelInfo->ri_RelationDesc;
1301         TupleConstr *constr = rel->rd_att->constr;
1302
1303         Assert(constr);
1304
1305         if (constr->has_not_null)
1306         {
1307                 int                     natts = rel->rd_att->natts;
1308                 int                     attrChk;
1309
1310                 for (attrChk = 1; attrChk <= natts; attrChk++)
1311                 {
1312                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1313                                 slot_attisnull(slot, attrChk))
1314                                 ereport(ERROR,
1315                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1316                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1317                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1318                 }
1319         }
1320
1321         if (constr->num_check > 0)
1322         {
1323                 const char *failed;
1324
1325                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1326                         ereport(ERROR,
1327                                         (errcode(ERRCODE_CHECK_VIOLATION),
1328                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1329                                                         RelationGetRelationName(rel), failed)));
1330         }
1331 }
1332
1333 /*
1334  * Check a modified tuple to see if we want to process its updated version
1335  * under READ COMMITTED rules.
1336  *
1337  * See backend/executor/README for some info about how this works.
1338  *
1339  *      estate - executor state data
1340  *      rti - rangetable index of table containing tuple
1341  *      subplanstate - portion of plan tree that needs to be re-evaluated
1342  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
1343  *      priorXmax - t_xmax from the outdated tuple
1344  *
1345  * *tid is also an output parameter: it's modified to hold the TID of the
1346  * latest version of the tuple (note this may be changed even on failure)
1347  *
1348  * Returns a slot containing the new candidate update/delete tuple, or
1349  * NULL if we determine we shouldn't process the row.
1350  */
1351 TupleTableSlot *
1352 EvalPlanQual(EState *estate, Index rti,
1353                          PlanState *subplanstate,
1354                          ItemPointer tid, TransactionId priorXmax)
1355 {
1356         TupleTableSlot *slot;
1357         HeapTuple       copyTuple;
1358
1359         Assert(rti != 0);
1360
1361         /*
1362          * Get the updated version of the row; if fail, return NULL.
1363          */
1364         copyTuple = EvalPlanQualFetch(estate, rti, tid, priorXmax);
1365
1366         if (copyTuple == NULL)
1367                 return NULL;
1368
1369         /*
1370          * For UPDATE/DELETE we have to return tid of actual row we're executing
1371          * PQ for.
1372          */
1373         *tid = copyTuple->t_self;
1374
1375         /*
1376          * Need to run a recheck subquery.      Find or create a PQ stack entry.
1377          */
1378         EvalPlanQualPush(estate, rti, subplanstate);
1379
1380         /*
1381          * free old RTE' tuple, if any, and store target tuple where relation's
1382          * scan node will see it
1383          */
1384         EvalPlanQualSetTuple(estate, rti, copyTuple);
1385
1386         /*
1387          * Run the EPQ query, but just for one tuple.
1388          */
1389         slot = EvalPlanQualNext(estate);
1390
1391         /*
1392          * If we got a result, we must copy it out of the EPQ query's local
1393          * context before we shut down the EPQ query.
1394          */
1395         if (TupIsNull(slot))
1396                 slot = NULL;                    /* in case we got back an empty slot */
1397         else
1398         {
1399                 TupleDesc tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
1400                 evalPlanQual *epq = estate->es_evalPlanQual;
1401
1402                 if (epq->resultslot == NULL)
1403                 {
1404                         epq->resultslot = ExecInitExtraTupleSlot(estate);
1405                         ExecSetSlotDescriptor(epq->resultslot, tupdesc);
1406                 }
1407                 else
1408                 {
1409                         TupleDesc oldtupdesc = epq->resultslot->tts_tupleDescriptor;
1410
1411                         ExecSetSlotDescriptor(epq->resultslot, tupdesc);
1412                         FreeTupleDesc(oldtupdesc);
1413                 }
1414
1415                 slot = ExecCopySlot(epq->resultslot, slot);
1416         }
1417
1418         /*
1419          * Shut it down ...
1420          */
1421         EvalPlanQualPop(estate, subplanstate);
1422
1423         return slot;
1424 }
1425
1426 /*
1427  * Fetch a copy of the newest version of an outdated tuple
1428  *
1429  *      estate - executor state data
1430  *      rti - rangetable index of table containing tuple
1431  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
1432  *      priorXmax - t_xmax from the outdated tuple
1433  *
1434  * Returns a palloc'd copy of the newest tuple version, or NULL if we find
1435  * that there is no newest version (ie, the row was deleted not updated).
1436  *
1437  * XXX this does not lock the new row version ... wouldn't it be better if
1438  * it did?  As-is, caller might have to repeat all its work.
1439  */
1440 HeapTuple
1441 EvalPlanQualFetch(EState *estate, Index rti,
1442                                   ItemPointer tid, TransactionId priorXmax)
1443 {
1444         HeapTuple       copyTuple = NULL;
1445         Relation        relation;
1446         HeapTupleData tuple;
1447         SnapshotData SnapshotDirty;
1448
1449         Assert(rti != 0);
1450
1451         /*
1452          * Find relation containing target tuple --- must be either a result
1453          * relation of the query, or a SELECT FOR UPDATE target
1454          */
1455         if (estate->es_result_relation_info != NULL &&
1456                 estate->es_result_relation_info->ri_RangeTableIndex == rti)
1457                 relation = estate->es_result_relation_info->ri_RelationDesc;
1458         else
1459         {
1460                 ListCell   *l;
1461
1462                 relation = NULL;
1463                 foreach(l, estate->es_rowMarks)
1464                 {
1465                         ExecRowMark *erm = lfirst(l);
1466
1467                         if (erm->rti == rti)
1468                         {
1469                                 relation = erm->relation;
1470                                 break;
1471                         }
1472                 }
1473                 if (relation == NULL)
1474                         elog(ERROR, "could not find RowMark for RT index %u", rti);
1475         }
1476
1477         /*
1478          * fetch tid tuple
1479          *
1480          * Loop here to deal with updated or busy tuples
1481          */
1482         InitDirtySnapshot(SnapshotDirty);
1483         tuple.t_self = *tid;
1484         for (;;)
1485         {
1486                 Buffer          buffer;
1487
1488                 if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
1489                 {
1490                         /*
1491                          * If xmin isn't what we're expecting, the slot must have been
1492                          * recycled and reused for an unrelated tuple.  This implies that
1493                          * the latest version of the row was deleted, so we need do
1494                          * nothing.  (Should be safe to examine xmin without getting
1495                          * buffer's content lock, since xmin never changes in an existing
1496                          * tuple.)
1497                          */
1498                         if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
1499                                                                          priorXmax))
1500                         {
1501                                 ReleaseBuffer(buffer);
1502                                 return NULL;
1503                         }
1504
1505                         /* otherwise xmin should not be dirty... */
1506                         if (TransactionIdIsValid(SnapshotDirty.xmin))
1507                                 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
1508
1509                         /*
1510                          * If tuple is being updated by other transaction then we have to
1511                          * wait for its commit/abort.
1512                          */
1513                         if (TransactionIdIsValid(SnapshotDirty.xmax))
1514                         {
1515                                 ReleaseBuffer(buffer);
1516                                 XactLockTableWait(SnapshotDirty.xmax);
1517                                 continue;               /* loop back to repeat heap_fetch */
1518                         }
1519
1520                         /*
1521                          * If tuple was inserted by our own transaction, we have to check
1522                          * cmin against es_output_cid: cmin >= current CID means our
1523                          * command cannot see the tuple, so we should ignore it.  Without
1524                          * this we are open to the "Halloween problem" of indefinitely
1525                          * re-updating the same tuple. (We need not check cmax because
1526                          * HeapTupleSatisfiesDirty will consider a tuple deleted by our
1527                          * transaction dead, regardless of cmax.)  We just checked that
1528                          * priorXmax == xmin, so we can test that variable instead of
1529                          * doing HeapTupleHeaderGetXmin again.
1530                          */
1531                         if (TransactionIdIsCurrentTransactionId(priorXmax) &&
1532                                 HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
1533                         {
1534                                 ReleaseBuffer(buffer);
1535                                 return NULL;
1536                         }
1537
1538                         /*
1539                          * We got tuple - now copy it for use by recheck query.
1540                          */
1541                         copyTuple = heap_copytuple(&tuple);
1542                         ReleaseBuffer(buffer);
1543                         break;
1544                 }
1545
1546                 /*
1547                  * If the referenced slot was actually empty, the latest version of
1548                  * the row must have been deleted, so we need do nothing.
1549                  */
1550                 if (tuple.t_data == NULL)
1551                 {
1552                         ReleaseBuffer(buffer);
1553                         return NULL;
1554                 }
1555
1556                 /*
1557                  * As above, if xmin isn't what we're expecting, do nothing.
1558                  */
1559                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
1560                                                                  priorXmax))
1561                 {
1562                         ReleaseBuffer(buffer);
1563                         return NULL;
1564                 }
1565
1566                 /*
1567                  * If we get here, the tuple was found but failed SnapshotDirty.
1568                  * Assuming the xmin is either a committed xact or our own xact (as it
1569                  * certainly should be if we're trying to modify the tuple), this must
1570                  * mean that the row was updated or deleted by either a committed xact
1571                  * or our own xact.  If it was deleted, we can ignore it; if it was
1572                  * updated then chain up to the next version and repeat the whole
1573                  * test.
1574                  *
1575                  * As above, it should be safe to examine xmax and t_ctid without the
1576                  * buffer content lock, because they can't be changing.
1577                  */
1578                 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
1579                 {
1580                         /* deleted, so forget about it */
1581                         ReleaseBuffer(buffer);
1582                         return NULL;
1583                 }
1584
1585                 /* updated, so look at the updated row */
1586                 tuple.t_self = tuple.t_data->t_ctid;
1587                 /* updated row should have xmin matching this xmax */
1588                 priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
1589                 ReleaseBuffer(buffer);
1590                 /* loop back to fetch next in chain */
1591         }
1592
1593         /*
1594          * Return the copied tuple
1595          */
1596         return copyTuple;
1597 }
1598
1599 /*
1600  * Push a new level of EPQ state, and prepare to execute the given subplan
1601  */
1602 void
1603 EvalPlanQualPush(EState *estate, Index rti, PlanState *subplanstate)
1604 {
1605         evalPlanQual *epq;
1606         bool            endNode;
1607
1608         Assert(rti != 0);
1609
1610         epq = estate->es_evalPlanQual;
1611         endNode = true;
1612
1613         if (epq != NULL && epq->rti == 0)
1614         {
1615                 /* Top PQ stack entry is idle, so re-use it */
1616                 Assert(epq->next == NULL);
1617                 epq->rti = rti;
1618                 endNode = false;
1619         }
1620
1621         /*
1622          * If this is request for another RTE - Ra, - then we have to check wasn't
1623          * PlanQual requested for Ra already and if so then Ra' row was updated
1624          * again and we have to re-start old execution for Ra and forget all what
1625          * we done after Ra was suspended. Cool? -:))
1626          */
1627         if (epq != NULL && epq->rti != rti &&
1628                 epq->estate->es_evTuple[rti - 1] != NULL)
1629         {
1630                 do
1631                 {
1632                         evalPlanQual *oldepq;
1633
1634                         /* stop execution */
1635                         EvalPlanQualStop(epq);
1636                         /* pop previous PlanQual from the stack */
1637                         oldepq = epq->next;
1638                         Assert(oldepq && oldepq->rti != 0);
1639                         /* push current PQ to freePQ stack */
1640                         oldepq->free = epq;
1641                         epq = oldepq;
1642                         estate->es_evalPlanQual = epq;
1643                 } while (epq->rti != rti);
1644         }
1645
1646         /*
1647          * If we are requested for another RTE then we have to suspend execution
1648          * of current PlanQual and start execution for new one.
1649          */
1650         if (epq == NULL || epq->rti != rti)
1651         {
1652                 /* try to reuse plan used previously */
1653                 evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
1654
1655                 if (newepq == NULL)             /* first call or freePQ stack is empty */
1656                 {
1657                         newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
1658                         newepq->free = NULL;
1659                         newepq->estate = NULL;
1660                         newepq->planstate = NULL;
1661                         newepq->origplanstate = NULL;
1662                         newepq->resultslot = NULL;
1663                 }
1664                 else
1665                 {
1666                         /* recycle previously used PlanQual */
1667                         Assert(newepq->estate == NULL);
1668                         epq->free = NULL;
1669                 }
1670                 /* push current PQ to the stack */
1671                 newepq->next = epq;
1672                 epq = newepq;
1673                 estate->es_evalPlanQual = epq;
1674                 epq->rti = rti;
1675                 endNode = false;
1676         }
1677
1678         Assert(epq->rti == rti);
1679         Assert(estate->es_evalPlanQual == epq);
1680
1681         /*
1682          * Ok - we're requested for the same RTE.  Unfortunately we still have to
1683          * end and restart execution of the plan, because ExecReScan wouldn't
1684          * ensure that upper plan nodes would reset themselves.  We could make
1685          * that work if insertion of the target tuple were integrated with the
1686          * Param mechanism somehow, so that the upper plan nodes know that their
1687          * children's outputs have changed.
1688          *
1689          * Note that the stack of free evalPlanQual nodes is quite useless at the
1690          * moment, since it only saves us from pallocing/releasing the
1691          * evalPlanQual nodes themselves.  But it will be useful once we implement
1692          * ReScan instead of end/restart for re-using PlanQual nodes.
1693          */
1694         if (endNode)
1695         {
1696                 /* stop execution */
1697                 EvalPlanQualStop(epq);
1698         }
1699
1700         /*
1701          * Initialize new recheck query.
1702          *
1703          * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
1704          * instead copy down changeable state from the top plan (including
1705          * es_result_relation_info) and reset locally changeable
1706          * state in the epq (including es_param_exec_vals, es_evTupleNull).
1707          */
1708         epq->origplanstate = subplanstate;
1709         EvalPlanQualStart(epq, estate, subplanstate->plan, epq->next);
1710 }
1711
1712 /*
1713  * Install one test tuple into current EPQ level
1714  */
1715 void
1716 EvalPlanQualSetTuple(EState *estate, Index rti, HeapTuple tuple)
1717 {
1718         evalPlanQual *epq = estate->es_evalPlanQual;
1719         EState     *epqstate;
1720
1721         Assert(rti != 0);
1722
1723         /*
1724          * free old RTE' tuple, if any, and store target tuple where relation's
1725          * scan node will see it
1726          */
1727         epqstate = epq->estate;
1728         if (epqstate->es_evTuple[rti - 1] != NULL)
1729                 heap_freetuple(epqstate->es_evTuple[rti - 1]);
1730         epqstate->es_evTuple[rti - 1] = tuple;
1731 }
1732
1733 /*
1734  * Fetch the next row (if any) from EvalPlanQual testing
1735  */
1736 TupleTableSlot *
1737 EvalPlanQualNext(EState *estate)
1738 {
1739         evalPlanQual *epq = estate->es_evalPlanQual;
1740         MemoryContext oldcontext;
1741         TupleTableSlot *slot;
1742
1743         Assert(epq->rti != 0);
1744
1745         oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
1746         slot = ExecProcNode(epq->planstate);
1747         MemoryContextSwitchTo(oldcontext);
1748
1749         return slot;
1750 }
1751
1752 /*
1753  * Shut down and pop the specified level of EvalPlanQual machinery,
1754  * plus any levels nested within it
1755  */
1756 void
1757 EvalPlanQualPop(EState *estate, PlanState *subplanstate)
1758 {
1759         evalPlanQual *epq = estate->es_evalPlanQual;
1760
1761         for (;;)
1762         {
1763                 PlanState *epqplanstate = epq->origplanstate;
1764                 evalPlanQual *oldepq;
1765
1766                 Assert(epq->rti != 0);
1767
1768                 /* stop execution */
1769                 EvalPlanQualStop(epq);
1770                 epq->origplanstate = NULL;
1771                 /* pop old PQ from the stack */
1772                 oldepq = epq->next;
1773                 if (oldepq == NULL)
1774                 {
1775                         /* this is the first (oldest) PQ - mark as free */
1776                         epq->rti = 0;
1777                         break;
1778                 }
1779                 Assert(oldepq->rti != 0);
1780                 /* push current PQ to freePQ stack */
1781                 oldepq->free = epq;
1782                 epq = oldepq;
1783                 estate->es_evalPlanQual = epq;
1784                 if (epqplanstate == subplanstate)
1785                         break;
1786         }
1787 }
1788
1789 static void
1790 EndEvalPlanQual(EState *estate)
1791 {
1792         evalPlanQual *epq = estate->es_evalPlanQual;
1793
1794         if (epq->rti == 0)                      /* plans already shutdowned */
1795         {
1796                 Assert(epq->next == NULL);
1797                 return;
1798         }
1799
1800         for (;;)
1801         {
1802                 evalPlanQual *oldepq;
1803
1804                 /* stop execution */
1805                 EvalPlanQualStop(epq);
1806                 epq->origplanstate = NULL;
1807                 /* pop old PQ from the stack */
1808                 oldepq = epq->next;
1809                 if (oldepq == NULL)
1810                 {
1811                         /* this is the first (oldest) PQ - mark as free */
1812                         epq->rti = 0;
1813                         break;
1814                 }
1815                 Assert(oldepq->rti != 0);
1816                 /* push current PQ to freePQ stack */
1817                 oldepq->free = epq;
1818                 epq = oldepq;
1819                 estate->es_evalPlanQual = epq;
1820         }
1821 }
1822
1823 /*
1824  * Start execution of one level of PlanQual.
1825  *
1826  * This is a cut-down version of ExecutorStart(): we copy some state from
1827  * the top-level estate rather than initializing it fresh.
1828  */
1829 static void
1830 EvalPlanQualStart(evalPlanQual *epq, EState *estate, Plan *planTree,
1831                                   evalPlanQual *priorepq)
1832 {
1833         EState     *epqstate;
1834         int                     rtsize;
1835         MemoryContext oldcontext;
1836         ListCell   *l;
1837
1838         rtsize = list_length(estate->es_range_table);
1839
1840         epq->estate = epqstate = CreateExecutorState();
1841
1842         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
1843
1844         /*
1845          * The epqstates share the top query's copy of unchanging state such as
1846          * the snapshot, rangetable, result-rel info, and external Param info.
1847          * They need their own copies of local state, including a tuple table,
1848          * es_param_exec_vals, etc.
1849          */
1850         epqstate->es_direction = ForwardScanDirection;
1851         epqstate->es_snapshot = estate->es_snapshot;
1852         epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
1853         epqstate->es_range_table = estate->es_range_table;
1854         epqstate->es_junkFilter = estate->es_junkFilter;
1855         epqstate->es_output_cid = estate->es_output_cid;
1856         epqstate->es_result_relations = estate->es_result_relations;
1857         epqstate->es_num_result_relations = estate->es_num_result_relations;
1858         epqstate->es_result_relation_info = estate->es_result_relation_info;
1859         /* es_trig_target_relations must NOT be copied */
1860         epqstate->es_param_list_info = estate->es_param_list_info;
1861         if (estate->es_plannedstmt->nParamExec > 0)
1862                 epqstate->es_param_exec_vals = (ParamExecData *)
1863                         palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
1864         epqstate->es_rowMarks = estate->es_rowMarks;
1865         epqstate->es_instrument = estate->es_instrument;
1866         epqstate->es_select_into = estate->es_select_into;
1867         epqstate->es_into_oids = estate->es_into_oids;
1868         epqstate->es_plannedstmt = estate->es_plannedstmt;
1869
1870         /*
1871          * Each epqstate must have its own es_evTupleNull state, but all the stack
1872          * entries share es_evTuple state.      This allows sub-rechecks to inherit
1873          * the value being examined by an outer recheck.
1874          */
1875         epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
1876         if (priorepq == NULL)
1877                 /* first PQ stack entry */
1878                 epqstate->es_evTuple = (HeapTuple *)
1879                         palloc0(rtsize * sizeof(HeapTuple));
1880         else
1881                 /* later stack entries share the same storage */
1882                 epqstate->es_evTuple = priorepq->estate->es_evTuple;
1883
1884         /*
1885          * Each epqstate also has its own tuple table.
1886          */
1887         epqstate->es_tupleTable = NIL;
1888
1889         /*
1890          * Initialize private state information for each SubPlan.  We must do this
1891          * before running ExecInitNode on the main query tree, since
1892          * ExecInitSubPlan expects to be able to find these entries.
1893          * Some of the SubPlans might not be used in the part of the plan tree
1894          * we intend to run, but since it's not easy to tell which, we just
1895          * initialize them all.
1896          */
1897         Assert(epqstate->es_subplanstates == NIL);
1898         foreach(l, estate->es_plannedstmt->subplans)
1899         {
1900                 Plan       *subplan = (Plan *) lfirst(l);
1901                 PlanState  *subplanstate;
1902
1903                 subplanstate = ExecInitNode(subplan, epqstate, 0);
1904
1905                 epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
1906                                                                                          subplanstate);
1907         }
1908
1909         /*
1910          * Initialize the private state information for all the nodes in the
1911          * part of the plan tree we need to run.  This opens files, allocates
1912          * storage and leaves us ready to start processing tuples.
1913          */
1914         epq->planstate = ExecInitNode(planTree, epqstate, 0);
1915
1916         MemoryContextSwitchTo(oldcontext);
1917 }
1918
1919 /*
1920  * End execution of one level of PlanQual.
1921  *
1922  * This is a cut-down version of ExecutorEnd(); basically we want to do most
1923  * of the normal cleanup, but *not* close result relations (which we are
1924  * just sharing from the outer query).  We do, however, have to close any
1925  * trigger target relations that got opened, since those are not shared.
1926  */
1927 static void
1928 EvalPlanQualStop(evalPlanQual *epq)
1929 {
1930         EState     *epqstate = epq->estate;
1931         MemoryContext oldcontext;
1932         ListCell   *l;
1933
1934         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
1935
1936         ExecEndNode(epq->planstate);
1937
1938         foreach(l, epqstate->es_subplanstates)
1939         {
1940                 PlanState  *subplanstate = (PlanState *) lfirst(l);
1941
1942                 ExecEndNode(subplanstate);
1943         }
1944
1945         /* throw away the per-epqstate tuple table completely */
1946         ExecResetTupleTable(epqstate->es_tupleTable, true);
1947         epqstate->es_tupleTable = NIL;
1948
1949         if (epqstate->es_evTuple[epq->rti - 1] != NULL)
1950         {
1951                 heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
1952                 epqstate->es_evTuple[epq->rti - 1] = NULL;
1953         }
1954
1955         foreach(l, epqstate->es_trig_target_relations)
1956         {
1957                 ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
1958
1959                 /* Close indices and then the relation itself */
1960                 ExecCloseIndices(resultRelInfo);
1961                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1962         }
1963
1964         MemoryContextSwitchTo(oldcontext);
1965
1966         FreeExecutorState(epqstate);
1967
1968         epq->estate = NULL;
1969         epq->planstate = NULL;
1970 }
1971
1972
1973 /*
1974  * Support for SELECT INTO (a/k/a CREATE TABLE AS)
1975  *
1976  * We implement SELECT INTO by diverting SELECT's normal output with
1977  * a specialized DestReceiver type.
1978  */
1979
1980 typedef struct
1981 {
1982         DestReceiver pub;                       /* publicly-known function pointers */
1983         EState     *estate;                     /* EState we are working with */
1984         Relation        rel;                    /* Relation to write to */
1985         int                     hi_options;             /* heap_insert performance options */
1986         BulkInsertState bistate;        /* bulk insert state */
1987 } DR_intorel;
1988
1989 /*
1990  * OpenIntoRel --- actually create the SELECT INTO target relation
1991  *
1992  * This also replaces QueryDesc->dest with the special DestReceiver for
1993  * SELECT INTO.  We assume that the correct result tuple type has already
1994  * been placed in queryDesc->tupDesc.
1995  */
1996 static void
1997 OpenIntoRel(QueryDesc *queryDesc)
1998 {
1999         IntoClause *into = queryDesc->plannedstmt->intoClause;
2000         EState     *estate = queryDesc->estate;
2001         Relation        intoRelationDesc;
2002         char       *intoName;
2003         Oid                     namespaceId;
2004         Oid                     tablespaceId;
2005         Datum           reloptions;
2006         AclResult       aclresult;
2007         Oid                     intoRelationId;
2008         TupleDesc       tupdesc;
2009         DR_intorel *myState;
2010         static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
2011
2012         Assert(into);
2013
2014         /*
2015          * Check consistency of arguments
2016          */
2017         if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
2018                 ereport(ERROR,
2019                                 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
2020                                  errmsg("ON COMMIT can only be used on temporary tables")));
2021
2022         /*
2023          * Find namespace to create in, check its permissions
2024          */
2025         intoName = into->rel->relname;
2026         namespaceId = RangeVarGetCreationNamespace(into->rel);
2027
2028         aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
2029                                                                           ACL_CREATE);
2030         if (aclresult != ACLCHECK_OK)
2031                 aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
2032                                            get_namespace_name(namespaceId));
2033
2034         /*
2035          * Select tablespace to use.  If not specified, use default tablespace
2036          * (which may in turn default to database's default).
2037          */
2038         if (into->tableSpaceName)
2039         {
2040                 tablespaceId = get_tablespace_oid(into->tableSpaceName);
2041                 if (!OidIsValid(tablespaceId))
2042                         ereport(ERROR,
2043                                         (errcode(ERRCODE_UNDEFINED_OBJECT),
2044                                          errmsg("tablespace \"%s\" does not exist",
2045                                                         into->tableSpaceName)));
2046         }
2047         else
2048         {
2049                 tablespaceId = GetDefaultTablespace(into->rel->istemp);
2050                 /* note InvalidOid is OK in this case */
2051         }
2052
2053         /* Check permissions except when using the database's default space */
2054         if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
2055         {
2056                 AclResult       aclresult;
2057
2058                 aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
2059                                                                                    ACL_CREATE);
2060
2061                 if (aclresult != ACLCHECK_OK)
2062                         aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
2063                                                    get_tablespace_name(tablespaceId));
2064         }
2065
2066         /* Parse and validate any reloptions */
2067         reloptions = transformRelOptions((Datum) 0,
2068                                                                          into->options,
2069                                                                          NULL,
2070                                                                          validnsps,
2071                                                                          true,
2072                                                                          false);
2073         (void) heap_reloptions(RELKIND_RELATION, reloptions, true);
2074
2075         /* Copy the tupdesc because heap_create_with_catalog modifies it */
2076         tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);
2077
2078         /* Now we can actually create the new relation */
2079         intoRelationId = heap_create_with_catalog(intoName,
2080                                                                                           namespaceId,
2081                                                                                           tablespaceId,
2082                                                                                           InvalidOid,
2083                                                                                           InvalidOid,
2084                                                                                           GetUserId(),
2085                                                                                           tupdesc,
2086                                                                                           NIL,
2087                                                                                           RELKIND_RELATION,
2088                                                                                           false,
2089                                                                                           true,
2090                                                                                           0,
2091                                                                                           into->onCommit,
2092                                                                                           reloptions,
2093                                                                                           true,
2094                                                                                           allowSystemTableMods);
2095
2096         FreeTupleDesc(tupdesc);
2097
2098         /*
2099          * Advance command counter so that the newly-created relation's catalog
2100          * tuples will be visible to heap_open.
2101          */
2102         CommandCounterIncrement();
2103
2104         /*
2105          * If necessary, create a TOAST table for the INTO relation. Note that
2106          * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
2107          * the TOAST table will be visible for insertion.
2108          */
2109         reloptions = transformRelOptions((Datum) 0,
2110                                                                          into->options,
2111                                                                          "toast",
2112                                                                          validnsps,
2113                                                                          true,
2114                                                                          false);
2115
2116         (void) heap_reloptions(RELKIND_TOASTVALUE, reloptions, true);
2117
2118         AlterTableCreateToastTable(intoRelationId, InvalidOid, reloptions, false);
2119
2120         /*
2121          * And open the constructed table for writing.
2122          */
2123         intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
2124
2125         /*
2126          * Now replace the query's DestReceiver with one for SELECT INTO
2127          */
2128         queryDesc->dest = CreateDestReceiver(DestIntoRel);
2129         myState = (DR_intorel *) queryDesc->dest;
2130         Assert(myState->pub.mydest == DestIntoRel);
2131         myState->estate = estate;
2132         myState->rel = intoRelationDesc;
2133
2134         /*
2135          * We can skip WAL-logging the insertions, unless PITR is in use.  We can
2136          * skip the FSM in any case.
2137          */
2138         myState->hi_options = HEAP_INSERT_SKIP_FSM |
2139                 (XLogArchivingActive() ? 0 : HEAP_INSERT_SKIP_WAL);
2140         myState->bistate = GetBulkInsertState();
2141
2142         /* Not using WAL requires rd_targblock be initially invalid */
2143         Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
2144 }
2145
2146 /*
2147  * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
2148  */
2149 static void
2150 CloseIntoRel(QueryDesc *queryDesc)
2151 {
2152         DR_intorel *myState = (DR_intorel *) queryDesc->dest;
2153
2154         /* OpenIntoRel might never have gotten called */
2155         if (myState && myState->pub.mydest == DestIntoRel && myState->rel)
2156         {
2157                 FreeBulkInsertState(myState->bistate);
2158
2159                 /* If we skipped using WAL, must heap_sync before commit */
2160                 if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
2161                         heap_sync(myState->rel);
2162
2163                 /* close rel, but keep lock until commit */
2164                 heap_close(myState->rel, NoLock);
2165
2166                 myState->rel = NULL;
2167         }
2168 }
2169
2170 /*
2171  * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
2172  */
2173 DestReceiver *
2174 CreateIntoRelDestReceiver(void)
2175 {
2176         DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
2177
2178         self->pub.receiveSlot = intorel_receive;
2179         self->pub.rStartup = intorel_startup;
2180         self->pub.rShutdown = intorel_shutdown;
2181         self->pub.rDestroy = intorel_destroy;
2182         self->pub.mydest = DestIntoRel;
2183
2184         /* private fields will be set by OpenIntoRel */
2185
2186         return (DestReceiver *) self;
2187 }
2188
2189 /*
2190  * intorel_startup --- executor startup
2191  */
2192 static void
2193 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
2194 {
2195         /* no-op */
2196 }
2197
2198 /*
2199  * intorel_receive --- receive one tuple
2200  */
2201 static void
2202 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
2203 {
2204         DR_intorel *myState = (DR_intorel *) self;
2205         HeapTuple       tuple;
2206
2207         /*
2208          * get the heap tuple out of the tuple table slot, making sure we have a
2209          * writable copy
2210          */
2211         tuple = ExecMaterializeSlot(slot);
2212
2213         /*
2214          * force assignment of new OID (see comments in ExecInsert)
2215          */
2216         if (myState->rel->rd_rel->relhasoids)
2217                 HeapTupleSetOid(tuple, InvalidOid);
2218
2219         heap_insert(myState->rel,
2220                                 tuple,
2221                                 myState->estate->es_output_cid,
2222                                 myState->hi_options,
2223                                 myState->bistate);
2224
2225         /* We know this is a newly created relation, so there are no indexes */
2226 }
2227
2228 /*
2229  * intorel_shutdown --- executor end
2230  */
2231 static void
2232 intorel_shutdown(DestReceiver *self)
2233 {
2234         /* no-op */
2235 }
2236
2237 /*
2238  * intorel_destroy --- release DestReceiver object
2239  */
2240 static void
2241 intorel_destroy(DestReceiver *self)
2242 {
2243         pfree(self);
2244 }