OSDN Git Service

Refactor broken CREATE TABLE IF NOT EXISTS support.
[pg-rex/syncrep.git] / src / backend / executor / execMain.c
1 /*-------------------------------------------------------------------------
2  *
3  * execMain.c
4  *        top level executor interface routines
5  *
6  * INTERFACE ROUTINES
7  *      ExecutorStart()
8  *      ExecutorRun()
9  *      ExecutorFinish()
10  *      ExecutorEnd()
11  *
12  *      These four procedures are the external interface to the executor.
13  *      In each case, the query descriptor is required as an argument.
14  *
15  *      ExecutorStart must be called at the beginning of execution of any
16  *      query plan and ExecutorEnd must always be called at the end of
17  *      execution of a plan (unless it is aborted due to error).
18  *
19  *      ExecutorRun accepts direction and count arguments that specify whether
20  *      the plan is to be executed forwards, backwards, and for how many tuples.
21  *      In some cases ExecutorRun may be called multiple times to process all
22  *      the tuples for a plan.  It is also acceptable to stop short of executing
23  *      the whole plan (but only if it is a SELECT).
24  *
25  *      ExecutorFinish must be called after the final ExecutorRun call and
26  *      before ExecutorEnd.  This can be omitted only in case of EXPLAIN,
27  *      which should also omit ExecutorRun.
28  *
29  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
30  * Portions Copyright (c) 1994, Regents of the University of California
31  *
32  *
33  * IDENTIFICATION
34  *        src/backend/executor/execMain.c
35  *
36  *-------------------------------------------------------------------------
37  */
38 #include "postgres.h"
39
40 #include "access/reloptions.h"
41 #include "access/sysattr.h"
42 #include "access/transam.h"
43 #include "access/xact.h"
44 #include "catalog/heap.h"
45 #include "catalog/namespace.h"
46 #include "catalog/toasting.h"
47 #include "commands/tablespace.h"
48 #include "commands/trigger.h"
49 #include "executor/execdebug.h"
50 #include "executor/instrument.h"
51 #include "miscadmin.h"
52 #include "optimizer/clauses.h"
53 #include "parser/parse_clause.h"
54 #include "parser/parsetree.h"
55 #include "storage/bufmgr.h"
56 #include "storage/lmgr.h"
57 #include "storage/smgr.h"
58 #include "tcop/utility.h"
59 #include "utils/acl.h"
60 #include "utils/lsyscache.h"
61 #include "utils/memutils.h"
62 #include "utils/snapmgr.h"
63 #include "utils/tqual.h"
64
65
/*
 * Hooks for plugins to get control in ExecutorStart/Run/Finish/End.
 * A plugin that installs one of these is expected to call the matching
 * standard_Executor*() function itself (possibly wrapping it).
 */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
ExecutorRun_hook_type ExecutorRun_hook = NULL;
ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
ExecutorEnd_hook_type ExecutorEnd_hook = NULL;

/*
 * Hook for plugin to get control in ExecCheckRTPerms(); lets an extension
 * impose additional permission checks beyond the built-in ACL tests.
 */
ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void ExecPostprocessPlan(EState *estate);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static void ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			bool sendTuples,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
				  Plan *planTree);
/* SELECT INTO support: create/close the target rel and receive tuples */
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */
97
98
99 /* ----------------------------------------------------------------
100  *              ExecutorStart
101  *
102  *              This routine must be called at the beginning of any execution of any
103  *              query plan
104  *
105  * Takes a QueryDesc previously created by CreateQueryDesc (which is separate
106  * only because some places use QueryDescs for utility commands).  The tupDesc
107  * field of the QueryDesc is filled in to describe the tuples that will be
108  * returned, and the internal fields (estate and planstate) are set up.
109  *
110  * eflags contains flag bits as described in executor.h.
111  *
112  * NB: the CurrentMemoryContext when this is called will become the parent
113  * of the per-query context used for this Executor invocation.
114  *
115  * We provide a function hook variable that lets loadable plugins
116  * get control when ExecutorStart is called.  Such a plugin would
117  * normally call standard_ExecutorStart().
118  *
119  * ----------------------------------------------------------------
120  */
121 void
122 ExecutorStart(QueryDesc *queryDesc, int eflags)
123 {
124         if (ExecutorStart_hook)
125                 (*ExecutorStart_hook) (queryDesc, eflags);
126         else
127                 standard_ExecutorStart(queryDesc, eflags);
128 }
129
/*
 * standard_ExecutorStart
 *
 * Default implementation of ExecutorStart: builds the per-query EState,
 * performs read-only-transaction checks, assigns the output command ID
 * where needed, and initializes the plan state tree via InitPlan.
 */
void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 * Everything allocated below lives in es_query_cxt and is released
	 * as a whole by FreeExecutorState in ExecutorEnd.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:

			/*
			 * SELECT INTO, SELECT FOR UPDATE/SHARE and modifying CTEs need to
			 * mark tuples
			 */
			if (queryDesc->plannedstmt->intoClause != NULL ||
				queryDesc->plannedstmt->rowMarks != NIL ||
				queryDesc->plannedstmt->hasModifyingCTE)
				estate->es_output_cid = GetCurrentCommandId(true);

			/*
			 * A SELECT without modifying CTEs can't possibly queue triggers,
			 * so force skip-triggers mode. This is just a marginal efficiency
			 * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
			 * all that expensive, but we might as well do it.
			 */
			if (!queryDesc->plannedstmt->hasModifyingCTE)
				eflags |= EXEC_FLAG_SKIP_TRIGGERS;
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			/* writes always need a current command ID to stamp new tuples */
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState.  Register the
	 * snapshots so they survive for the life of the query even if the
	 * caller's references go away.
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_top_eflags = eflags;
	estate->es_instrument = queryDesc->instrument_options;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	/*
	 * Set up an AFTER-trigger statement context, unless told not to, or
	 * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
	 */
	if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
		AfterTriggerBeginQuery();

	MemoryContextSwitchTo(oldcontext);
}
225
226 /* ----------------------------------------------------------------
227  *              ExecutorRun
228  *
229  *              This is the main routine of the executor module. It accepts
230  *              the query descriptor from the traffic cop and executes the
231  *              query plan.
232  *
233  *              ExecutorStart must have been called already.
234  *
235  *              If direction is NoMovementScanDirection then nothing is done
236  *              except to start up/shut down the destination.  Otherwise,
237  *              we retrieve up to 'count' tuples in the specified direction.
238  *
239  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
240  *              completion.
241  *
242  *              There is no return value, but output tuples (if any) are sent to
243  *              the destination receiver specified in the QueryDesc; and the number
244  *              of tuples processed at the top level can be found in
245  *              estate->es_processed.
246  *
247  *              We provide a function hook variable that lets loadable plugins
248  *              get control when ExecutorRun is called.  Such a plugin would
249  *              normally call standard_ExecutorRun().
250  *
251  * ----------------------------------------------------------------
252  */
253 void
254 ExecutorRun(QueryDesc *queryDesc,
255                         ScanDirection direction, long count)
256 {
257         if (ExecutorRun_hook)
258                 (*ExecutorRun_hook) (queryDesc, direction, count);
259         else
260                 standard_ExecutorRun(queryDesc, direction, count);
261 }
262
263 void
264 standard_ExecutorRun(QueryDesc *queryDesc,
265                                          ScanDirection direction, long count)
266 {
267         EState     *estate;
268         CmdType         operation;
269         DestReceiver *dest;
270         bool            sendTuples;
271         MemoryContext oldcontext;
272
273         /* sanity checks */
274         Assert(queryDesc != NULL);
275
276         estate = queryDesc->estate;
277
278         Assert(estate != NULL);
279         Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
280
281         /*
282          * Switch into per-query memory context
283          */
284         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
285
286         /* Allow instrumentation of Executor overall runtime */
287         if (queryDesc->totaltime)
288                 InstrStartNode(queryDesc->totaltime);
289
290         /*
291          * extract information from the query descriptor and the query feature.
292          */
293         operation = queryDesc->operation;
294         dest = queryDesc->dest;
295
296         /*
297          * startup tuple receiver, if we will be emitting tuples
298          */
299         estate->es_processed = 0;
300         estate->es_lastoid = InvalidOid;
301
302         sendTuples = (operation == CMD_SELECT ||
303                                   queryDesc->plannedstmt->hasReturning);
304
305         if (sendTuples)
306                 (*dest->rStartup) (dest, operation, queryDesc->tupDesc);
307
308         /*
309          * run plan
310          */
311         if (!ScanDirectionIsNoMovement(direction))
312                 ExecutePlan(estate,
313                                         queryDesc->planstate,
314                                         operation,
315                                         sendTuples,
316                                         count,
317                                         direction,
318                                         dest);
319
320         /*
321          * shutdown tuple receiver, if we started it
322          */
323         if (sendTuples)
324                 (*dest->rShutdown) (dest);
325
326         if (queryDesc->totaltime)
327                 InstrStopNode(queryDesc->totaltime, estate->es_processed);
328
329         MemoryContextSwitchTo(oldcontext);
330 }
331
332 /* ----------------------------------------------------------------
333  *              ExecutorFinish
334  *
335  *              This routine must be called after the last ExecutorRun call.
336  *              It performs cleanup such as firing AFTER triggers.      It is
337  *              separate from ExecutorEnd because EXPLAIN ANALYZE needs to
338  *              include these actions in the total runtime.
339  *
340  *              We provide a function hook variable that lets loadable plugins
341  *              get control when ExecutorFinish is called.      Such a plugin would
342  *              normally call standard_ExecutorFinish().
343  *
344  * ----------------------------------------------------------------
345  */
346 void
347 ExecutorFinish(QueryDesc *queryDesc)
348 {
349         if (ExecutorFinish_hook)
350                 (*ExecutorFinish_hook) (queryDesc);
351         else
352                 standard_ExecutorFinish(queryDesc);
353 }
354
355 void
356 standard_ExecutorFinish(QueryDesc *queryDesc)
357 {
358         EState     *estate;
359         MemoryContext oldcontext;
360
361         /* sanity checks */
362         Assert(queryDesc != NULL);
363
364         estate = queryDesc->estate;
365
366         Assert(estate != NULL);
367         Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
368
369         /* This should be run once and only once per Executor instance */
370         Assert(!estate->es_finished);
371
372         /* Switch into per-query memory context */
373         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
374
375         /* Allow instrumentation of Executor overall runtime */
376         if (queryDesc->totaltime)
377                 InstrStartNode(queryDesc->totaltime);
378
379         /* Run ModifyTable nodes to completion */
380         ExecPostprocessPlan(estate);
381
382         /* Execute queued AFTER triggers, unless told not to */
383         if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
384                 AfterTriggerEndQuery(estate);
385
386         if (queryDesc->totaltime)
387                 InstrStopNode(queryDesc->totaltime, 0);
388
389         MemoryContextSwitchTo(oldcontext);
390
391         estate->es_finished = true;
392 }
393
394 /* ----------------------------------------------------------------
395  *              ExecutorEnd
396  *
397  *              This routine must be called at the end of execution of any
398  *              query plan
399  *
400  *              We provide a function hook variable that lets loadable plugins
401  *              get control when ExecutorEnd is called.  Such a plugin would
402  *              normally call standard_ExecutorEnd().
403  *
404  * ----------------------------------------------------------------
405  */
406 void
407 ExecutorEnd(QueryDesc *queryDesc)
408 {
409         if (ExecutorEnd_hook)
410                 (*ExecutorEnd_hook) (queryDesc);
411         else
412                 standard_ExecutorEnd(queryDesc);
413 }
414
/*
 * standard_ExecutorEnd
 *
 * Default implementation of ExecutorEnd: shuts down the plan state tree,
 * closes any SELECT INTO target, releases registered snapshots, and frees
 * the per-query EState and memory context.
 */
void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
	 * might forget to call it.
	 */
	Assert(estate->es_finished ||
		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Close the SELECT INTO relation if any
	 */
	if (estate->es_select_into)
		CloseIntoRel(queryDesc);

	/* do away with our snapshots (registered in standard_ExecutorStart) */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}
470
471 /* ----------------------------------------------------------------
472  *              ExecutorRewind
473  *
474  *              This routine may be called on an open queryDesc to rewind it
475  *              to the start.
476  * ----------------------------------------------------------------
477  */
478 void
479 ExecutorRewind(QueryDesc *queryDesc)
480 {
481         EState     *estate;
482         MemoryContext oldcontext;
483
484         /* sanity checks */
485         Assert(queryDesc != NULL);
486
487         estate = queryDesc->estate;
488
489         Assert(estate != NULL);
490
491         /* It's probably not sensible to rescan updating queries */
492         Assert(queryDesc->operation == CMD_SELECT);
493
494         /*
495          * Switch into per-query memory context
496          */
497         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
498
499         /*
500          * rescan plan
501          */
502         ExecReScan(queryDesc->planstate);
503
504         MemoryContextSwitchTo(oldcontext);
505 }
506
507
508 /*
509  * ExecCheckRTPerms
510  *              Check access permissions for all relations listed in a range table.
511  *
512  * Returns true if permissions are adequate.  Otherwise, throws an appropriate
513  * error if ereport_on_violation is true, or simply returns false otherwise.
514  */
515 bool
516 ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
517 {
518         ListCell   *l;
519         bool            result = true;
520
521         foreach(l, rangeTable)
522         {
523                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
524
525                 result = ExecCheckRTEPerms(rte);
526                 if (!result)
527                 {
528                         Assert(rte->rtekind == RTE_RELATION);
529                         if (ereport_on_violation)
530                                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
531                                                            get_rel_name(rte->relid));
532                         return false;
533                 }
534         }
535
536         if (ExecutorCheckPerms_hook)
537                 result = (*ExecutorCheckPerms_hook) (rangeTable,
538                                                                                          ereport_on_violation);
539         return result;
540 }
541
542 /*
543  * ExecCheckRTEPerms
544  *              Check access permissions for a single RTE.
545  */
/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 *
 * Returns true if the current (or check-as) user holds all permissions in
 * rte->requiredPerms, counting both relation-level and column-level grants.
 */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;
	Bitmapset  *tmpset;
	int			col;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join, subquery, and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		/*
		 * If we lack any permissions that exist only as relation permissions
		 * (i.e. anything besides SELECT/INSERT/UPDATE, which have column
		 * variants), we can fail straight away.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			/* check every explicitly referenced column */
			tmpset = bms_copy(rte->selectedCols);
			while ((col = bms_first_member(tmpset)) >= 0)
			{
				/* remove the column number offset */
				col += FirstLowInvalidHeapAttributeNumber;
				if (col == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, col, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
			bms_free(tmpset);
		}

		/*
		 * Basically the same for the mod columns, with either INSERT or
		 * UPDATE privilege as specified by remainingPerms.
		 */
		remainingPerms &= ~ACL_SELECT;
		if (remainingPerms != 0)
		{
			/*
			 * When the query doesn't explicitly change any columns, allow the
			 * query if we have permission on any column of the rel.  This is
			 * to handle SELECT FOR UPDATE as well as possible corner cases in
			 * INSERT and UPDATE.
			 */
			if (bms_is_empty(rte->modifiedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, remainingPerms,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			/* check every explicitly modified column */
			tmpset = bms_copy(rte->modifiedCols);
			while ((col = bms_first_member(tmpset)) >= 0)
			{
				/* remove the column number offset */
				col += FirstLowInvalidHeapAttributeNumber;
				if (col == InvalidAttrNumber)
				{
					/* whole-row reference can't happen here */
					elog(ERROR, "whole-row update is not implemented");
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, col, userid,
											  remainingPerms) != ACLCHECK_OK)
						return false;
				}
			}
			bms_free(tmpset);
		}
	}
	return true;
}
685
686 /*
687  * Check that the query does not imply any writes to non-temp tables.
688  *
689  * Note: in a Hot Standby slave this would need to reject writes to temp
690  * tables as well; but an HS slave can't have created any temp tables
691  * in the first place, so no need to check that.
692  */
693 static void
694 ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
695 {
696         ListCell   *l;
697
698         /*
699          * CREATE TABLE AS or SELECT INTO?
700          *
701          * XXX should we allow this if the destination is temp?  Considering that
702          * it would still require catalog changes, probably not.
703          */
704         if (plannedstmt->intoClause != NULL)
705                 PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
706
707         /* Fail if write permissions are requested on any non-temp table */
708         foreach(l, plannedstmt->rtable)
709         {
710                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
711
712                 if (rte->rtekind != RTE_RELATION)
713                         continue;
714
715                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
716                         continue;
717
718                 if (isTempNamespace(get_rel_namespace(rte->relid)))
719                         continue;
720
721                 PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
722         }
723 }
724
725
726 /* ----------------------------------------------------------------
727  *              InitPlan
728  *
729  *              Initializes the query plan: open files, allocate storage
730  *              and start up the rule manager
731  * ----------------------------------------------------------------
732  */
733 static void
734 InitPlan(QueryDesc *queryDesc, int eflags)
735 {
736         CmdType         operation = queryDesc->operation;
737         PlannedStmt *plannedstmt = queryDesc->plannedstmt;
738         Plan       *plan = plannedstmt->planTree;
739         List       *rangeTable = plannedstmt->rtable;
740         EState     *estate = queryDesc->estate;
741         PlanState  *planstate;
742         TupleDesc       tupType;
743         ListCell   *l;
744         int                     i;
745
746         /*
747          * Do permissions checks
748          */
749         ExecCheckRTPerms(rangeTable, true);
750
751         /*
752          * initialize the node's execution state
753          */
754         estate->es_range_table = rangeTable;
755         estate->es_plannedstmt = plannedstmt;
756
757         /*
758          * initialize result relation stuff, and open/lock the result rels.
759          *
760          * We must do this before initializing the plan tree, else we might try to
761          * do a lock upgrade if a result rel is also a source rel.
762          */
763         if (plannedstmt->resultRelations)
764         {
765                 List       *resultRelations = plannedstmt->resultRelations;
766                 int                     numResultRelations = list_length(resultRelations);
767                 ResultRelInfo *resultRelInfos;
768                 ResultRelInfo *resultRelInfo;
769
770                 resultRelInfos = (ResultRelInfo *)
771                         palloc(numResultRelations * sizeof(ResultRelInfo));
772                 resultRelInfo = resultRelInfos;
773                 foreach(l, resultRelations)
774                 {
775                         Index           resultRelationIndex = lfirst_int(l);
776                         Oid                     resultRelationOid;
777                         Relation        resultRelation;
778
779                         resultRelationOid = getrelid(resultRelationIndex, rangeTable);
780                         resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
781                         InitResultRelInfo(resultRelInfo,
782                                                           resultRelation,
783                                                           resultRelationIndex,
784                                                           estate->es_instrument);
785                         resultRelInfo++;
786                 }
787                 estate->es_result_relations = resultRelInfos;
788                 estate->es_num_result_relations = numResultRelations;
789                 /* es_result_relation_info is NULL except when within ModifyTable */
790                 estate->es_result_relation_info = NULL;
791         }
792         else
793         {
794                 /*
795                  * if no result relation, then set state appropriately
796                  */
797                 estate->es_result_relations = NULL;
798                 estate->es_num_result_relations = 0;
799                 estate->es_result_relation_info = NULL;
800         }
801
802         /*
803          * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE
804          * before we initialize the plan tree, else we'd be risking lock upgrades.
805          * While we are at it, build the ExecRowMark list.
806          */
807         estate->es_rowMarks = NIL;
808         foreach(l, plannedstmt->rowMarks)
809         {
810                 PlanRowMark *rc = (PlanRowMark *) lfirst(l);
811                 Oid                     relid;
812                 Relation        relation;
813                 ExecRowMark *erm;
814
815                 /* ignore "parent" rowmarks; they are irrelevant at runtime */
816                 if (rc->isParent)
817                         continue;
818
819                 switch (rc->markType)
820                 {
821                         case ROW_MARK_EXCLUSIVE:
822                         case ROW_MARK_SHARE:
823                                 relid = getrelid(rc->rti, rangeTable);
824                                 relation = heap_open(relid, RowShareLock);
825                                 break;
826                         case ROW_MARK_REFERENCE:
827                                 relid = getrelid(rc->rti, rangeTable);
828                                 relation = heap_open(relid, AccessShareLock);
829                                 break;
830                         case ROW_MARK_COPY:
831                                 /* there's no real table here ... */
832                                 relation = NULL;
833                                 break;
834                         default:
835                                 elog(ERROR, "unrecognized markType: %d", rc->markType);
836                                 relation = NULL;        /* keep compiler quiet */
837                                 break;
838                 }
839
840                 /* if foreign table, tuples can't be locked */
841                 if (relation && relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
842                         ereport(ERROR,
843                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
844                                          errmsg("SELECT FOR UPDATE/SHARE cannot be used with foreign table \"%s\"",
845                                                         RelationGetRelationName(relation))));
846
847                 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
848                 erm->relation = relation;
849                 erm->rti = rc->rti;
850                 erm->prti = rc->prti;
851                 erm->rowmarkId = rc->rowmarkId;
852                 erm->markType = rc->markType;
853                 erm->noWait = rc->noWait;
854                 ItemPointerSetInvalid(&(erm->curCtid));
855                 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
856         }
857
858         /*
859          * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
860          * flag appropriately so that the plan tree will be initialized with the
861          * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
862          */
863         estate->es_select_into = false;
864         if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
865         {
866                 estate->es_select_into = true;
867                 estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
868         }
869
870         /*
871          * Initialize the executor's tuple table to empty.
872          */
873         estate->es_tupleTable = NIL;
874         estate->es_trig_tuple_slot = NULL;
875         estate->es_trig_oldtup_slot = NULL;
876
877         /* mark EvalPlanQual not active */
878         estate->es_epqTuple = NULL;
879         estate->es_epqTupleSet = NULL;
880         estate->es_epqScanDone = NULL;
881
882         /*
883          * Initialize private state information for each SubPlan.  We must do this
884          * before running ExecInitNode on the main query tree, since
885          * ExecInitSubPlan expects to be able to find these entries.
886          */
887         Assert(estate->es_subplanstates == NIL);
888         i = 1;                                          /* subplan indices count from 1 */
889         foreach(l, plannedstmt->subplans)
890         {
891                 Plan       *subplan = (Plan *) lfirst(l);
892                 PlanState  *subplanstate;
893                 int                     sp_eflags;
894
895                 /*
896                  * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
897                  * it is a parameterless subplan (not initplan), we suggest that it be
898                  * prepared to handle REWIND efficiently; otherwise there is no need.
899                  */
900                 sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
901                 if (bms_is_member(i, plannedstmt->rewindPlanIDs))
902                         sp_eflags |= EXEC_FLAG_REWIND;
903
904                 subplanstate = ExecInitNode(subplan, estate, sp_eflags);
905
906                 estate->es_subplanstates = lappend(estate->es_subplanstates,
907                                                                                    subplanstate);
908
909                 i++;
910         }
911
912         /*
913          * Initialize the private state information for all the nodes in the query
914          * tree.  This opens files, allocates storage and leaves us ready to start
915          * processing tuples.
916          */
917         planstate = ExecInitNode(plan, estate, eflags);
918
919         /*
920          * Get the tuple descriptor describing the type of tuples to return. (this
921          * is especially important if we are creating a relation with "SELECT
922          * INTO")
923          */
924         tupType = ExecGetResultType(planstate);
925
926         /*
927          * Initialize the junk filter if needed.  SELECT queries need a filter if
928          * there are any junk attrs in the top-level tlist.
929          */
930         if (operation == CMD_SELECT)
931         {
932                 bool            junk_filter_needed = false;
933                 ListCell   *tlist;
934
935                 foreach(tlist, plan->targetlist)
936                 {
937                         TargetEntry *tle = (TargetEntry *) lfirst(tlist);
938
939                         if (tle->resjunk)
940                         {
941                                 junk_filter_needed = true;
942                                 break;
943                         }
944                 }
945
946                 if (junk_filter_needed)
947                 {
948                         JunkFilter *j;
949
950                         j = ExecInitJunkFilter(planstate->plan->targetlist,
951                                                                    tupType->tdhasoid,
952                                                                    ExecInitExtraTupleSlot(estate));
953                         estate->es_junkFilter = j;
954
955                         /* Want to return the cleaned tuple type */
956                         tupType = j->jf_cleanTupType;
957                 }
958         }
959
960         queryDesc->tupDesc = tupType;
961         queryDesc->planstate = planstate;
962
963         /*
964          * If doing SELECT INTO, initialize the "into" relation.  We must wait
965          * till now so we have the "clean" result tuple type to create the new
966          * table from.
967          *
968          * If EXPLAIN, skip creating the "into" relation.
969          */
970         if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
971                 OpenIntoRel(queryDesc);
972 }
973
974 /*
975  * Check that a proposed result relation is a legal target for the operation
976  *
977  * In most cases parser and/or planner should have noticed this already, but
978  * let's make sure.  In the view case we do need a test here, because if the
979  * view wasn't rewritten by a rule, it had better have an INSTEAD trigger.
980  */
981 void
982 CheckValidResultRel(Relation resultRel, CmdType operation)
983 {
984         TriggerDesc *trigDesc = resultRel->trigdesc;
985
986         switch (resultRel->rd_rel->relkind)
987         {
988                 case RELKIND_RELATION:
989                         /* OK */
990                         break;
991                 case RELKIND_SEQUENCE:
992                         ereport(ERROR,
993                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
994                                          errmsg("cannot change sequence \"%s\"",
995                                                         RelationGetRelationName(resultRel))));
996                         break;
997                 case RELKIND_TOASTVALUE:
998                         ereport(ERROR,
999                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1000                                          errmsg("cannot change TOAST relation \"%s\"",
1001                                                         RelationGetRelationName(resultRel))));
1002                         break;
1003                 case RELKIND_VIEW:
1004                         switch (operation)
1005                         {
1006                                 case CMD_INSERT:
1007                                         if (!trigDesc || !trigDesc->trig_insert_instead_row)
1008                                                 ereport(ERROR,
1009                                                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1010                                                    errmsg("cannot insert into view \"%s\"",
1011                                                                   RelationGetRelationName(resultRel)),
1012                                                    errhint("You need an unconditional ON INSERT DO INSTEAD rule or an INSTEAD OF INSERT trigger.")));
1013                                         break;
1014                                 case CMD_UPDATE:
1015                                         if (!trigDesc || !trigDesc->trig_update_instead_row)
1016                                                 ereport(ERROR,
1017                                                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1018                                                    errmsg("cannot update view \"%s\"",
1019                                                                   RelationGetRelationName(resultRel)),
1020                                                    errhint("You need an unconditional ON UPDATE DO INSTEAD rule or an INSTEAD OF UPDATE trigger.")));
1021                                         break;
1022                                 case CMD_DELETE:
1023                                         if (!trigDesc || !trigDesc->trig_delete_instead_row)
1024                                                 ereport(ERROR,
1025                                                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1026                                                    errmsg("cannot delete from view \"%s\"",
1027                                                                   RelationGetRelationName(resultRel)),
1028                                                    errhint("You need an unconditional ON DELETE DO INSTEAD rule or an INSTEAD OF DELETE trigger.")));
1029                                         break;
1030                                 default:
1031                                         elog(ERROR, "unrecognized CmdType: %d", (int) operation);
1032                                         break;
1033                         }
1034                         break;
1035                 case RELKIND_FOREIGN_TABLE:
1036                         ereport(ERROR,
1037                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1038                                          errmsg("cannot change foreign table \"%s\"",
1039                                                         RelationGetRelationName(resultRel))));
1040                         break;
1041                 default:
1042                         ereport(ERROR,
1043                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1044                                          errmsg("cannot change relation \"%s\"",
1045                                                         RelationGetRelationName(resultRel))));
1046                         break;
1047         }
1048 }
1049
1050 /*
1051  * Initialize ResultRelInfo data for one result relation
1052  *
1053  * Caution: before Postgres 9.1, this function included the relkind checking
1054  * that's now in CheckValidResultRel, and it also did ExecOpenIndices if
1055  * appropriate.  Be sure callers cover those needs.
1056  */
1057 void
1058 InitResultRelInfo(ResultRelInfo *resultRelInfo,
1059                                   Relation resultRelationDesc,
1060                                   Index resultRelationIndex,
1061                                   int instrument_options)
1062 {
1063         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
1064         resultRelInfo->type = T_ResultRelInfo;
1065         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
1066         resultRelInfo->ri_RelationDesc = resultRelationDesc;
1067         resultRelInfo->ri_NumIndices = 0;
1068         resultRelInfo->ri_IndexRelationDescs = NULL;
1069         resultRelInfo->ri_IndexRelationInfo = NULL;
1070         /* make a copy so as not to depend on relcache info not changing... */
1071         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
1072         if (resultRelInfo->ri_TrigDesc)
1073         {
1074                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
1075
1076                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1077                         palloc0(n * sizeof(FmgrInfo));
1078                 resultRelInfo->ri_TrigWhenExprs = (List **)
1079                         palloc0(n * sizeof(List *));
1080                 if (instrument_options)
1081                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
1082         }
1083         else
1084         {
1085                 resultRelInfo->ri_TrigFunctions = NULL;
1086                 resultRelInfo->ri_TrigWhenExprs = NULL;
1087                 resultRelInfo->ri_TrigInstrument = NULL;
1088         }
1089         resultRelInfo->ri_ConstraintExprs = NULL;
1090         resultRelInfo->ri_junkFilter = NULL;
1091         resultRelInfo->ri_projectReturning = NULL;
1092 }
1093
1094 /*
1095  *              ExecGetTriggerResultRel
1096  *
1097  * Get a ResultRelInfo for a trigger target relation.  Most of the time,
1098  * triggers are fired on one of the result relations of the query, and so
1099  * we can just return a member of the es_result_relations array.  (Note: in
1100  * self-join situations there might be multiple members with the same OID;
1101  * if so it doesn't matter which one we pick.)  However, it is sometimes
1102  * necessary to fire triggers on other relations; this happens mainly when an
1103  * RI update trigger queues additional triggers on other relations, which will
1104  * be processed in the context of the outer query.      For efficiency's sake,
1105  * we want to have a ResultRelInfo for those triggers too; that can avoid
1106  * repeated re-opening of the relation.  (It also provides a way for EXPLAIN
1107  * ANALYZE to report the runtimes of such triggers.)  So we make additional
1108  * ResultRelInfo's as needed, and save them in es_trig_target_relations.
1109  */
1110 ResultRelInfo *
1111 ExecGetTriggerResultRel(EState *estate, Oid relid)
1112 {
1113         ResultRelInfo *rInfo;
1114         int                     nr;
1115         ListCell   *l;
1116         Relation        rel;
1117         MemoryContext oldcontext;
1118
1119         /* First, search through the query result relations */
1120         rInfo = estate->es_result_relations;
1121         nr = estate->es_num_result_relations;
1122         while (nr > 0)
1123         {
1124                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1125                         return rInfo;
1126                 rInfo++;
1127                 nr--;
1128         }
1129         /* Nope, but maybe we already made an extra ResultRelInfo for it */
1130         foreach(l, estate->es_trig_target_relations)
1131         {
1132                 rInfo = (ResultRelInfo *) lfirst(l);
1133                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1134                         return rInfo;
1135         }
1136         /* Nope, so we need a new one */
1137
1138         /*
1139          * Open the target relation's relcache entry.  We assume that an
1140          * appropriate lock is still held by the backend from whenever the trigger
1141          * event got queued, so we need take no new lock here.  Also, we need not
1142          * recheck the relkind, so no need for CheckValidResultRel.
1143          */
1144         rel = heap_open(relid, NoLock);
1145
1146         /*
1147          * Make the new entry in the right context.
1148          */
1149         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
1150         rInfo = makeNode(ResultRelInfo);
1151         InitResultRelInfo(rInfo,
1152                                           rel,
1153                                           0,            /* dummy rangetable index */
1154                                           estate->es_instrument);
1155         estate->es_trig_target_relations =
1156                 lappend(estate->es_trig_target_relations, rInfo);
1157         MemoryContextSwitchTo(oldcontext);
1158
1159         /*
1160          * Currently, we don't need any index information in ResultRelInfos used
1161          * only for triggers, so no need to call ExecOpenIndices.
1162          */
1163
1164         return rInfo;
1165 }
1166
1167 /*
1168  *              ExecContextForcesOids
1169  *
1170  * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
1171  * we need to ensure that result tuples have space for an OID iff they are
1172  * going to be stored into a relation that has OIDs.  In other contexts
1173  * we are free to choose whether to leave space for OIDs in result tuples
1174  * (we generally don't want to, but we do if a physical-tlist optimization
1175  * is possible).  This routine checks the plan context and returns TRUE if the
1176  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
1177  * *hasoids is set to the required value.
1178  *
1179  * One reason this is ugly is that all plan nodes in the plan tree will emit
1180  * tuples with space for an OID, though we really only need the topmost node
1181  * to do so.  However, node types like Sort don't project new tuples but just
1182  * return their inputs, and in those cases the requirement propagates down
1183  * to the input node.  Eventually we might make this code smart enough to
1184  * recognize how far down the requirement really goes, but for now we just
1185  * make all plan nodes do the same thing if the top level forces the choice.
1186  *
1187  * We assume that if we are generating tuples for INSERT or UPDATE,
1188  * estate->es_result_relation_info is already set up to describe the target
1189  * relation.  Note that in an UPDATE that spans an inheritance tree, some of
1190  * the target relations may have OIDs and some not.  We have to make the
1191  * decisions on a per-relation basis as we initialize each of the subplans of
1192  * the ModifyTable node, so ModifyTable has to set es_result_relation_info
1193  * while initializing each subplan.
1194  *
1195  * SELECT INTO is even uglier, because we don't have the INTO relation's
1196  * descriptor available when this code runs; we have to look aside at a
1197  * flag set by InitPlan().
1198  */
1199 bool
1200 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
1201 {
1202         ResultRelInfo *ri = planstate->state->es_result_relation_info;
1203
1204         if (ri != NULL)
1205         {
1206                 Relation        rel = ri->ri_RelationDesc;
1207
1208                 if (rel != NULL)
1209                 {
1210                         *hasoids = rel->rd_rel->relhasoids;
1211                         return true;
1212                 }
1213         }
1214
1215         if (planstate->state->es_select_into)
1216         {
1217                 *hasoids = planstate->state->es_into_oids;
1218                 return true;
1219         }
1220
1221         return false;
1222 }
1223
1224 /* ----------------------------------------------------------------
1225  *              ExecPostprocessPlan
1226  *
1227  *              Give plan nodes a final chance to execute before shutdown
1228  * ----------------------------------------------------------------
1229  */
1230 static void
1231 ExecPostprocessPlan(EState *estate)
1232 {
1233         ListCell   *lc;
1234
1235         /*
1236          * Make sure nodes run forward.
1237          */
1238         estate->es_direction = ForwardScanDirection;
1239
1240         /*
1241          * Run any secondary ModifyTable nodes to completion, in case the main
1242          * query did not fetch all rows from them.      (We do this to ensure that
1243          * such nodes have predictable results.)
1244          */
1245         foreach(lc, estate->es_auxmodifytables)
1246         {
1247                 PlanState  *ps = (PlanState *) lfirst(lc);
1248
1249                 for (;;)
1250                 {
1251                         TupleTableSlot *slot;
1252
1253                         /* Reset the per-output-tuple exprcontext each time */
1254                         ResetPerTupleExprContext(estate);
1255
1256                         slot = ExecProcNode(ps);
1257
1258                         if (TupIsNull(slot))
1259                                 break;
1260                 }
1261         }
1262 }
1263
1264 /* ----------------------------------------------------------------
1265  *              ExecEndPlan
1266  *
1267  *              Cleans up the query plan -- closes files and frees up storage
1268  *
1269  * NOTE: we are no longer very worried about freeing storage per se
1270  * in this code; FreeExecutorState should be guaranteed to release all
1271  * memory that needs to be released.  What we are worried about doing
1272  * is closing relations and dropping buffer pins.  Thus, for example,
1273  * tuple tables must be cleared or dropped to ensure pins are released.
1274  * ----------------------------------------------------------------
1275  */
1276 static void
1277 ExecEndPlan(PlanState *planstate, EState *estate)
1278 {
1279         ResultRelInfo *resultRelInfo;
1280         int                     i;
1281         ListCell   *l;
1282
1283         /*
1284          * shut down the node-type-specific query processing
1285          */
1286         ExecEndNode(planstate);
1287
1288         /*
1289          * for subplans too
1290          */
1291         foreach(l, estate->es_subplanstates)
1292         {
1293                 PlanState  *subplanstate = (PlanState *) lfirst(l);
1294
1295                 ExecEndNode(subplanstate);
1296         }
1297
1298         /*
1299          * destroy the executor's tuple table.  Actually we only care about
1300          * releasing buffer pins and tupdesc refcounts; there's no need to pfree
1301          * the TupleTableSlots, since the containing memory context is about to go
1302          * away anyway.
1303          */
1304         ExecResetTupleTable(estate->es_tupleTable, false);
1305
1306         /*
1307          * close the result relation(s) if any, but hold locks until xact commit.
1308          */
1309         resultRelInfo = estate->es_result_relations;
1310         for (i = estate->es_num_result_relations; i > 0; i--)
1311         {
1312                 /* Close indices and then the relation itself */
1313                 ExecCloseIndices(resultRelInfo);
1314                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1315                 resultRelInfo++;
1316         }
1317
1318         /*
1319          * likewise close any trigger target relations
1320          */
1321         foreach(l, estate->es_trig_target_relations)
1322         {
1323                 resultRelInfo = (ResultRelInfo *) lfirst(l);
1324                 /* Close indices and then the relation itself */
1325                 ExecCloseIndices(resultRelInfo);
1326                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1327         }
1328
1329         /*
1330          * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1331          */
1332         foreach(l, estate->es_rowMarks)
1333         {
1334                 ExecRowMark *erm = (ExecRowMark *) lfirst(l);
1335
1336                 if (erm->relation)
1337                         heap_close(erm->relation, NoLock);
1338         }
1339 }
1340
1341 /* ----------------------------------------------------------------
1342  *              ExecutePlan
1343  *
1344  *              Processes the query plan until we have processed 'numberTuples' tuples,
1345  *              moving in the specified direction.
1346  *
1347  *              Runs to completion if numberTuples is 0
1348  *
1349  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1350  * user can see it
1351  * ----------------------------------------------------------------
1352  */
1353 static void
1354 ExecutePlan(EState *estate,
1355                         PlanState *planstate,
1356                         CmdType operation,
1357                         bool sendTuples,
1358                         long numberTuples,
1359                         ScanDirection direction,
1360                         DestReceiver *dest)
1361 {
1362         TupleTableSlot *slot;
1363         long            current_tuple_count;
1364
1365         /*
1366          * initialize local variables
1367          */
1368         current_tuple_count = 0;
1369
1370         /*
1371          * Set the direction.
1372          */
1373         estate->es_direction = direction;
1374
1375         /*
1376          * Loop until we've processed the proper number of tuples from the plan.
1377          */
1378         for (;;)
1379         {
1380                 /* Reset the per-output-tuple exprcontext */
1381                 ResetPerTupleExprContext(estate);
1382
1383                 /*
1384                  * Execute the plan and obtain a tuple
1385                  */
1386                 slot = ExecProcNode(planstate);
1387
1388                 /*
1389                  * if the tuple is null, then we assume there is nothing more to
1390                  * process so we just end the loop...
1391                  */
1392                 if (TupIsNull(slot))
1393                         break;
1394
1395                 /*
1396                  * If we have a junk filter, then project a new tuple with the junk
1397                  * removed.
1398                  *
1399                  * Store this new "clean" tuple in the junkfilter's resultSlot.
1400                  * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1401                  * because that tuple slot has the wrong descriptor.)
1402                  */
1403                 if (estate->es_junkFilter != NULL)
1404                         slot = ExecFilterJunk(estate->es_junkFilter, slot);
1405
1406                 /*
1407                  * If we are supposed to send the tuple somewhere, do so. (In
1408                  * practice, this is probably always the case at this point.)
1409                  */
1410                 if (sendTuples)
1411                         (*dest->receiveSlot) (slot, dest);
1412
1413                 /*
1414                  * Count tuples processed, if this is a SELECT.  (For other operation
1415                  * types, the ModifyTable plan node must count the appropriate
1416                  * events.)
1417                  */
1418                 if (operation == CMD_SELECT)
1419                         (estate->es_processed)++;
1420
1421                 /*
1422                  * check our tuple count.. if we've processed the proper number then
1423                  * quit, else loop again and process more tuples.  Zero numberTuples
1424                  * means no limit.
1425                  */
1426                 current_tuple_count++;
1427                 if (numberTuples && numberTuples == current_tuple_count)
1428                         break;
1429         }
1430 }
1431
1432
1433 /*
1434  * ExecRelCheck --- check that tuple meets constraints for result relation
1435  */
1436 static const char *
1437 ExecRelCheck(ResultRelInfo *resultRelInfo,
1438                          TupleTableSlot *slot, EState *estate)
1439 {
1440         Relation        rel = resultRelInfo->ri_RelationDesc;
1441         int                     ncheck = rel->rd_att->constr->num_check;
1442         ConstrCheck *check = rel->rd_att->constr->check;
1443         ExprContext *econtext;
1444         MemoryContext oldContext;
1445         List       *qual;
1446         int                     i;
1447
1448         /*
1449          * If first time through for this result relation, build expression
1450          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1451          * memory context so they'll survive throughout the query.
1452          */
1453         if (resultRelInfo->ri_ConstraintExprs == NULL)
1454         {
1455                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1456                 resultRelInfo->ri_ConstraintExprs =
1457                         (List **) palloc(ncheck * sizeof(List *));
1458                 for (i = 0; i < ncheck; i++)
1459                 {
1460                         /* ExecQual wants implicit-AND form */
1461                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1462                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1463                                 ExecPrepareExpr((Expr *) qual, estate);
1464                 }
1465                 MemoryContextSwitchTo(oldContext);
1466         }
1467
1468         /*
1469          * We will use the EState's per-tuple context for evaluating constraint
1470          * expressions (creating it if it's not already there).
1471          */
1472         econtext = GetPerTupleExprContext(estate);
1473
1474         /* Arrange for econtext's scan tuple to be the tuple under test */
1475         econtext->ecxt_scantuple = slot;
1476
1477         /* And evaluate the constraints */
1478         for (i = 0; i < ncheck; i++)
1479         {
1480                 qual = resultRelInfo->ri_ConstraintExprs[i];
1481
1482                 /*
1483                  * NOTE: SQL92 specifies that a NULL result from a constraint
1484                  * expression is not to be treated as a failure.  Therefore, tell
1485                  * ExecQual to return TRUE for NULL.
1486                  */
1487                 if (!ExecQual(qual, econtext, true))
1488                         return check[i].ccname;
1489         }
1490
1491         /* NULL result means no error */
1492         return NULL;
1493 }
1494
1495 void
1496 ExecConstraints(ResultRelInfo *resultRelInfo,
1497                                 TupleTableSlot *slot, EState *estate)
1498 {
1499         Relation        rel = resultRelInfo->ri_RelationDesc;
1500         TupleConstr *constr = rel->rd_att->constr;
1501
1502         Assert(constr);
1503
1504         if (constr->has_not_null)
1505         {
1506                 int                     natts = rel->rd_att->natts;
1507                 int                     attrChk;
1508
1509                 for (attrChk = 1; attrChk <= natts; attrChk++)
1510                 {
1511                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1512                                 slot_attisnull(slot, attrChk))
1513                                 ereport(ERROR,
1514                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1515                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1516                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1517                 }
1518         }
1519
1520         if (constr->num_check > 0)
1521         {
1522                 const char *failed;
1523
1524                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1525                         ereport(ERROR,
1526                                         (errcode(ERRCODE_CHECK_VIOLATION),
1527                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1528                                                         RelationGetRelationName(rel), failed)));
1529         }
1530 }
1531
1532
1533 /*
1534  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
1535  */
1536 ExecRowMark *
1537 ExecFindRowMark(EState *estate, Index rti)
1538 {
1539         ListCell   *lc;
1540
1541         foreach(lc, estate->es_rowMarks)
1542         {
1543                 ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
1544
1545                 if (erm->rti == rti)
1546                         return erm;
1547         }
1548         elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
1549         return NULL;                            /* keep compiler quiet */
1550 }
1551
1552 /*
1553  * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
1554  *
1555  * Inputs are the underlying ExecRowMark struct and the targetlist of the
1556  * input plan node (not planstate node!).  We need the latter to find out
1557  * the column numbers of the resjunk columns.
1558  */
1559 ExecAuxRowMark *
1560 ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
1561 {
1562         ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
1563         char            resname[32];
1564
1565         aerm->rowmark = erm;
1566
1567         /* Look up the resjunk columns associated with this rowmark */
1568         if (erm->relation)
1569         {
1570                 Assert(erm->markType != ROW_MARK_COPY);
1571
1572                 /* if child rel, need tableoid */
1573                 if (erm->rti != erm->prti)
1574                 {
1575                         snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
1576                         aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
1577                                                                                                                    resname);
1578                         if (!AttributeNumberIsValid(aerm->toidAttNo))
1579                                 elog(ERROR, "could not find junk %s column", resname);
1580                 }
1581
1582                 /* always need ctid for real relations */
1583                 snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
1584                 aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
1585                                                                                                            resname);
1586                 if (!AttributeNumberIsValid(aerm->ctidAttNo))
1587                         elog(ERROR, "could not find junk %s column", resname);
1588         }
1589         else
1590         {
1591                 Assert(erm->markType == ROW_MARK_COPY);
1592
1593                 snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
1594                 aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
1595                                                                                                                 resname);
1596                 if (!AttributeNumberIsValid(aerm->wholeAttNo))
1597                         elog(ERROR, "could not find junk %s column", resname);
1598         }
1599
1600         return aerm;
1601 }
1602
1603
1604 /*
1605  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
1606  * process the updated version under READ COMMITTED rules.
1607  *
1608  * See backend/executor/README for some info about how this works.
1609  */
1610
1611
1612 /*
1613  * Check a modified tuple to see if we want to process its updated version
1614  * under READ COMMITTED rules.
1615  *
1616  *      estate - outer executor state data
1617  *      epqstate - state for EvalPlanQual rechecking
1618  *      relation - table containing tuple
1619  *      rti - rangetable index of table containing tuple
1620  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
1621  *      priorXmax - t_xmax from the outdated tuple
1622  *
1623  * *tid is also an output parameter: it's modified to hold the TID of the
1624  * latest version of the tuple (note this may be changed even on failure)
1625  *
1626  * Returns a slot containing the new candidate update/delete tuple, or
1627  * NULL if we determine we shouldn't process the row.
1628  */
1629 TupleTableSlot *
1630 EvalPlanQual(EState *estate, EPQState *epqstate,
1631                          Relation relation, Index rti,
1632                          ItemPointer tid, TransactionId priorXmax)
1633 {
1634         TupleTableSlot *slot;
1635         HeapTuple       copyTuple;
1636
1637         Assert(rti > 0);
1638
1639         /*
1640          * Get and lock the updated version of the row; if fail, return NULL.
1641          */
1642         copyTuple = EvalPlanQualFetch(estate, relation, LockTupleExclusive,
1643                                                                   tid, priorXmax);
1644
1645         if (copyTuple == NULL)
1646                 return NULL;
1647
1648         /*
1649          * For UPDATE/DELETE we have to return tid of actual row we're executing
1650          * PQ for.
1651          */
1652         *tid = copyTuple->t_self;
1653
1654         /*
1655          * Need to run a recheck subquery.      Initialize or reinitialize EPQ state.
1656          */
1657         EvalPlanQualBegin(epqstate, estate);
1658
1659         /*
1660          * Free old test tuple, if any, and store new tuple where relation's scan
1661          * node will see it
1662          */
1663         EvalPlanQualSetTuple(epqstate, rti, copyTuple);
1664
1665         /*
1666          * Fetch any non-locked source rows
1667          */
1668         EvalPlanQualFetchRowMarks(epqstate);
1669
1670         /*
1671          * Run the EPQ query.  We assume it will return at most one tuple.
1672          */
1673         slot = EvalPlanQualNext(epqstate);
1674
1675         /*
1676          * If we got a tuple, force the slot to materialize the tuple so that it
1677          * is not dependent on any local state in the EPQ query (in particular,
1678          * it's highly likely that the slot contains references to any pass-by-ref
1679          * datums that may be present in copyTuple).  As with the next step, this
1680          * is to guard against early re-use of the EPQ query.
1681          */
1682         if (!TupIsNull(slot))
1683                 (void) ExecMaterializeSlot(slot);
1684
1685         /*
1686          * Clear out the test tuple.  This is needed in case the EPQ query is
1687          * re-used to test a tuple for a different relation.  (Not clear that can
1688          * really happen, but let's be safe.)
1689          */
1690         EvalPlanQualSetTuple(epqstate, rti, NULL);
1691
1692         return slot;
1693 }
1694
/*
 * Fetch a copy of the newest version of an outdated tuple
 *
 *	estate - executor state data
 *	relation - table containing tuple
 *	lockmode - requested tuple lock mode
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * Returns a palloc'd copy of the newest tuple version, or NULL if we find
 * that there is no newest version (ie, the row was deleted not updated).
 * If successful, we have locked the newest tuple version, so caller does not
 * need to worry about it changing anymore.
 *
 * Note: properly, lockmode should be declared as enum LockTupleMode,
 * but we use "int" to avoid having to include heapam.h in executor.h.
 */
HeapTuple
EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
				  ItemPointer tid, TransactionId priorXmax)
{
	HeapTuple	copyTuple = NULL;
	HeapTupleData tuple;
	SnapshotData SnapshotDirty;

	/*
	 * fetch target tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	InitDirtySnapshot(SnapshotDirty);
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		/* keep_buf = true: buffer stays pinned even if the fetch fails */
		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			HTSU_Result test;
			ItemPointerData update_ctid;
			TransactionId update_xmax;

			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.  This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty.xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty.xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty.xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against es_output_cid: cmin >= current CID means our
			 * command cannot see the tuple, so we should ignore it.  Without
			 * this we are open to the "Halloween problem" of indefinitely
			 * re-updating the same tuple. (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
			 * transaction dead, regardless of cmax.)  We just checked that
			 * priorXmax == xmin, so we can test that variable instead of
			 * doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * This is a live tuple, so now try to lock it.
			 */
			test = heap_lock_tuple(relation, &tuple, &buffer,
								   &update_ctid, &update_xmax,
								   estate->es_output_cid,
								   lockmode, false);
			/* We now have two pins on the buffer, get rid of one */
			ReleaseBuffer(buffer);

			switch (test)
			{
				case HeapTupleSelfUpdated:
					/* treat it as deleted; do not process */
					ReleaseBuffer(buffer);
					return NULL;

				case HeapTupleMayBeUpdated:
					/* successfully locked */
					break;

				case HeapTupleUpdated:
					ReleaseBuffer(buffer);
					/* a concurrent update is fatal under snapshot isolation */
					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));
					if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
					{
						/* it was updated, so look at the updated version */
						tuple.t_self = update_ctid;
						/* updated row should have xmin matching this xmax */
						priorXmax = update_xmax;
						continue;
					}
					/* tuple was deleted, so give up */
					return NULL;

				default:
					ReleaseBuffer(buffer);
					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
						 test);
					return NULL;	/* keep compiler quiet */
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * process.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* t_ctid pointing at itself means deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * Return the copied tuple
	 */
	return copyTuple;
}
1890
1891 /*
1892  * EvalPlanQualInit -- initialize during creation of a plan state node
1893  * that might need to invoke EPQ processing.
1894  *
1895  * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
1896  * with EvalPlanQualSetPlan.
1897  */
1898 void
1899 EvalPlanQualInit(EPQState *epqstate, EState *estate,
1900                                  Plan *subplan, List *auxrowmarks, int epqParam)
1901 {
1902         /* Mark the EPQ state inactive */
1903         epqstate->estate = NULL;
1904         epqstate->planstate = NULL;
1905         epqstate->origslot = NULL;
1906         /* ... and remember data that EvalPlanQualBegin will need */
1907         epqstate->plan = subplan;
1908         epqstate->arowMarks = auxrowmarks;
1909         epqstate->epqParam = epqParam;
1910 }
1911
1912 /*
1913  * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
1914  *
1915  * We need this so that ModifyTuple can deal with multiple subplans.
1916  */
1917 void
1918 EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
1919 {
1920         /* If we have a live EPQ query, shut it down */
1921         EvalPlanQualEnd(epqstate);
1922         /* And set/change the plan pointer */
1923         epqstate->plan = subplan;
1924         /* The rowmarks depend on the plan, too */
1925         epqstate->arowMarks = auxrowmarks;
1926 }
1927
1928 /*
1929  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
1930  *
1931  * NB: passed tuple must be palloc'd; it may get freed later
1932  */
1933 void
1934 EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
1935 {
1936         EState     *estate = epqstate->estate;
1937
1938         Assert(rti > 0);
1939
1940         /*
1941          * free old test tuple, if any, and store new tuple where relation's scan
1942          * node will see it
1943          */
1944         if (estate->es_epqTuple[rti - 1] != NULL)
1945                 heap_freetuple(estate->es_epqTuple[rti - 1]);
1946         estate->es_epqTuple[rti - 1] = tuple;
1947         estate->es_epqTupleSet[rti - 1] = true;
1948 }
1949
1950 /*
1951  * Fetch back the current test tuple (if any) for the specified RTI
1952  */
1953 HeapTuple
1954 EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
1955 {
1956         EState     *estate = epqstate->estate;
1957
1958         Assert(rti > 0);
1959
1960         return estate->es_epqTuple[rti - 1];
1961 }
1962
/*
 * Fetch the current row values for any non-locked relations that need
 * to be scanned by an EvalPlanQual operation.  origslot must have been set
 * to contain the current result row (top-level row) that we need to recheck.
 */
void
EvalPlanQualFetchRowMarks(EPQState *epqstate)
{
	ListCell   *l;

	Assert(epqstate->origslot != NULL);

	foreach(l, epqstate->arowMarks)
	{
		ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
		ExecRowMark *erm = aerm->rowmark;
		Datum		datum;
		bool		isNull;
		HeapTupleData tuple;

		/* rowmarks that take row-share locks are handled elsewhere, not here */
		if (RowMarkRequiresRowShareLock(erm->markType))
			elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");

		/* clear any leftover test tuple for this rel */
		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);

		if (erm->relation)
		{
			Buffer		buffer;

			Assert(erm->markType == ROW_MARK_REFERENCE);

			/* if child rel, must check whether it produced this row */
			if (erm->rti != erm->prti)
			{
				Oid			tableoid;

				datum = ExecGetJunkAttribute(epqstate->origslot,
											 aerm->toidAttNo,
											 &isNull);
				/* non-locked rels could be on the inside of outer joins */
				if (isNull)
					continue;
				tableoid = DatumGetObjectId(datum);

				if (tableoid != RelationGetRelid(erm->relation))
				{
					/* this child is inactive right now */
					continue;
				}
			}

			/* fetch the tuple's ctid */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->ctidAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

			/* okay, fetch the tuple (SnapshotAny: the ctid is authoritative) */
			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
							false, NULL))
				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");

			/* successful, copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
			ReleaseBuffer(buffer);
		}
		else
		{
			HeapTupleHeader td;

			Assert(erm->markType == ROW_MARK_COPY);

			/* fetch the whole-row Var for the relation */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->wholeAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			td = DatumGetHeapTupleHeader(datum);

			/* build a temporary HeapTuple control structure */
			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
			ItemPointerSetInvalid(&(tuple.t_self));
			tuple.t_tableOid = InvalidOid;
			tuple.t_data = td;

			/* copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
		}
	}
}
2061
2062 /*
2063  * Fetch the next row (if any) from EvalPlanQual testing
2064  *
2065  * (In practice, there should never be more than one row...)
2066  */
2067 TupleTableSlot *
2068 EvalPlanQualNext(EPQState *epqstate)
2069 {
2070         MemoryContext oldcontext;
2071         TupleTableSlot *slot;
2072
2073         oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
2074         slot = ExecProcNode(epqstate->planstate);
2075         MemoryContextSwitchTo(oldcontext);
2076
2077         return slot;
2078 }
2079
2080 /*
2081  * Initialize or reset an EvalPlanQual state tree
2082  */
2083 void
2084 EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
2085 {
2086         EState     *estate = epqstate->estate;
2087
2088         if (estate == NULL)
2089         {
2090                 /* First time through, so create a child EState */
2091                 EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
2092         }
2093         else
2094         {
2095                 /*
2096                  * We already have a suitable child EPQ tree, so just reset it.
2097                  */
2098                 int                     rtsize = list_length(parentestate->es_range_table);
2099                 PlanState  *planstate = epqstate->planstate;
2100
2101                 MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
2102
2103                 /* Recopy current values of parent parameters */
2104                 if (parentestate->es_plannedstmt->nParamExec > 0)
2105                 {
2106                         int                     i = parentestate->es_plannedstmt->nParamExec;
2107
2108                         while (--i >= 0)
2109                         {
2110                                 /* copy value if any, but not execPlan link */
2111                                 estate->es_param_exec_vals[i].value =
2112                                         parentestate->es_param_exec_vals[i].value;
2113                                 estate->es_param_exec_vals[i].isnull =
2114                                         parentestate->es_param_exec_vals[i].isnull;
2115                         }
2116                 }
2117
2118                 /*
2119                  * Mark child plan tree as needing rescan at all scan nodes.  The
2120                  * first ExecProcNode will take care of actually doing the rescan.
2121                  */
2122                 planstate->chgParam = bms_add_member(planstate->chgParam,
2123                                                                                          epqstate->epqParam);
2124         }
2125 }
2126
2127 /*
2128  * Start execution of an EvalPlanQual plan tree.
2129  *
2130  * This is a cut-down version of ExecutorStart(): we copy some state from
2131  * the top-level estate rather than initializing it fresh.
2132  */
2133 static void
2134 EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
2135 {
2136         EState     *estate;
2137         int                     rtsize;
2138         MemoryContext oldcontext;
2139         ListCell   *l;
2140
2141         rtsize = list_length(parentestate->es_range_table);
2142
2143         epqstate->estate = estate = CreateExecutorState();
2144
2145         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
2146
2147         /*
2148          * Child EPQ EStates share the parent's copy of unchanging state such as
2149          * the snapshot, rangetable, result-rel info, and external Param info.
2150          * They need their own copies of local state, including a tuple table,
2151          * es_param_exec_vals, etc.
2152          */
2153         estate->es_direction = ForwardScanDirection;
2154         estate->es_snapshot = parentestate->es_snapshot;
2155         estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
2156         estate->es_range_table = parentestate->es_range_table;
2157         estate->es_plannedstmt = parentestate->es_plannedstmt;
2158         estate->es_junkFilter = parentestate->es_junkFilter;
2159         estate->es_output_cid = parentestate->es_output_cid;
2160         estate->es_result_relations = parentestate->es_result_relations;
2161         estate->es_num_result_relations = parentestate->es_num_result_relations;
2162         estate->es_result_relation_info = parentestate->es_result_relation_info;
2163         /* es_trig_target_relations must NOT be copied */
2164         estate->es_rowMarks = parentestate->es_rowMarks;
2165         estate->es_top_eflags = parentestate->es_top_eflags;
2166         estate->es_instrument = parentestate->es_instrument;
2167         estate->es_select_into = parentestate->es_select_into;
2168         estate->es_into_oids = parentestate->es_into_oids;
2169         /* es_auxmodifytables must NOT be copied */
2170
2171         /*
2172          * The external param list is simply shared from parent.  The internal
2173          * param workspace has to be local state, but we copy the initial values
2174          * from the parent, so as to have access to any param values that were
2175          * already set from other parts of the parent's plan tree.
2176          */
2177         estate->es_param_list_info = parentestate->es_param_list_info;
2178         if (parentestate->es_plannedstmt->nParamExec > 0)
2179         {
2180                 int                     i = parentestate->es_plannedstmt->nParamExec;
2181
2182                 estate->es_param_exec_vals = (ParamExecData *)
2183                         palloc0(i * sizeof(ParamExecData));
2184                 while (--i >= 0)
2185                 {
2186                         /* copy value if any, but not execPlan link */
2187                         estate->es_param_exec_vals[i].value =
2188                                 parentestate->es_param_exec_vals[i].value;
2189                         estate->es_param_exec_vals[i].isnull =
2190                                 parentestate->es_param_exec_vals[i].isnull;
2191                 }
2192         }
2193
2194         /*
2195          * Each EState must have its own es_epqScanDone state, but if we have
2196          * nested EPQ checks they should share es_epqTuple arrays.      This allows
2197          * sub-rechecks to inherit the values being examined by an outer recheck.
2198          */
2199         estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
2200         if (parentestate->es_epqTuple != NULL)
2201         {
2202                 estate->es_epqTuple = parentestate->es_epqTuple;
2203                 estate->es_epqTupleSet = parentestate->es_epqTupleSet;
2204         }
2205         else
2206         {
2207                 estate->es_epqTuple = (HeapTuple *)
2208                         palloc0(rtsize * sizeof(HeapTuple));
2209                 estate->es_epqTupleSet = (bool *)
2210                         palloc0(rtsize * sizeof(bool));
2211         }
2212
2213         /*
2214          * Each estate also has its own tuple table.
2215          */
2216         estate->es_tupleTable = NIL;
2217
2218         /*
2219          * Initialize private state information for each SubPlan.  We must do this
2220          * before running ExecInitNode on the main query tree, since
2221          * ExecInitSubPlan expects to be able to find these entries. Some of the
2222          * SubPlans might not be used in the part of the plan tree we intend to
2223          * run, but since it's not easy to tell which, we just initialize them
2224          * all.  (However, if the subplan is headed by a ModifyTable node, then it
2225          * must be a data-modifying CTE, which we will certainly not need to
2226          * re-run, so we can skip initializing it.      This is just an efficiency
2227          * hack; it won't skip data-modifying CTEs for which the ModifyTable node
2228          * is not at the top.)
2229          */
2230         Assert(estate->es_subplanstates == NIL);
2231         foreach(l, parentestate->es_plannedstmt->subplans)
2232         {
2233                 Plan       *subplan = (Plan *) lfirst(l);
2234                 PlanState  *subplanstate;
2235
2236                 /* Don't initialize ModifyTable subplans, per comment above */
2237                 if (IsA(subplan, ModifyTable))
2238                         subplanstate = NULL;
2239                 else
2240                         subplanstate = ExecInitNode(subplan, estate, 0);
2241
2242                 estate->es_subplanstates = lappend(estate->es_subplanstates,
2243                                                                                    subplanstate);
2244         }
2245
2246         /*
2247          * Initialize the private state information for all the nodes in the part
2248          * of the plan tree we need to run.  This opens files, allocates storage
2249          * and leaves us ready to start processing tuples.
2250          */
2251         epqstate->planstate = ExecInitNode(planTree, estate, 0);
2252
2253         MemoryContextSwitchTo(oldcontext);
2254 }
2255
/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 *
 * Safe to call on an idle EPQState (one whose estate is NULL); it then
 * does nothing.  On return the EPQState is marked idle again.
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	if (estate == NULL)
		return;					/* idle, so nothing to do */

	/* All cleanup is done in the EPQ estate's own query context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Shut down the plan tree that EvalPlanQualStart initialized */
	ExecEndNode(epqstate->planstate);

	/*
	 * Shut down the per-SubPlan state nodes.  Entries may be NULL for
	 * ModifyTable-headed subplans that EvalPlanQualStart skipped;
	 * presumably ExecEndNode tolerates that — confirm in execProcnode.c.
	 */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/*
	 * Close any trigger target relations attached to this EState.  These
	 * are private to the EPQ estate (es_trig_target_relations is never
	 * copied from the parent), so they must be closed here, unlike the
	 * shared result relations.
	 */
	foreach(l, estate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		/* NoLock: locks are held until transaction end */
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	MemoryContextSwitchTo(oldcontext);

	/* Releases the query context and everything allocated in it */
	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}
2309
2310
/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 */

typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers; must be
								 * first so DestReceiver* casts to DR_intorel* */
	EState	   *estate;			/* EState we are working with; source of
								 * es_output_cid for heap_insert */
	Relation	rel;			/* Relation to write to; NULL until
								 * OpenIntoRel runs, reset by CloseIntoRel */
	int			hi_options;		/* heap_insert performance options
								 * (HEAP_INSERT_SKIP_FSM / _SKIP_WAL) */
	BulkInsertState bistate;	/* bulk insert state, freed in CloseIntoRel */
} DR_intorel;
2326
/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 *
 * On exit the new relation is created (with TOAST table if needed), opened
 * with AccessExclusiveLock, and wired into a DR_intorel receiver that
 * intorel_receive will use for each output tuple.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->intoClause;
	EState	   *estate = queryDesc->estate;
	Relation	intoRelationDesc;
	char	   *intoName;
	Oid			namespaceId;
	Oid			tablespaceId;
	Datum		reloptions;
	Oid			intoRelationId;
	TupleDesc	tupdesc;
	DR_intorel *myState;
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;

	Assert(into);

	/*
	 * XXX This code needs to be kept in sync with DefineRelation(). Maybe we
	 * should try to use that function instead.
	 */

	/*
	 * Check consistency of arguments: ON COMMIT actions only make sense for
	 * temp tables.
	 */
	if (into->onCommit != ONCOMMIT_NOOP
		&& into->rel->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Security check: disallow creating temp tables from security-restricted
	 * code.  This is needed because calling code might not expect untrusted
	 * tables to appear in pg_temp at the front of its search path.
	 */
	if (into->rel->relpersistence == RELPERSISTENCE_TEMP
		&& InSecurityRestrictedOperation())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("cannot create temporary table within security-restricted operation")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetAndCheckCreationNamespace(into->rel);

	/*
	 * Select tablespace to use.  If not specified, use default tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		/* false: error out if the named tablespace doesn't exist */
		tablespaceId = get_tablespace_oid(into->tableSpaceName, false);
	}
	else
	{
		tablespaceId = GetDefaultTablespace(into->rel->relpersistence);
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* Parse and validate any reloptions (for the heap, not toast, here) */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 NULL,
									 validnsps,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* Copy the tupdesc because heap_create_with_catalog modifies it */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,
											  InvalidOid,
											  InvalidOid,
											  GetUserId(),
											  tupdesc,
											  NIL,
											  RELKIND_RELATION,
											  into->rel->relpersistence,
											  false,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  true,
											  allowSystemTableMods);
	Assert(intoRelationId != InvalidOid);

	/* the copy was consumed only for catalog construction */
	FreeTupleDesc(tupdesc);

	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation. Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
	 * the TOAST table will be visible for insertion.
	 */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 "toast",
									 validnsps,
									 true,
									 false);

	(void) heap_reloptions(RELKIND_TOASTVALUE, reloptions, true);

	AlterTableCreateToastTable(intoRelationId, reloptions);

	/*
	 * And open the constructed table for writing.  AccessExclusiveLock is
	 * appropriate since no one else can see the table yet anyway.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
	myState->rel = intoRelationDesc;

	/*
	 * We can skip WAL-logging the insertions, unless PITR or streaming
	 * replication is in use. We can skip the FSM in any case.
	 */
	myState->hi_options = HEAP_INSERT_SKIP_FSM |
		(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
	myState->bistate = GetBulkInsertState();

	/* Not using WAL requires smgr_targblock be initially invalid */
	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}
2492
2493 /*
2494  * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
2495  */
2496 static void
2497 CloseIntoRel(QueryDesc *queryDesc)
2498 {
2499         DR_intorel *myState = (DR_intorel *) queryDesc->dest;
2500
2501         /* OpenIntoRel might never have gotten called */
2502         if (myState && myState->pub.mydest == DestIntoRel && myState->rel)
2503         {
2504                 FreeBulkInsertState(myState->bistate);
2505
2506                 /* If we skipped using WAL, must heap_sync before commit */
2507                 if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
2508                         heap_sync(myState->rel);
2509
2510                 /* close rel, but keep lock until commit */
2511                 heap_close(myState->rel, NoLock);
2512
2513                 myState->rel = NULL;
2514         }
2515 }
2516
2517 /*
2518  * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
2519  */
2520 DestReceiver *
2521 CreateIntoRelDestReceiver(void)
2522 {
2523         DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
2524
2525         self->pub.receiveSlot = intorel_receive;
2526         self->pub.rStartup = intorel_startup;
2527         self->pub.rShutdown = intorel_shutdown;
2528         self->pub.rDestroy = intorel_destroy;
2529         self->pub.mydest = DestIntoRel;
2530
2531         /* private fields will be set by OpenIntoRel */
2532
2533         return (DestReceiver *) self;
2534 }
2535
2536 /*
2537  * intorel_startup --- executor startup
2538  */
2539 static void
2540 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
2541 {
2542         /* no-op */
2543 }
2544
2545 /*
2546  * intorel_receive --- receive one tuple
2547  */
2548 static void
2549 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
2550 {
2551         DR_intorel *myState = (DR_intorel *) self;
2552         HeapTuple       tuple;
2553
2554         /*
2555          * get the heap tuple out of the tuple table slot, making sure we have a
2556          * writable copy
2557          */
2558         tuple = ExecMaterializeSlot(slot);
2559
2560         /*
2561          * force assignment of new OID (see comments in ExecInsert)
2562          */
2563         if (myState->rel->rd_rel->relhasoids)
2564                 HeapTupleSetOid(tuple, InvalidOid);
2565
2566         heap_insert(myState->rel,
2567                                 tuple,
2568                                 myState->estate->es_output_cid,
2569                                 myState->hi_options,
2570                                 myState->bistate);
2571
2572         /* We know this is a newly created relation, so there are no indexes */
2573 }
2574
2575 /*
2576  * intorel_shutdown --- executor end
2577  */
2578 static void
2579 intorel_shutdown(DestReceiver *self)
2580 {
2581         /* no-op */
2582 }
2583
2584 /*
2585  * intorel_destroy --- release DestReceiver object
2586  */
2587 static void
2588 intorel_destroy(DestReceiver *self)
2589 {
2590         pfree(self);
2591 }