core.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * core.c
   4  *        Routines copied from PostgreSQL core distribution.
   5  *
   6  * src/backend/optimizer/path/allpaths.c
   7  *     set_append_rel_pathlist()
   8  *     generate_mergeappend_paths()
   9  *     get_cheapest_parameterized_child_path()
  10  *     accumulate_append_subpath()
  11  *     standard_join_search()
  12  *
  13  * src/backend/optimizer/path/joinrels.c
  14  *     join_search_one_level()
  15  *     make_rels_by_clause_joins()
  16  *     make_rels_by_clauseless_joins()
  17  *     join_is_legal()
  18  *     has_join_restriction()
  19  *     is_dummy_rel()
  20  *     mark_dummy_rel()
  21  *     restriction_is_constant_false()
  22  *
  23  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  24  * Portions Copyright (c) 1994, Regents of the University of California
  25  *
  26  *-------------------------------------------------------------------------
  27  */
  28
  29 /*
  30  * set_append_rel_pathlist
  31  *        Build access paths for an "append relation"
  32  */
  33 static void
  34 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
  35                                                 Index rti, RangeTblEntry *rte)
  36 {
  37         int                     parentRTindex = rti;
  38         List       *live_childrels = NIL;
  39         List       *subpaths = NIL;
  40         List       *all_child_pathkeys = NIL;
  41         List       *all_child_outers = NIL;
  42         ListCell   *l;
  43
  44         /*
  45          * Generate access paths for each member relation, and remember the
  46          * cheapest path for each one.  Also, identify all pathkeys (orderings)
  47          * and parameterizations (required_outer sets) available for the member
  48          * relations.
  49          */
  50         foreach(l, root->append_rel_list)
  51         {
  52                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
  53                 int                     childRTindex;
  54                 RangeTblEntry *childRTE;
  55                 RelOptInfo *childrel;
  56                 ListCell   *lcp;
  57
  58                 /* append_rel_list contains all append rels; ignore others */
  59                 if (appinfo->parent_relid != parentRTindex)
  60                         continue;
  61
  62                 /* Re-locate the child RTE and RelOptInfo */
  63                 childRTindex = appinfo->child_relid;
  64                 childRTE = root->simple_rte_array[childRTindex];
  65                 childrel = root->simple_rel_array[childRTindex];
  66
  67                 /*
  68                  * Compute the child's access paths.
  69                  */
  70                 set_rel_pathlist(root, childrel, childRTindex, childRTE);
  71
  72                 /*
  73                  * If child is dummy, ignore it.
  74                  */
  75                 if (IS_DUMMY_REL(childrel))
  76                         continue;
  77
  78                 /*
  79                  * Child is live, so add its cheapest access path to the Append path
  80                  * we are constructing for the parent.
  81                  */
  82                 subpaths = accumulate_append_subpath(subpaths,
  83                                                                                          childrel->cheapest_total_path);
  84
  85                 /* Remember which childrels are live, for logic below */
  86                 live_childrels = lappend(live_childrels, childrel);
  87
  88                 /*
  89                  * Collect lists of all the available path orderings and
  90                  * parameterizations for all the children.      We use these as a
  91                  * heuristic to indicate which sort orderings and parameterizations we
  92                  * should build Append and MergeAppend paths for.
  93                  */
  94                 foreach(lcp, childrel->pathlist)
  95                 {
  96                         Path       *childpath = (Path *) lfirst(lcp);
  97                         List       *childkeys = childpath->pathkeys;
  98                         Relids          childouter = PATH_REQ_OUTER(childpath);
  99
 100                         /* Unsorted paths don't contribute to pathkey list */
 101                         if (childkeys != NIL)
 102                         {
 103                                 ListCell   *lpk;
 104                                 bool            found = false;
 105
 106                                 /* Have we already seen this ordering? */
 107                                 foreach(lpk, all_child_pathkeys)
 108                                 {
 109                                         List       *existing_pathkeys = (List *) lfirst(lpk);
 110
 111                                         if (compare_pathkeys(existing_pathkeys,
 112                                                                                  childkeys) == PATHKEYS_EQUAL)
 113                                         {
 114                                                 found = true;
 115                                                 break;
 116                                         }
 117                                 }
 118                                 if (!found)
 119                                 {
 120                                         /* No, so add it to all_child_pathkeys */
 121                                         all_child_pathkeys = lappend(all_child_pathkeys,
 122                                                                                                  childkeys);
 123                                 }
 124                         }
 125
 126                         /* Unparameterized paths don't contribute to param-set list */
 127                         if (childouter)
 128                         {
 129                                 ListCell   *lco;
 130                                 bool            found = false;
 131
 132                                 /* Have we already seen this param set? */
 133                                 foreach(lco, all_child_outers)
 134                                 {
 135                                         Relids          existing_outers = (Relids) lfirst(lco);
 136
 137                                         if (bms_equal(existing_outers, childouter))
 138                                         {
 139                                                 found = true;
 140                                                 break;
 141                                         }
 142                                 }
 143                                 if (!found)
 144                                 {
 145                                         /* No, so add it to all_child_outers */
 146                                         all_child_outers = lappend(all_child_outers,
 147                                                                                            childouter);
 148                                 }
 149                         }
 150                 }
 151         }
 152
 153         /*
 154          * Next, build an unordered, unparameterized Append path for the rel.
 155          * (Note: this is correct even if we have zero or one live subpath due to
 156          * constraint exclusion.)
 157          */
 158         add_path(rel, (Path *) create_append_path(rel, subpaths, NULL));
 159
 160         /*
 161          * Build unparameterized MergeAppend paths based on the collected list of
 162          * child pathkeys.
 163          */
 164         generate_mergeappend_paths(root, rel, live_childrels, all_child_pathkeys);
 165
 166         /*
 167          * Build Append paths for each parameterization seen among the child rels.
 168          * (This may look pretty expensive, but in most cases of practical
 169          * interest, the child rels will expose mostly the same parameterizations,
 170          * so that not that many cases actually get considered here.)
 171          *
 172          * The Append node itself cannot enforce quals, so all qual checking must
 173          * be done in the child paths.  This means that to have a parameterized
 174          * Append path, we must have the exact same parameterization for each
 175          * child path; otherwise some children might be failing to check the
 176          * moved-down quals.  To make them match up, we can try to increase the
 177          * parameterization of lesser-parameterized paths.
 178          */
 179         foreach(l, all_child_outers)
 180         {
 181                 Relids          required_outer = (Relids) lfirst(l);
 182                 bool            subpaths_valid = true;
 183                 ListCell   *lcr;
 184
 185                 /* Select the child paths for an Append with this parameterization */
 186                 subpaths = NIL;
 187                 foreach(lcr, live_childrels)
 188                 {
 189                         RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
 190                         Path       *subpath;
 191
 192                         subpath = get_cheapest_parameterized_child_path(root,
 193                                                                                                                         childrel,
 194                                                                                                                         required_outer);
 195                         if (subpath == NULL)
 196                         {
 197                                 /* failed to make a suitable path for this child */
 198                                 subpaths_valid = false;
 199                                 break;
 200                         }
 201                         subpaths = accumulate_append_subpath(subpaths, subpath);
 202                 }
 203
 204                 if (subpaths_valid)
 205                         add_path(rel, (Path *)
 206                                          create_append_path(rel, subpaths, required_outer));
 207         }
 208
 209         /* Select cheapest paths */
 210         set_cheapest(rel);
 211 }
 212
 213 /*
 214  * generate_mergeappend_paths
 215  *              Generate MergeAppend paths for an append relation
 216  *
 217  * Generate a path for each ordering (pathkey list) appearing in
 218  * all_child_pathkeys.
 219  *
 220  * We consider both cheapest-startup and cheapest-total cases, ie, for each
 221  * interesting ordering, collect all the cheapest startup subpaths and all the
 222  * cheapest total paths, and build a MergeAppend path for each case.
 223  *
 224  * We don't currently generate any parameterized MergeAppend paths.  While
 225  * it would not take much more code here to do so, it's very unclear that it
 226  * is worth the planning cycles to investigate such paths: there's little
 227  * use for an ordered path on the inside of a nestloop.  In fact, it's likely
 228  * that the current coding of add_path would reject such paths out of hand,
 229  * because add_path gives no credit for sort ordering of parameterized paths,
 230  * and a parameterized MergeAppend is going to be more expensive than the
 231  * corresponding parameterized Append path.  If we ever try harder to support
 232  * parameterized mergejoin plans, it might be worth adding support for
 233  * parameterized MergeAppends to feed such joins.  (See notes in
 234  * optimizer/README for why that might not ever happen, though.)
 235  */
 236 static void
 237 generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 238                                                    List *live_childrels,
 239                                                    List *all_child_pathkeys)
 240 {
 241         ListCell   *lcp;
 242
 243         foreach(lcp, all_child_pathkeys)
 244         {
 245                 List       *pathkeys = (List *) lfirst(lcp);
 246                 List       *startup_subpaths = NIL;
 247                 List       *total_subpaths = NIL;
 248                 bool            startup_neq_total = false;
 249                 ListCell   *lcr;
 250
 251                 /* Select the child paths for this ordering... */
 252                 foreach(lcr, live_childrels)
 253                 {
 254                         RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
 255                         Path       *cheapest_startup,
 256                                            *cheapest_total;
 257
 258                         /* Locate the right paths, if they are available. */
 259                         cheapest_startup =
 260                                 get_cheapest_path_for_pathkeys(childrel->pathlist,
 261                                                                                            pathkeys,
 262                                                                                            NULL,
 263                                                                                            STARTUP_COST);
 264                         cheapest_total =
 265                                 get_cheapest_path_for_pathkeys(childrel->pathlist,
 266                                                                                            pathkeys,
 267                                                                                            NULL,
 268                                                                                            TOTAL_COST);
 269
 270                         /*
 271                          * If we can't find any paths with the right order just use the
 272                          * cheapest-total path; we'll have to sort it later.
 273                          */
 274                         if (cheapest_startup == NULL || cheapest_total == NULL)
 275                         {
 276                                 cheapest_startup = cheapest_total =
 277                                         childrel->cheapest_total_path;
 278                                 Assert(cheapest_total != NULL);
 279                         }
 280
 281                         /*
 282                          * Notice whether we actually have different paths for the
 283                          * "cheapest" and "total" cases; frequently there will be no point
 284                          * in two create_merge_append_path() calls.
 285                          */
 286                         if (cheapest_startup != cheapest_total)
 287                                 startup_neq_total = true;
 288
 289                         startup_subpaths =
 290                                 accumulate_append_subpath(startup_subpaths, cheapest_startup);
 291                         total_subpaths =
 292                                 accumulate_append_subpath(total_subpaths, cheapest_total);
 293                 }
 294
 295                 /* ... and build the MergeAppend paths */
 296                 add_path(rel, (Path *) create_merge_append_path(root,
 297                                                                                                                 rel,
 298                                                                                                                 startup_subpaths,
 299                                                                                                                 pathkeys,
 300                                                                                                                 NULL));
 301                 if (startup_neq_total)
 302                         add_path(rel, (Path *) create_merge_append_path(root,
 303                                                                                                                         rel,
 304                                                                                                                         total_subpaths,
 305                                                                                                                         pathkeys,
 306                                                                                                                         NULL));
 307         }
 308 }
 309
 310 /*
 311  * get_cheapest_parameterized_child_path
 312  *              Get cheapest path for this relation that has exactly the requested
 313  *              parameterization.
 314  *
 315  * Returns NULL if unable to create such a path.
 316  */
 317 static Path *
 318 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
 319                                                                           Relids required_outer)
 320 {
 321         Path       *cheapest;
 322         ListCell   *lc;
 323
 324         /*
 325          * Look up the cheapest existing path with no more than the needed
 326          * parameterization.  If it has exactly the needed parameterization, we're
 327          * done.
 328          */
 329         cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
 330                                                                                           NIL,
 331                                                                                           required_outer,
 332                                                                                           TOTAL_COST);
 333         Assert(cheapest != NULL);
 334         if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
 335                 return cheapest;
 336
 337         /*
 338          * Otherwise, we can "reparameterize" an existing path to match the given
 339          * parameterization, which effectively means pushing down additional
 340          * joinquals to be checked within the path's scan.  However, some existing
 341          * paths might check the available joinquals already while others don't;
 342          * therefore, it's not clear which existing path will be cheapest after
 343          * reparameterization.  We have to go through them all and find out.
 344          */
 345         cheapest = NULL;
 346         foreach(lc, rel->pathlist)
 347         {
 348                 Path       *path = (Path *) lfirst(lc);
 349
 350                 /* Can't use it if it needs more than requested parameterization */
 351                 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
 352                         continue;
 353
 354                 /*
 355                  * Reparameterization can only increase the path's cost, so if it's
 356                  * already more expensive than the current cheapest, forget it.
 357                  */
 358                 if (cheapest != NULL &&
 359                         compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
 360                         continue;
 361
 362                 /* Reparameterize if needed, then recheck cost */
 363                 if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
 364                 {
 365                         path = reparameterize_path(root, path, required_outer, 1.0);
 366                         if (path == NULL)
 367                                 continue;               /* failed to reparameterize this one */
 368                         Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
 369
 370                         if (cheapest != NULL &&
 371                                 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
 372                                 continue;
 373                 }
 374
 375                 /* We have a new best path */
 376                 cheapest = path;
 377         }
 378
 379         /* Return the best path, or NULL if we found no suitable candidate */
 380         return cheapest;
 381 }
 382
 383 /*
 384  * accumulate_append_subpath
 385  *              Add a subpath to the list being built for an Append or MergeAppend
 386  *
 387  * It's possible that the child is itself an Append path, in which case
 388  * we can "cut out the middleman" and just add its child paths to our
 389  * own list.  (We don't try to do this earlier because we need to
 390  * apply both levels of transformation to the quals.)
 391  */
 392 static List *
 393 accumulate_append_subpath(List *subpaths, Path *path)
 394 {
 395         if (IsA(path, AppendPath))
 396         {
 397                 AppendPath *apath = (AppendPath *) path;
 398
 399                 /* list_copy is important here to avoid sharing list substructure */
 400                 return list_concat(subpaths, list_copy(apath->subpaths));
 401         }
 402         else
 403                 return lappend(subpaths, path);
 404 }
 405
 406 /*
 407  * standard_join_search
 408  *        Find possible joinpaths for a query by successively finding ways
 409  *        to join component relations into join relations.
 410  *
 411  * 'levels_needed' is the number of iterations needed, ie, the number of
 412  *              independent jointree items in the query.  This is > 1.
 413  *
 414  * 'initial_rels' is a list of RelOptInfo nodes for each independent
 415  *              jointree item.  These are the components to be joined together.
 416  *              Note that levels_needed == list_length(initial_rels).
 417  *
 418  * Returns the final level of join relations, i.e., the relation that is
 419  * the result of joining all the original relations together.
 420  * At least one implementation path must be provided for this relation and
 421  * all required sub-relations.
 422  *
 423  * To support loadable plugins that modify planner behavior by changing the
 424  * join searching algorithm, we provide a hook variable that lets a plugin
 425  * replace or supplement this function.  Any such hook must return the same
 426  * final join relation as the standard code would, but it might have a
 427  * different set of implementation paths attached, and only the sub-joinrels
 428  * needed for these paths need have been instantiated.
 429  *
 430  * Note to plugin authors: the functions invoked during standard_join_search()
 431  * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
 432  * than one join-order search, you'll probably need to save and restore the
 433  * original states of those data structures.  See geqo_eval() for an example.
 434  */
 435 RelOptInfo *
 436 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 437 {
 438         int                     lev;
 439         RelOptInfo *rel;
 440
 441         /*
 442          * This function cannot be invoked recursively within any one planning
 443          * problem, so join_rel_level[] can't be in use already.
 444          */
 445         Assert(root->join_rel_level == NULL);
 446
 447         /*
 448          * We employ a simple "dynamic programming" algorithm: we first find all
 449          * ways to build joins of two jointree items, then all ways to build joins
 450          * of three items (from two-item joins and single items), then four-item
 451          * joins, and so on until we have considered all ways to join all the
 452          * items into one rel.
 453          *
 454          * root->join_rel_level[j] is a list of all the j-item rels.  Initially we
 455          * set root->join_rel_level[1] to represent all the single-jointree-item
 456          * relations.
 457          */
 458         root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
 459
 460         root->join_rel_level[1] = initial_rels;
 461
 462         for (lev = 2; lev <= levels_needed; lev++)
 463         {
 464                 ListCell   *lc;
 465
 466                 /*
 467                  * Determine all possible pairs of relations to be joined at this
 468                  * level, and build paths for making each one from every available
 469                  * pair of lower-level relations.
 470                  */
 471                 join_search_one_level(root, lev);
 472
 473                 /*
 474                  * Do cleanup work on each just-processed rel.
 475                  */
 476                 foreach(lc, root->join_rel_level[lev])
 477                 {
 478                         rel = (RelOptInfo *) lfirst(lc);
 479
 480                         /* Find and save the cheapest paths for this rel */
 481                         set_cheapest(rel);
 482
 483 #ifdef OPTIMIZER_DEBUG
 484                         debug_print_rel(root, rel);
 485 #endif
 486                 }
 487         }
 488
 489         /*
 490          * We should have a single rel at the final level.
 491          */
 492         if (root->join_rel_level[levels_needed] == NIL)
 493                 elog(ERROR, "failed to build any %d-way joins", levels_needed);
 494         Assert(list_length(root->join_rel_level[levels_needed]) == 1);
 495
 496         rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
 497
 498         root->join_rel_level = NULL;
 499
 500         return rel;
 501 }
 502
 503 /*
 504  * join_search_one_level
 505  *        Consider ways to produce join relations containing exactly 'level'
 506  *        jointree items.  (This is one step of the dynamic-programming method
 507  *        embodied in standard_join_search.)  Join rel nodes for each feasible
 508  *        combination of lower-level rels are created and returned in a list.
 509  *        Implementation paths are created for each such joinrel, too.
 510  *
 511  * level: level of rels we want to make this time
 512  * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
 513  *
 514  * The result is returned in root->join_rel_level[level].
 515  */
 516 void
 517 join_search_one_level(PlannerInfo *root, int level)
 518 {
 519         List      **joinrels = root->join_rel_level;
 520         ListCell   *r;
 521         int                     k;
 522
 523         Assert(joinrels[level] == NIL);
 524
 525         /* Set join_cur_level so that new joinrels are added to proper list */
 526         root->join_cur_level = level;
 527
 528         /*
 529          * First, consider left-sided and right-sided plans, in which rels of
 530          * exactly level-1 member relations are joined against initial relations.
 531          * We prefer to join using join clauses, but if we find a rel of level-1
 532          * members that has no join clauses, we will generate Cartesian-product
 533          * joins against all initial rels not already contained in it.
 534          */
 535         foreach(r, joinrels[level - 1])
 536         {
 537                 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
 538
 539                 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
 540                         has_join_restriction(root, old_rel))
 541                 {
 542                         /*
 543                          * There are join clauses or join order restrictions relevant to
 544                          * this rel, so consider joins between this rel and (only) those
 545                          * initial rels it is linked to by a clause or restriction.
 546                          *
 547                          * At level 2 this condition is symmetric, so there is no need to
 548                          * look at initial rels before this one in the list; we already
 549                          * considered such joins when we were at the earlier rel.  (The
 550                          * mirror-image joins are handled automatically by make_join_rel.)
 551                          * In later passes (level > 2), we join rels of the previous level
 552                          * to each initial rel they don't already include but have a join
 553                          * clause or restriction with.
 554                          */
 555                         ListCell   *other_rels;
 556
 557                         if (level == 2)         /* consider remaining initial rels */
 558                                 other_rels = lnext(r);
 559                         else    /* consider all initial rels */
 560                                 other_rels = list_head(joinrels[1]);
 561
 562                         make_rels_by_clause_joins(root,
 563                                                                           old_rel,
 564                                                                           other_rels);
 565                 }
 566                 else
 567                 {
 568                         /*
 569                          * Oops, we have a relation that is not joined to any other
 570                          * relation, either directly or by join-order restrictions.
 571                          * Cartesian product time.
 572                          *
 573                          * We consider a cartesian product with each not-already-included
 574                          * initial rel, whether it has other join clauses or not.  At
 575                          * level 2, if there are two or more clauseless initial rels, we
 576                          * will redundantly consider joining them in both directions; but
 577                          * such cases aren't common enough to justify adding complexity to
 578                          * avoid the duplicated effort.
 579                          */
 580                         make_rels_by_clauseless_joins(root,
 581                                                                                   old_rel,
 582                                                                                   list_head(joinrels[1]));
 583                 }
 584         }
 585
 586         /*
 587          * Now, consider "bushy plans" in which relations of k initial rels are
 588          * joined to relations of level-k initial rels, for 2 <= k <= level-2.
 589          *
 590          * We only consider bushy-plan joins for pairs of rels where there is a
 591          * suitable join clause (or join order restriction), in order to avoid
 592          * unreasonable growth of planning time.
 593          */
 594         for (k = 2;; k++)
 595         {
 596                 int                     other_level = level - k;
 597
 598                 /*
 599                  * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
 600                  * need to go as far as the halfway point.
 601                  */
 602                 if (k > other_level)
 603                         break;
 604
 605                 foreach(r, joinrels[k])
 606                 {
 607                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
 608                         ListCell   *other_rels;
 609                         ListCell   *r2;
 610
 611                         /*
 612                          * We can ignore relations without join clauses here, unless they
 613                          * participate in join-order restrictions --- then we might have
 614                          * to force a bushy join plan.
 615                          */
 616                         if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
 617                                 !has_join_restriction(root, old_rel))
 618                                 continue;
 619
 620                         if (k == other_level)
 621                                 other_rels = lnext(r);  /* only consider remaining rels */
 622                         else
 623                                 other_rels = list_head(joinrels[other_level]);
 624
 625                         for_each_cell(r2, other_rels)
 626                         {
 627                                 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
 628
 629                                 if (!bms_overlap(old_rel->relids, new_rel->relids))
 630                                 {
 631                                         /*
 632                                          * OK, we can build a rel of the right level from this
 633                                          * pair of rels.  Do so if there is at least one relevant
 634                                          * join clause or join order restriction.
 635                                          */
 636                                         if (have_relevant_joinclause(root, old_rel, new_rel) ||
 637                                                 have_join_order_restriction(root, old_rel, new_rel))
 638                                         {
 639                                                 (void) make_join_rel(root, old_rel, new_rel);
 640                                         }
 641                                 }
 642                         }
 643                 }
 644         }
 645
 646         /*----------
 647          * Last-ditch effort: if we failed to find any usable joins so far, force
 648          * a set of cartesian-product joins to be generated.  This handles the
 649          * special case where all the available rels have join clauses but we
 650          * cannot use any of those clauses yet.  This can only happen when we are
 651          * considering a join sub-problem (a sub-joinlist) and all the rels in the
 652          * sub-problem have only join clauses with rels outside the sub-problem.
 653          * An example is
 654          *
 655          *              SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
 656          *              WHERE a.w = c.x and b.y = d.z;
 657          *
 658          * If the "a INNER JOIN b" sub-problem does not get flattened into the
 659          * upper level, we must be willing to make a cartesian join of a and b;
 660          * but the code above will not have done so, because it thought that both
 661          * a and b have joinclauses.  We consider only left-sided and right-sided
 662          * cartesian joins in this case (no bushy).
 663          *----------
 664          */
 665         if (joinrels[level] == NIL)
 666         {
 667                 /*
 668                  * This loop is just like the first one, except we always call
 669                  * make_rels_by_clauseless_joins().
 670                  */
 671                 foreach(r, joinrels[level - 1])
 672                 {
 673                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
 674
 675                         make_rels_by_clauseless_joins(root,
 676                                                                                   old_rel,
 677                                                                                   list_head(joinrels[1]));
 678                 }
 679
 680                 /*----------
 681                  * When special joins are involved, there may be no legal way
 682                  * to make an N-way join for some values of N.  For example consider
 683                  *
 684                  * SELECT ... FROM t1 WHERE
 685                  *       x IN (SELECT ... FROM t2,t3 WHERE ...) AND
 686                  *       y IN (SELECT ... FROM t4,t5 WHERE ...)
 687                  *
 688                  * We will flatten this query to a 5-way join problem, but there are
 689                  * no 4-way joins that join_is_legal() will consider legal.  We have
 690                  * to accept failure at level 4 and go on to discover a workable
 691                  * bushy plan at level 5.
 692                  *
 693                  * However, if there are no special joins then join_is_legal() should
 694                  * never fail, and so the following sanity check is useful.
 695                  *----------
 696                  */
 697                 if (joinrels[level] == NIL && root->join_info_list == NIL)
 698                         elog(ERROR, "failed to build any %d-way joins", level);
 699         }
 700 }
 701
 702 /*
 703  * make_rels_by_clause_joins
 704  *        Build joins between the given relation 'old_rel' and other relations
 705  *        that participate in join clauses that 'old_rel' also participates in
 706  *        (or participate in join-order restrictions with it).
 707  *        The join rels are returned in root->join_rel_level[join_cur_level].
 708  *
 709  * Note: at levels above 2 we will generate the same joined relation in
 710  * multiple ways --- for example (a join b) join c is the same RelOptInfo as
 711  * (b join c) join a, though the second case will add a different set of Paths
 712  * to it.  This is the reason for using the join_rel_level mechanism, which
 713  * automatically ensures that each new joinrel is only added to the list once.
 714  *
 715  * 'old_rel' is the relation entry for the relation to be joined
 716  * 'other_rels': the first cell in a linked list containing the other
 717  * rels to be considered for joining
 718  *
 719  * Currently, this is only used with initial rels in other_rels, but it
 720  * will work for joining to joinrels too.
 721  */
 722 static void
 723 make_rels_by_clause_joins(PlannerInfo *root,
 724                                                   RelOptInfo *old_rel,
 725                                                   ListCell *other_rels)
 726 {
 727         ListCell   *l;
 728
 729         for_each_cell(l, other_rels)
 730         {
 731                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
 732
 733                 if (!bms_overlap(old_rel->relids, other_rel->relids) &&
 734                         (have_relevant_joinclause(root, old_rel, other_rel) ||
 735                          have_join_order_restriction(root, old_rel, other_rel)))
 736                 {
 737                         (void) make_join_rel(root, old_rel, other_rel);
 738                 }
 739         }
 740 }
 741
 742 /*
 743  * make_rels_by_clauseless_joins
 744  *        Given a relation 'old_rel' and a list of other relations
 745  *        'other_rels', create a join relation between 'old_rel' and each
 746  *        member of 'other_rels' that isn't already included in 'old_rel'.
 747  *        The join rels are returned in root->join_rel_level[join_cur_level].
 748  *
 749  * 'old_rel' is the relation entry for the relation to be joined
 750  * 'other_rels': the first cell of a linked list containing the
 751  * other rels to be considered for joining
 752  *
 753  * Currently, this is only used with initial rels in other_rels, but it would
 754  * work for joining to joinrels too.
 755  */
 756 static void
 757 make_rels_by_clauseless_joins(PlannerInfo *root,
 758                                                           RelOptInfo *old_rel,
 759                                                           ListCell *other_rels)
 760 {
 761         ListCell   *l;
 762
 763         for_each_cell(l, other_rels)
 764         {
 765                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
 766
 767                 if (!bms_overlap(other_rel->relids, old_rel->relids))
 768                 {
 769                         (void) make_join_rel(root, old_rel, other_rel);
 770                 }
 771         }
 772 }
 773
 774 /*
 775  * join_is_legal
 776  *         Determine whether a proposed join is legal given the query's
 777  *         join order constraints; and if it is, determine the join type.
 778  *
 779  * Caller must supply not only the two rels, but the union of their relids.
 780  * (We could simplify the API by computing joinrelids locally, but this
 781  * would be redundant work in the normal path through make_join_rel.)
 782  *
 783  * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
 784  * else it's set to point to the associated SpecialJoinInfo node.  Also,
 785  * *reversed_p is set TRUE if the given relations need to be swapped to
 786  * match the SpecialJoinInfo node.
 787  */
 788 static bool
 789 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 790                           Relids joinrelids,
 791                           SpecialJoinInfo **sjinfo_p, bool *reversed_p)
 792 {
 793         SpecialJoinInfo *match_sjinfo;
 794         bool            reversed;
 795         bool            unique_ified;
 796         bool            is_valid_inner;
 797         ListCell   *l;
 798
 799         /*
 800          * Ensure output params are set on failure return.      This is just to
 801          * suppress uninitialized-variable warnings from overly anal compilers.
 802          */
 803         *sjinfo_p = NULL;
 804         *reversed_p = false;
 805
 806         /*
 807          * If we have any special joins, the proposed join might be illegal; and
 808          * in any case we have to determine its join type.      Scan the join info
 809          * list for conflicts.
 810          */
 811         match_sjinfo = NULL;
 812         reversed = false;
 813         unique_ified = false;
 814         is_valid_inner = true;
 815
 816         foreach(l, root->join_info_list)
 817         {
 818                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
 819
 820                 /*
 821                  * This special join is not relevant unless its RHS overlaps the
 822                  * proposed join.  (Check this first as a fast path for dismissing
 823                  * most irrelevant SJs quickly.)
 824                  */
 825                 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
 826                         continue;
 827
 828                 /*
 829                  * Also, not relevant if proposed join is fully contained within RHS
 830                  * (ie, we're still building up the RHS).
 831                  */
 832                 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
 833                         continue;
 834
 835                 /*
 836                  * Also, not relevant if SJ is already done within either input.
 837                  */
 838                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
 839                         bms_is_subset(sjinfo->min_righthand, rel1->relids))
 840                         continue;
 841                 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
 842                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
 843                         continue;
 844
 845                 /*
 846                  * If it's a semijoin and we already joined the RHS to any other rels
 847                  * within either input, then we must have unique-ified the RHS at that
 848                  * point (see below).  Therefore the semijoin is no longer relevant in
 849                  * this join path.
 850                  */
 851                 if (sjinfo->jointype == JOIN_SEMI)
 852                 {
 853                         if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
 854                                 !bms_equal(sjinfo->syn_righthand, rel1->relids))
 855                                 continue;
 856                         if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
 857                                 !bms_equal(sjinfo->syn_righthand, rel2->relids))
 858                                 continue;
 859                 }
 860
 861                 /*
 862                  * If one input contains min_lefthand and the other contains
 863                  * min_righthand, then we can perform the SJ at this join.
 864                  *
 865                  * Barf if we get matches to more than one SJ (is that possible?)
 866                  */
 867                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
 868                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
 869                 {
 870                         if (match_sjinfo)
 871                                 return false;   /* invalid join path */
 872                         match_sjinfo = sjinfo;
 873                         reversed = false;
 874                 }
 875                 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
 876                                  bms_is_subset(sjinfo->min_righthand, rel1->relids))
 877                 {
 878                         if (match_sjinfo)
 879                                 return false;   /* invalid join path */
 880                         match_sjinfo = sjinfo;
 881                         reversed = true;
 882                 }
 883                 else if (sjinfo->jointype == JOIN_SEMI &&
 884                                  bms_equal(sjinfo->syn_righthand, rel2->relids) &&
 885                                  create_unique_path(root, rel2, rel2->cheapest_total_path,
 886                                                                         sjinfo) != NULL)
 887                 {
 888                         /*----------
 889                          * For a semijoin, we can join the RHS to anything else by
 890                          * unique-ifying the RHS (if the RHS can be unique-ified).
 891                          * We will only get here if we have the full RHS but less
 892                          * than min_lefthand on the LHS.
 893                          *
 894                          * The reason to consider such a join path is exemplified by
 895                          *      SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
 896                          * If we insist on doing this as a semijoin we will first have
 897                          * to form the cartesian product of A*B.  But if we unique-ify
 898                          * C then the semijoin becomes a plain innerjoin and we can join
 899                          * in any order, eg C to A and then to B.  When C is much smaller
 900                          * than A and B this can be a huge win.  So we allow C to be
 901                          * joined to just A or just B here, and then make_join_rel has
 902                          * to handle the case properly.
 903                          *
 904                          * Note that actually we'll allow unique-ified C to be joined to
 905                          * some other relation D here, too.  That is legal, if usually not
 906                          * very sane, and this routine is only concerned with legality not
 907                          * with whether the join is good strategy.
 908                          *----------
 909                          */
 910                         if (match_sjinfo)
 911                                 return false;   /* invalid join path */
 912                         match_sjinfo = sjinfo;
 913                         reversed = false;
 914                         unique_ified = true;
 915                 }
 916                 else if (sjinfo->jointype == JOIN_SEMI &&
 917                                  bms_equal(sjinfo->syn_righthand, rel1->relids) &&
 918                                  create_unique_path(root, rel1, rel1->cheapest_total_path,
 919                                                                         sjinfo) != NULL)
 920                 {
 921                         /* Reversed semijoin case */
 922                         if (match_sjinfo)
 923                                 return false;   /* invalid join path */
 924                         match_sjinfo = sjinfo;
 925                         reversed = true;
 926                         unique_ified = true;
 927                 }
 928                 else
 929                 {
 930                         /*----------
 931                          * Otherwise, the proposed join overlaps the RHS but isn't
 932                          * a valid implementation of this SJ.  It might still be
 933                          * a legal join, however.  If both inputs overlap the RHS,
 934                          * assume that it's OK.  Since the inputs presumably got past
 935                          * this function's checks previously, they can't overlap the
 936                          * LHS and their violations of the RHS boundary must represent
 937                          * SJs that have been determined to commute with this one.
 938                          * We have to allow this to work correctly in cases like
 939                          *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
 940                          * when the c/d join has been determined to commute with the join
 941                          * to a, and hence d is not part of min_righthand for the upper
 942                          * join.  It should be legal to join b to c/d but this will appear
 943                          * as a violation of the upper join's RHS.
 944                          * Furthermore, if one input overlaps the RHS and the other does
 945                          * not, we should still allow the join if it is a valid
 946                          * implementation of some other SJ.  We have to allow this to
 947                          * support the associative identity
 948                          *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
 949                          * since joining B directly to C violates the lower SJ's RHS.
 950                          * We assume that make_outerjoininfo() set things up correctly
 951                          * so that we'll only match to some SJ if the join is valid.
 952                          * Set flag here to check at bottom of loop.
 953                          *----------
 954                          */
 955                         if (sjinfo->jointype != JOIN_SEMI &&
 956                                 bms_overlap(rel1->relids, sjinfo->min_righthand) &&
 957                                 bms_overlap(rel2->relids, sjinfo->min_righthand))
 958                         {
 959                                 /* seems OK */
 960                                 Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
 961                         }
 962                         else
 963                                 is_valid_inner = false;
 964                 }
 965         }
 966
 967         /*
 968          * Fail if violated some SJ's RHS and didn't match to another SJ. However,
 969          * "matching" to a semijoin we are implementing by unique-ification
 970          * doesn't count (think: it's really an inner join).
 971          */
 972         if (!is_valid_inner &&
 973                 (match_sjinfo == NULL || unique_ified))
 974                 return false;                   /* invalid join path */
 975
 976         /* Otherwise, it's a valid join */
 977         *sjinfo_p = match_sjinfo;
 978         *reversed_p = reversed;
 979         return true;
 980 }
 981
 982 /*
 983  * has_join_restriction
 984  *              Detect whether the specified relation has join-order restrictions
 985  *              due to being inside an outer join or an IN (sub-SELECT).
 986  *
 987  * Essentially, this tests whether have_join_order_restriction() could
 988  * succeed with this rel and some other one.  It's OK if we sometimes
 989  * say "true" incorrectly.      (Therefore, we don't bother with the relatively
 990  * expensive has_legal_joinclause test.)
 991  */
 992 static bool
 993 has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 994 {
 995         ListCell   *l;
 996
 997         foreach(l, root->join_info_list)
 998         {
 999                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
1000
1001                 /* ignore full joins --- other mechanisms preserve their ordering */
1002                 if (sjinfo->jointype == JOIN_FULL)
1003                         continue;
1004
1005                 /* ignore if SJ is already contained in rel */
1006                 if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
1007                         bms_is_subset(sjinfo->min_righthand, rel->relids))
1008                         continue;
1009
1010                 /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
1011                 if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
1012                         bms_overlap(sjinfo->min_righthand, rel->relids))
1013                         return true;
1014         }
1015
1016         return false;
1017 }
1018
1019 /*
1020  * is_dummy_rel --- has relation been proven empty?
1021  */
1022 static bool
1023 is_dummy_rel(RelOptInfo *rel)
1024 {
1025         return IS_DUMMY_REL(rel);
1026 }
1027
1028 /*
1029  * Mark a relation as proven empty.
1030  *
1031  * During GEQO planning, this can get invoked more than once on the same
1032  * baserel struct, so it's worth checking to see if the rel is already marked
1033  * dummy.
1034  *
1035  * Also, when called during GEQO join planning, we are in a short-lived
1036  * memory context.      We must make sure that the dummy path attached to a
1037  * baserel survives the GEQO cycle, else the baserel is trashed for future
1038  * GEQO cycles.  On the other hand, when we are marking a joinrel during GEQO,
1039  * we don't want the dummy path to clutter the main planning context.  Upshot
1040  * is that the best solution is to explicitly make the dummy path in the same
1041  * context the given RelOptInfo is in.
1042  */
1043 static void
1044 mark_dummy_rel(RelOptInfo *rel)
1045 {
1046         MemoryContext oldcontext;
1047
1048         /* Already marked? */
1049         if (is_dummy_rel(rel))
1050                 return;
1051
1052         /* No, so choose correct context to make the dummy path in */
1053         oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
1054
1055         /* Set dummy size estimate */
1056         rel->rows = 0;
1057
1058         /* Evict any previously chosen paths */
1059         rel->pathlist = NIL;
1060
1061         /* Set up the dummy path */
1062         add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
1063
1064         /* Set or update cheapest_total_path and related fields */
1065         set_cheapest(rel);
1066
1067         MemoryContextSwitchTo(oldcontext);
1068 }
1069
1070 /*
1071  * restriction_is_constant_false --- is a restrictlist just FALSE?
1072  *
1073  * In cases where a qual is provably constant FALSE, eval_const_expressions
1074  * will generally have thrown away anything that's ANDed with it.  In outer
1075  * join situations this will leave us computing cartesian products only to
1076  * decide there's no match for an outer row, which is pretty stupid.  So,
1077  * we need to detect the case.
1078  *
1079  * If only_pushed_down is TRUE, then consider only pushed-down quals.
1080  */
1081 static bool
1082 restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
1083 {
1084         ListCell   *lc;
1085
1086         /*
1087          * Despite the above comment, the restriction list we see here might
1088          * possibly have other members besides the FALSE constant, since other
1089          * quals could get "pushed down" to the outer join level.  So we check
1090          * each member of the list.
1091          */
1092         foreach(lc, restrictlist)
1093         {
1094                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1095
1096                 Assert(IsA(rinfo, RestrictInfo));
1097                 if (only_pushed_down && !rinfo->is_pushed_down)
1098                         continue;
1099
1100                 if (rinfo->clause && IsA(rinfo->clause, Const))
1101                 {
1102                         Const      *con = (Const *) rinfo->clause;
1103
1104                         /* constant NULL is as good as constant FALSE for our purposes */
1105                         if (con->constisnull)
1106                                 return true;
1107                         if (!DatumGetBool(con->constvalue))
1108                                 return true;
1109                 }
1110         }
1111         return false;
1112 }