OSDN Git Service

Restore current hint state when returning from non-hinted query planning.
[pghintplan/pg_hint_plan.git] / core.c
diff --git a/core.c b/core.c
index af4f7f4..dc5a588 100644 (file)
--- a/core.c
+++ b/core.c
@@ -5,8 +5,9 @@
  *
  * src/backend/optimizer/path/allpaths.c
  *     set_append_rel_pathlist()
+ *     generate_mergeappend_paths()
+ *     get_cheapest_parameterized_child_path()
  *     accumulate_append_subpath()
- *     set_dummy_rel_pathlist()
  *     standard_join_search()
  *
  * src/backend/optimizer/path/joinrels.c
@@ -19,7 +20,8 @@
  *     mark_dummy_rel()
  *     restriction_is_constant_false()
  *
- * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *-------------------------------------------------------------------------
 /*
  * set_append_rel_pathlist
  *       Build access paths for an "append relation"
- *
- * The passed-in rel and RTE represent the entire append relation.     The
- * relation's contents are computed by appending together the output of
- * the individual member relations.  Note that in the inheritance case,
- * the first member relation is actually the same table as is mentioned in
- * the parent RTE ... but it has a different RTE and RelOptInfo.  This is
- * a good thing because their outputs are not the same size.
  */
 static void
 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -43,35 +38,16 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
        int                     parentRTindex = rti;
        List       *live_childrels = NIL;
        List       *subpaths = NIL;
+       bool            subpaths_valid = true;
        List       *all_child_pathkeys = NIL;
-       double          parent_rows;
-       double          parent_size;
-       double     *parent_attrsizes;
-       int                     nattrs;
+       List       *all_child_outers = NIL;
        ListCell   *l;
 
        /*
-        * Initialize to compute size estimates for whole append relation.
-        *
-        * We handle width estimates by weighting the widths of different child
-        * rels proportionally to their number of rows.  This is sensible because
-        * the use of width estimates is mainly to compute the total relation
-        * "footprint" if we have to sort or hash it.  To do this, we sum the
-        * total equivalent size (in "double" arithmetic) and then divide by the
-        * total rowcount estimate.  This is done separately for the total rel
-        * width and each attribute.
-        *
-        * Note: if you consider changing this logic, beware that child rels could
-        * have zero rows and/or width, if they were excluded by constraints.
-        */
-       parent_rows = 0;
-       parent_size = 0;
-       nattrs = rel->max_attr - rel->min_attr + 1;
-       parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
-
-       /*
-        * Generate access paths for each member relation, and pick the cheapest
-        * path for each one.
+        * Generate access paths for each member relation, and remember the
+        * cheapest path for each one.  Also, identify all pathkeys (orderings)
+        * and parameterizations (required_outer sets) available for the member
+        * relations.
         */
        foreach(l, root->append_rel_list)
        {
@@ -79,238 +55,202 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                int                     childRTindex;
                RangeTblEntry *childRTE;
                RelOptInfo *childrel;
-               List       *childquals;
-               Node       *childqual;
                ListCell   *lcp;
-               ListCell   *parentvars;
-               ListCell   *childvars;
 
                /* append_rel_list contains all append rels; ignore others */
                if (appinfo->parent_relid != parentRTindex)
                        continue;
 
+               /* Re-locate the child RTE and RelOptInfo */
                childRTindex = appinfo->child_relid;
                childRTE = root->simple_rte_array[childRTindex];
+               childrel = root->simple_rel_array[childRTindex];
 
                /*
-                * The child rel's RelOptInfo was already created during
-                * add_base_rels_to_query.
+                * Compute the child's access paths.
                 */
-               childrel = find_base_rel(root, childRTindex);
-               Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+               set_rel_pathlist(root, childrel, childRTindex, childRTE);
 
                /*
-                * We have to copy the parent's targetlist and quals to the child,
-                * with appropriate substitution of variables.  However, only the
-                * baserestrictinfo quals are needed before we can check for
-                * constraint exclusion; so do that first and then check to see if we
-                * can disregard this child.
-                *
-                * As of 8.4, the child rel's targetlist might contain non-Var
-                * expressions, which means that substitution into the quals could
-                * produce opportunities for const-simplification, and perhaps even
-                * pseudoconstant quals.  To deal with this, we strip the RestrictInfo
-                * nodes, do the substitution, do const-simplification, and then
-                * reconstitute the RestrictInfo layer.
+                * If child is dummy, ignore it.
                 */
-               childquals = get_all_actual_clauses(rel->baserestrictinfo);
-               childquals = (List *) adjust_appendrel_attrs((Node *) childquals,
-                                                                                                        appinfo);
-               childqual = eval_const_expressions(root, (Node *)
-                                                                                  make_ands_explicit(childquals));
-               if (childqual && IsA(childqual, Const) &&
-                       (((Const *) childqual)->constisnull ||
-                        !DatumGetBool(((Const *) childqual)->constvalue)))
-               {
-                       /*
-                        * Restriction reduces to constant FALSE or constant NULL after
-                        * substitution, so this child need not be scanned.
-                        */
-                       set_dummy_rel_pathlist(childrel);
-                       continue;
-               }
-               childquals = make_ands_implicit((Expr *) childqual);
-               childquals = make_restrictinfos_from_actual_clauses(root,
-                                                                                                                       childquals);
-               childrel->baserestrictinfo = childquals;
-
-               if (relation_excluded_by_constraints(root, childrel, childRTE))
-               {
-                       /*
-                        * This child need not be scanned, so we can omit it from the
-                        * appendrel.  Mark it with a dummy cheapest-path though, in case
-                        * best_appendrel_indexscan() looks at it later.
-                        */
-                       set_dummy_rel_pathlist(childrel);
+               if (IS_DUMMY_REL(childrel))
                        continue;
-               }
-
-               /*
-                * CE failed, so finish copying/modifying targetlist and join quals.
-                *
-                * Note: the resulting childrel->reltargetlist may contain arbitrary
-                * expressions, which normally would not occur in a reltargetlist.
-                * That is okay because nothing outside of this routine will look at
-                * the child rel's reltargetlist.  We do have to cope with the case
-                * while constructing attr_widths estimates below, though.
-                */
-               childrel->joininfo = (List *)
-                       adjust_appendrel_attrs((Node *) rel->joininfo,
-                                                                  appinfo);
-               childrel->reltargetlist = (List *)
-                       adjust_appendrel_attrs((Node *) rel->reltargetlist,
-                                                                  appinfo);
 
                /*
-                * We have to make child entries in the EquivalenceClass data
-                * structures as well.  This is needed either if the parent
-                * participates in some eclass joins (because we will want to consider
-                * inner-indexscan joins on the individual children) or if the parent
-                * has useful pathkeys (because we should try to build MergeAppend
-                * paths that produce those sort orderings).
+                * Child is live, so add it to the live_childrels list for use below.
                 */
-               if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
-                       add_child_rel_equivalences(root, appinfo, rel, childrel);
-               childrel->has_eclass_joins = rel->has_eclass_joins;
-
-               /*
-                * Note: we could compute appropriate attr_needed data for the child's
-                * variables, by transforming the parent's attr_needed through the
-                * translated_vars mapping.  However, currently there's no need
-                * because attr_needed is only examined for base relations not
-                * otherrels.  So we just leave the child's attr_needed empty.
-                */
-
-               /* Remember which childrels are live, for MergeAppend logic below */
                live_childrels = lappend(live_childrels, childrel);
 
                /*
-                * Compute the child's access paths, and add the cheapest one to the
-                * Append path we are constructing for the parent.
+                * If child has an unparameterized cheapest-total path, add that to
+                * the unparameterized Append path we are constructing for the parent.
+                * If not, there's no workable unparameterized path.
                 */
-               set_rel_pathlist(root, childrel, childRTindex, childRTE);
-
-               subpaths = accumulate_append_subpath(subpaths,
-                                                                                        childrel->cheapest_total_path);
+               if (childrel->cheapest_total_path->param_info == NULL)
+                       subpaths = accumulate_append_subpath(subpaths,
+                                                                                         childrel->cheapest_total_path);
+               else
+                       subpaths_valid = false;
 
                /*
-                * Collect a list of all the available path orderings for all the
-                * children.  We use this as a heuristic to indicate which sort
-                * orderings we should build MergeAppend paths for.
+                * Collect lists of all the available path orderings and
+                * parameterizations for all the children.  We use these as a
+                * heuristic to indicate which sort orderings and parameterizations we
+                * should build Append and MergeAppend paths for.
                 */
                foreach(lcp, childrel->pathlist)
                {
                        Path       *childpath = (Path *) lfirst(lcp);
                        List       *childkeys = childpath->pathkeys;
-                       ListCell   *lpk;
-                       bool            found = false;
-
-                       /* Ignore unsorted paths */
-                       if (childkeys == NIL)
-                               continue;
+                       Relids          childouter = PATH_REQ_OUTER(childpath);
 
-                       /* Have we already seen this ordering? */
-                       foreach(lpk, all_child_pathkeys)
+                       /* Unsorted paths don't contribute to pathkey list */
+                       if (childkeys != NIL)
                        {
-                               List       *existing_pathkeys = (List *) lfirst(lpk);
+                               ListCell   *lpk;
+                               bool            found = false;
 
-                               if (compare_pathkeys(existing_pathkeys,
-                                                                        childkeys) == PATHKEYS_EQUAL)
+                               /* Have we already seen this ordering? */
+                               foreach(lpk, all_child_pathkeys)
                                {
-                                       found = true;
-                                       break;
+                                       List       *existing_pathkeys = (List *) lfirst(lpk);
+
+                                       if (compare_pathkeys(existing_pathkeys,
+                                                                                childkeys) == PATHKEYS_EQUAL)
+                                       {
+                                               found = true;
+                                               break;
+                                       }
+                               }
+                               if (!found)
+                               {
+                                       /* No, so add it to all_child_pathkeys */
+                                       all_child_pathkeys = lappend(all_child_pathkeys,
+                                                                                                childkeys);
                                }
                        }
-                       if (!found)
-                       {
-                               /* No, so add it to all_child_pathkeys */
-                               all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
-                       }
-               }
 
-               /*
-                * Accumulate size information from each child.
-                */
-               if (childrel->rows > 0)
-               {
-                       parent_rows += childrel->rows;
-                       parent_size += childrel->width * childrel->rows;
-
-                       /*
-                        * Accumulate per-column estimates too.  We need not do anything
-                        * for PlaceHolderVars in the parent list.  If child expression
-                        * isn't a Var, or we didn't record a width estimate for it, we
-                        * have to fall back on a datatype-based estimate.
-                        *
-                        * By construction, child's reltargetlist is 1-to-1 with parent's.
-                        */
-                       forboth(parentvars, rel->reltargetlist,
-                                       childvars, childrel->reltargetlist)
+                       /* Unparameterized paths don't contribute to param-set list */
+                       if (childouter)
                        {
-                               Var                *parentvar = (Var *) lfirst(parentvars);
-                               Node       *childvar = (Node *) lfirst(childvars);
+                               ListCell   *lco;
+                               bool            found = false;
 
-                               if (IsA(parentvar, Var))
+                               /* Have we already seen this param set? */
+                               foreach(lco, all_child_outers)
                                {
-                                       int                     pndx = parentvar->varattno - rel->min_attr;
-                                       int32           child_width = 0;
+                                       Relids          existing_outers = (Relids) lfirst(lco);
 
-                                       if (IsA(childvar, Var))
+                                       if (bms_equal(existing_outers, childouter))
                                        {
-                                               int             cndx = ((Var *) childvar)->varattno - childrel->min_attr;
-
-                                               child_width = childrel->attr_widths[cndx];
+                                               found = true;
+                                               break;
                                        }
-                                       if (child_width <= 0)
-                                               child_width = get_typavgwidth(exprType(childvar),
-                                                                                                         exprTypmod(childvar));
-                                       Assert(child_width > 0);
-                                       parent_attrsizes[pndx] += child_width * childrel->rows;
+                               }
+                               if (!found)
+                               {
+                                       /* No, so add it to all_child_outers */
+                                       all_child_outers = lappend(all_child_outers,
+                                                                                          childouter);
                                }
                        }
                }
        }
 
        /*
-        * Save the finished size estimates.
+        * If we found unparameterized paths for all children, build an unordered,
+        * unparameterized Append path for the rel.  (Note: this is correct even
+        * if we have zero or one live subpath due to constraint exclusion.)
         */
-       rel->rows = parent_rows;
-       if (parent_rows > 0)
-       {
-               int                     i;
-
-               rel->width = rint(parent_size / parent_rows);
-               for (i = 0; i < nattrs; i++)
-                       rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
-       }
-       else
-               rel->width = 0;                 /* attr_widths should be zero already */
+       if (subpaths_valid)
+               add_path(rel, (Path *) create_append_path(rel, subpaths, NULL));
 
        /*
-        * Set "raw tuples" count equal to "rows" for the appendrel; needed
-        * because some places assume rel->tuples is valid for any baserel.
+        * Also build unparameterized MergeAppend paths based on the collected
+        * list of child pathkeys.
         */
-       rel->tuples = parent_rows;
-
-       pfree(parent_attrsizes);
+       if (subpaths_valid)
+               generate_mergeappend_paths(root, rel, live_childrels,
+                                                                  all_child_pathkeys);
 
        /*
-        * Next, build an unordered Append path for the rel.  (Note: this is
-        * correct even if we have zero or one live subpath due to constraint
-        * exclusion.)
+        * Build Append paths for each parameterization seen among the child rels.
+        * (This may look pretty expensive, but in most cases of practical
+        * interest, the child rels will expose mostly the same parameterizations,
+        * so that not that many cases actually get considered here.)
+        *
+        * The Append node itself cannot enforce quals, so all qual checking must
+        * be done in the child paths.  This means that to have a parameterized
+        * Append path, we must have the exact same parameterization for each
+        * child path; otherwise some children might be failing to check the
+        * moved-down quals.  To make them match up, we can try to increase the
+        * parameterization of lesser-parameterized paths.
         */
-       add_path(rel, (Path *) create_append_path(rel, subpaths));
+       foreach(l, all_child_outers)
+       {
+               Relids          required_outer = (Relids) lfirst(l);
+               ListCell   *lcr;
 
-       /*
-        * Next, build MergeAppend paths based on the collected list of child
-        * pathkeys.  We consider both cheapest-startup and cheapest-total cases,
-        * ie, for each interesting ordering, collect all the cheapest startup
-        * subpaths and all the cheapest total paths, and build a MergeAppend path
-        * for each list.
-        */
-       foreach(l, all_child_pathkeys)
+               /* Select the child paths for an Append with this parameterization */
+               subpaths = NIL;
+               subpaths_valid = true;
+               foreach(lcr, live_childrels)
+               {
+                       RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+                       Path       *subpath;
+
+                       subpath = get_cheapest_parameterized_child_path(root,
+                                                                                                                       childrel,
+                                                                                                                       required_outer);
+                       if (subpath == NULL)
+                       {
+                               /* failed to make a suitable path for this child */
+                               subpaths_valid = false;
+                               break;
+                       }
+                       subpaths = accumulate_append_subpath(subpaths, subpath);
+               }
+
+               if (subpaths_valid)
+                       add_path(rel, (Path *)
+                                        create_append_path(rel, subpaths, required_outer));
+       }
+}
+
+/*
+ * generate_mergeappend_paths
+ *             Generate MergeAppend paths for an append relation
+ *
+ * Generate a path for each ordering (pathkey list) appearing in
+ * all_child_pathkeys.
+ *
+ * We consider both cheapest-startup and cheapest-total cases, ie, for each
+ * interesting ordering, collect all the cheapest startup subpaths and all the
+ * cheapest total paths, and build a MergeAppend path for each case.
+ *
+ * We don't currently generate any parameterized MergeAppend paths.  While
+ * it would not take much more code here to do so, it's very unclear that it
+ * is worth the planning cycles to investigate such paths: there's little
+ * use for an ordered path on the inside of a nestloop.  In fact, it's likely
+ * that the current coding of add_path would reject such paths out of hand,
+ * because add_path gives no credit for sort ordering of parameterized paths,
+ * and a parameterized MergeAppend is going to be more expensive than the
+ * corresponding parameterized Append path.  If we ever try harder to support
+ * parameterized mergejoin plans, it might be worth adding support for
+ * parameterized MergeAppends to feed such joins.  (See notes in
+ * optimizer/README for why that might not ever happen, though.)
+ */
+static void
+generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
+                                                  List *live_childrels,
+                                                  List *all_child_pathkeys)
+{
+       ListCell   *lcp;
+
+       foreach(lcp, all_child_pathkeys)
        {
-               List       *pathkeys = (List *) lfirst(l);
+               List       *pathkeys = (List *) lfirst(lcp);
                List       *startup_subpaths = NIL;
                List       *total_subpaths = NIL;
                bool            startup_neq_total = false;
@@ -327,20 +267,25 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        cheapest_startup =
                                get_cheapest_path_for_pathkeys(childrel->pathlist,
                                                                                           pathkeys,
+                                                                                          NULL,
                                                                                           STARTUP_COST);
                        cheapest_total =
                                get_cheapest_path_for_pathkeys(childrel->pathlist,
                                                                                           pathkeys,
+                                                                                          NULL,
                                                                                           TOTAL_COST);
 
                        /*
-                        * If we can't find any paths with the right order just add the
-                        * cheapest-total path; we'll have to sort it.
+                        * If we can't find any paths with the right order just use the
+                        * cheapest-total path; we'll have to sort it later.
                         */
-                       if (cheapest_startup == NULL)
-                               cheapest_startup = childrel->cheapest_total_path;
-                       if (cheapest_total == NULL)
-                               cheapest_total = childrel->cheapest_total_path;
+                       if (cheapest_startup == NULL || cheapest_total == NULL)
+                       {
+                               cheapest_startup = cheapest_total =
+                                       childrel->cheapest_total_path;
+                               /* Assert we do have an unparameterized path for this child */
+                               Assert(cheapest_total->param_info == NULL);
+                       }
 
                        /*
                         * Notice whether we actually have different paths for the
@@ -360,26 +305,103 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                add_path(rel, (Path *) create_merge_append_path(root,
                                                                                                                rel,
                                                                                                                startup_subpaths,
-                                                                                                               pathkeys));
+                                                                                                               pathkeys,
+                                                                                                               NULL));
                if (startup_neq_total)
                        add_path(rel, (Path *) create_merge_append_path(root,
                                                                                                                        rel,
                                                                                                                        total_subpaths,
-                                                                                                                       pathkeys));
+                                                                                                                       pathkeys,
+                                                                                                                       NULL));
        }
+}
 
-       /* Select cheapest path */
-       set_cheapest(rel);
+/*
+ * get_cheapest_parameterized_child_path
+ *             Get cheapest path for this relation that has exactly the requested
+ *             parameterization.
+ *
+ * Returns NULL if unable to create such a path.
+ */
+static Path *
+get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
+                                                                         Relids required_outer)
+{
+       Path       *cheapest;
+       ListCell   *lc;
+
+       /*
+        * Look up the cheapest existing path with no more than the needed
+        * parameterization.  If it has exactly the needed parameterization, we're
+        * done.
+        */
+       cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
+                                                                                         NIL,
+                                                                                         required_outer,
+                                                                                         TOTAL_COST);
+       Assert(cheapest != NULL);
+       if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
+               return cheapest;
+
+       /*
+        * Otherwise, we can "reparameterize" an existing path to match the given
+        * parameterization, which effectively means pushing down additional
+        * joinquals to be checked within the path's scan.  However, some existing
+        * paths might check the available joinquals already while others don't;
+        * therefore, it's not clear which existing path will be cheapest after
+        * reparameterization.  We have to go through them all and find out.
+        */
+       cheapest = NULL;
+       foreach(lc, rel->pathlist)
+       {
+               Path       *path = (Path *) lfirst(lc);
+
+               /* Can't use it if it needs more than requested parameterization */
+               if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+                       continue;
+
+               /*
+                * Reparameterization can only increase the path's cost, so if it's
+                * already more expensive than the current cheapest, forget it.
+                */
+               if (cheapest != NULL &&
+                       compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+                       continue;
+
+               /* Reparameterize if needed, then recheck cost */
+               if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
+               {
+                       path = reparameterize_path(root, path, required_outer, 1.0);
+                       if (path == NULL)
+                               continue;               /* failed to reparameterize this one */
+                       Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
+
+                       if (cheapest != NULL &&
+                               compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+                               continue;
+               }
+
+               /* We have a new best path */
+               cheapest = path;
+       }
+
+       /* Return the best path, or NULL if we found no suitable candidate */
+       return cheapest;
 }
 
 /*
  * accumulate_append_subpath
  *             Add a subpath to the list being built for an Append or MergeAppend
  *
- * It's possible that the child is itself an Append path, in which case
- * we can "cut out the middleman" and just add its child paths to our
- * own list.  (We don't try to do this earlier because we need to
- * apply both levels of transformation to the quals.)
+ * It's possible that the child is itself an Append or MergeAppend path, in
+ * which case we can "cut out the middleman" and just add its child paths to
+ * our own list.  (We don't try to do this earlier because we need to apply
+ * both levels of transformation to the quals.)
+ *
+ * Note that if we omit a child MergeAppend in this way, we are effectively
+ * omitting a sort step, which seems fine: if the parent is to be an Append,
+ * its result would be unsorted anyway, while if the parent is to be a
+ * MergeAppend, there's no point in a separate sort on a child.
  */
 static List *
 accumulate_append_subpath(List *subpaths, Path *path)
@@ -391,31 +413,18 @@ accumulate_append_subpath(List *subpaths, Path *path)
                /* list_copy is important here to avoid sharing list substructure */
                return list_concat(subpaths, list_copy(apath->subpaths));
        }
+       else if (IsA(path, MergeAppendPath))
+       {
+               MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+               /* list_copy is important here to avoid sharing list substructure */
+               return list_concat(subpaths, list_copy(mpath->subpaths));
+       }
        else
                return lappend(subpaths, path);
 }
 
 /*
- * set_dummy_rel_pathlist
- *       Build a dummy path for a relation that's been excluded by constraints
- *
- * Rather than inventing a special "dummy" path type, we represent this as an
- * AppendPath with no members (see also IS_DUMMY_PATH macro).
- */
-static void
-set_dummy_rel_pathlist(RelOptInfo *rel)
-{
-       /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
-       rel->rows = 0;
-       rel->width = 0;
-
-       add_path(rel, (Path *) create_append_path(rel, NIL));
-
-       /* Select cheapest path (pretty easy in this case...) */
-       set_cheapest(rel);
-}
-
-/*
  * standard_join_search
  *       Find possible joinpaths for a query by successively finding ways
  *       to join component relations into join relations.
@@ -424,7 +433,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
  *             independent jointree items in the query.  This is > 1.
  *
  * 'initial_rels' is a list of RelOptInfo nodes for each independent
- *             jointree item.  These are the components to be joined together.
+ *             jointree item.  These are the components to be joined together.
  *             Note that levels_needed == list_length(initial_rels).
  *
  * Returns the final level of join relations, i.e., the relation that is
@@ -440,7 +449,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
  * needed for these paths need have been instantiated.
  *
  * Note to plugin authors: the functions invoked during standard_join_search()
- * modify root->join_rel_list and root->join_rel_hash. If you want to do more
+ * modify root->join_rel_list and root->join_rel_hash.  If you want to do more
  * than one join-order search, you'll probably need to save and restore the
  * original states of those data structures.  See geqo_eval() for an example.
  */
@@ -543,37 +552,34 @@ join_search_one_level(PlannerInfo *root, int level)
         * We prefer to join using join clauses, but if we find a rel of level-1
         * members that has no join clauses, we will generate Cartesian-product
         * joins against all initial rels not already contained in it.
-        *
-        * In the first pass (level == 2), we try to join each initial rel to each
-        * initial rel that appears later in joinrels[1].  (The mirror-image joins
-        * are handled automatically by make_join_rel.)  In later passes, we try
-        * to join rels of size level-1 from joinrels[level-1] to each initial rel
-        * in joinrels[1].
         */
        foreach(r, joinrels[level - 1])
        {
                RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
-               ListCell   *other_rels;
-
-               if (level == 2)
-                       other_rels = lnext(r);          /* only consider remaining initial
-                                                                                * rels */
-               else
-                       other_rels = list_head(joinrels[1]);            /* consider all initial
-                                                                                                                * rels */
 
                if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
                        has_join_restriction(root, old_rel))
                {
                        /*
-                        * Note that if all available join clauses for this rel require
-                        * more than one other rel, we will fail to make any joins against
-                        * it here.  In most cases that's OK; it'll be considered by
-                        * "bushy plan" join code in a higher-level pass where we have
-                        * those other rels collected into a join rel.
+                        * There are join clauses or join order restrictions relevant to
+                        * this rel, so consider joins between this rel and (only) those
+                        * initial rels it is linked to by a clause or restriction.
                         *
-                        * See also the last-ditch case below.
+                        * At level 2 this condition is symmetric, so there is no need to
+                        * look at initial rels before this one in the list; we already
+                        * considered such joins when we were at the earlier rel.  (The
+                        * mirror-image joins are handled automatically by make_join_rel.)
+                        * In later passes (level > 2), we join rels of the previous level
+                        * to each initial rel they don't already include but have a join
+                        * clause or restriction with.
                         */
+                       ListCell   *other_rels;
+
+                       if (level == 2)         /* consider remaining initial rels */
+                               other_rels = lnext(r);
+                       else    /* consider all initial rels */
+                               other_rels = list_head(joinrels[1]);
+
                        make_rels_by_clause_joins(root,
                                                                          old_rel,
                                                                          other_rels);
@@ -584,10 +590,17 @@ join_search_one_level(PlannerInfo *root, int level)
                         * Oops, we have a relation that is not joined to any other
                         * relation, either directly or by join-order restrictions.
                         * Cartesian product time.
+                        *
+                        * We consider a cartesian product with each not-already-included
+                        * initial rel, whether it has other join clauses or not.  At
+                        * level 2, if there are two or more clauseless initial rels, we
+                        * will redundantly consider joining them in both directions; but
+                        * such cases aren't common enough to justify adding complexity to
+                        * avoid the duplicated effort.
                         */
                        make_rels_by_clauseless_joins(root,
                                                                                  old_rel,
-                                                                                 other_rels);
+                                                                                 list_head(joinrels[1]));
                }
        }
 
@@ -617,7 +630,7 @@ join_search_one_level(PlannerInfo *root, int level)
                        ListCell   *r2;
 
                        /*
-                        * We can ignore clauseless joins here, *except* when they
+                        * We can ignore relations without join clauses here, unless they
                         * participate in join-order restrictions --- then we might have
                         * to force a bushy join plan.
                         */
@@ -638,8 +651,8 @@ join_search_one_level(PlannerInfo *root, int level)
                                {
                                        /*
                                         * OK, we can build a rel of the right level from this
-                                        * pair of rels.  Do so if there is at least one usable
-                                        * join clause or a relevant join restriction.
+                                        * pair of rels.  Do so if there is at least one relevant
+                                        * join clause or join order restriction.
                                         */
                                        if (have_relevant_joinclause(root, old_rel, new_rel) ||
                                                have_join_order_restriction(root, old_rel, new_rel))
@@ -651,17 +664,24 @@ join_search_one_level(PlannerInfo *root, int level)
                }
        }
 
-       /*
+       /*----------
         * Last-ditch effort: if we failed to find any usable joins so far, force
         * a set of cartesian-product joins to be generated.  This handles the
         * special case where all the available rels have join clauses but we
-        * cannot use any of those clauses yet.  An example is
+        * cannot use any of those clauses yet.  This can only happen when we are
+        * considering a join sub-problem (a sub-joinlist) and all the rels in the
+        * sub-problem have only join clauses with rels outside the sub-problem.
+        * An example is
         *
-        * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0;
+        *              SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
+        *              WHERE a.w = c.x AND b.y = d.z;
         *
-        * The join clause will be usable at level 3, but at level 2 we have no
-        * choice but to make cartesian joins.  We consider only left-sided and
-        * right-sided cartesian joins in this case (no bushy).
+        * If the "a INNER JOIN b" sub-problem does not get flattened into the
+        * upper level, we must be willing to make a cartesian join of a and b;
+        * but the code above will not have done so, because it thought that both
+        * a and b have joinclauses.  We consider only left-sided and right-sided
+        * cartesian joins in this case (no bushy).
+        *----------
         */
        if (joinrels[level] == NIL)
        {
@@ -672,23 +692,15 @@ join_search_one_level(PlannerInfo *root, int level)
                foreach(r, joinrels[level - 1])
                {
                        RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
-                       ListCell   *other_rels;
-
-                       if (level == 2)
-                               other_rels = lnext(r);  /* only consider remaining initial
-                                                                                * rels */
-                       else
-                               other_rels = list_head(joinrels[1]);    /* consider all initial
-                                                                                                                * rels */
 
                        make_rels_by_clauseless_joins(root,
                                                                                  old_rel,
-                                                                                 other_rels);
+                                                                                 list_head(joinrels[1]));
                }
 
                /*----------
                 * When special joins are involved, there may be no legal way
-                * to make an N-way join for some values of N.  For example consider
+                * to make an N-way join for some values of N.  For example consider
                 *
                 * SELECT ... FROM t1 WHERE
                 *       x IN (SELECT ... FROM t2,t3 WHERE ...) AND
@@ -699,11 +711,14 @@ join_search_one_level(PlannerInfo *root, int level)
                 * to accept failure at level 4 and go on to discover a workable
                 * bushy plan at level 5.
                 *
-                * However, if there are no special joins then join_is_legal() should
-                * never fail, and so the following sanity check is useful.
+                * However, if there are no special joins and no lateral references
+                * then join_is_legal() should never fail, and so the following sanity
+                * check is useful.
                 *----------
                 */
-               if (joinrels[level] == NIL && root->join_info_list == NIL)
+               if (joinrels[level] == NIL &&
+                       root->join_info_list == NIL &&
+                       !root->hasLateralRTEs)
                        elog(ERROR, "failed to build any %d-way joins", level);
        }
 }
@@ -802,11 +817,11 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
        SpecialJoinInfo *match_sjinfo;
        bool            reversed;
        bool            unique_ified;
-       bool            is_valid_inner;
+       bool            must_be_leftjoin;
        ListCell   *l;
 
        /*
-        * Ensure output params are set on failure return.      This is just to
+        * Ensure output params are set on failure return.  This is just to
         * suppress uninitialized-variable warnings from overly anal compilers.
         */
        *sjinfo_p = NULL;
@@ -814,13 +829,13 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 
        /*
         * If we have any special joins, the proposed join might be illegal; and
-        * in any case we have to determine its join type.      Scan the join info
-        * list for conflicts.
+        * in any case we have to determine its join type.  Scan the join info
+        * list for matches and conflicts.
         */
        match_sjinfo = NULL;
        reversed = false;
        unique_ified = false;
-       is_valid_inner = true;
+       must_be_leftjoin = false;
 
        foreach(l, root->join_info_list)
        {
@@ -871,7 +886,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                 * If one input contains min_lefthand and the other contains
                 * min_righthand, then we can perform the SJ at this join.
                 *
-                * Barf if we get matches to more than one SJ (is that possible?)
+                * Reject if we get matches to more than one SJ; that implies we're
+                * considering something that's not really valid.
                 */
                if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
                        bms_is_subset(sjinfo->min_righthand, rel2->relids))
@@ -936,52 +952,171 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                }
                else
                {
-                       /*----------
-                        * Otherwise, the proposed join overlaps the RHS but isn't
-                        * a valid implementation of this SJ.  It might still be
-                        * a legal join, however.  If both inputs overlap the RHS,
-                        * assume that it's OK.  Since the inputs presumably got past
-                        * this function's checks previously, they can't overlap the
-                        * LHS and their violations of the RHS boundary must represent
-                        * SJs that have been determined to commute with this one.
-                        * We have to allow this to work correctly in cases like
-                        *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
-                        * when the c/d join has been determined to commute with the join
-                        * to a, and hence d is not part of min_righthand for the upper
-                        * join.  It should be legal to join b to c/d but this will appear
-                        * as a violation of the upper join's RHS.
-                        * Furthermore, if one input overlaps the RHS and the other does
-                        * not, we should still allow the join if it is a valid
-                        * implementation of some other SJ.  We have to allow this to
-                        * support the associative identity
-                        *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-                        * since joining B directly to C violates the lower SJ's RHS.
-                        * We assume that make_outerjoininfo() set things up correctly
-                        * so that we'll only match to some SJ if the join is valid.
-                        * Set flag here to check at bottom of loop.
-                        *----------
+                       /*
+                        * Otherwise, the proposed join overlaps the RHS but isn't a valid
+                        * implementation of this SJ.  But don't panic quite yet: the RHS
+                        * violation might have occurred previously, in one or both input
+                        * relations, in which case we must have previously decided that
+                        * it was OK to commute some other SJ with this one.  If we need
+                        * to perform this join to finish building up the RHS, rejecting
+                        * it could lead to not finding any plan at all.  (This can occur
+                        * because of the heuristics elsewhere in this file that postpone
+                        * clauseless joins: we might not consider doing a clauseless join
+                        * within the RHS until after we've performed other, validly
+                        * commutable SJs with one or both sides of the clauseless join.)
+                        * This consideration boils down to the rule that if both inputs
+                        * overlap the RHS, we can allow the join --- they are either
+                        * fully within the RHS, or represent previously-allowed joins to
+                        * rels outside it.
                         */
-                       if (sjinfo->jointype != JOIN_SEMI &&
-                               bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+                       if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
                                bms_overlap(rel2->relids, sjinfo->min_righthand))
-                       {
-                               /* seems OK */
-                               Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
-                       }
-                       else
-                               is_valid_inner = false;
+                               continue;               /* assume valid previous violation of RHS */
+
+                       /*
+                        * The proposed join could still be legal, but only if we're
+                        * allowed to associate it into the RHS of this SJ.  That means
+                        * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
+                        * not FULL) and the proposed join must not overlap the LHS.
+                        */
+                       if (sjinfo->jointype != JOIN_LEFT ||
+                               bms_overlap(joinrelids, sjinfo->min_lefthand))
+                               return false;   /* invalid join path */
+
+                       /*
+                        * To be valid, the proposed join must be a LEFT join; otherwise
+                        * it can't associate into this SJ's RHS.  But we may not yet have
+                        * found the SpecialJoinInfo matching the proposed join, so we
+                        * can't test that yet.  Remember the requirement for later.
+                        */
+                       must_be_leftjoin = true;
                }
        }
 
        /*
-        * Fail if violated some SJ's RHS and didn't match to another SJ. However,
-        * "matching" to a semijoin we are implementing by unique-ification
-        * doesn't count (think: it's really an inner join).
+        * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
+        * proposed join can't associate into an SJ's RHS.
+        *
+        * Also, fail if the proposed join's predicate isn't strict; we're
+        * essentially checking to see if we can apply outer-join identity 3, and
+        * that's a requirement.  (This check may be redundant with checks in
+        * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
         */
-       if (!is_valid_inner &&
-               (match_sjinfo == NULL || unique_ified))
+       if (must_be_leftjoin &&
+               (match_sjinfo == NULL ||
+                match_sjinfo->jointype != JOIN_LEFT ||
+                !match_sjinfo->lhs_strict))
                return false;                   /* invalid join path */
 
+       /*
+        * We also have to check for constraints imposed by LATERAL references.
+        */
+       if (root->hasLateralRTEs)
+       {
+               bool            lateral_fwd;
+               bool            lateral_rev;
+               Relids          join_lateral_rels;
+
+               /*
+                * The proposed rels could each contain lateral references to the
+                * other, in which case the join is impossible.  If there are lateral
+                * references in just one direction, then the join has to be done with
+                * a nestloop with the lateral referencer on the inside.  If the join
+                * matches an SJ that cannot be implemented by such a nestloop, the
+                * join is impossible.
+                *
+                * Also, if the lateral reference is only indirect, we should reject
+                * the join; whatever rel(s) the reference chain goes through must be
+                * joined to first.
+                *
+                * Another case that might keep us from building a valid plan is the
+                * implementation restriction described by have_dangerous_phv().
+                */
+               lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
+               lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
+               if (lateral_fwd && lateral_rev)
+                       return false;           /* have lateral refs in both directions */
+               if (lateral_fwd)
+               {
+                       /* has to be implemented as nestloop with rel1 on left */
+                       if (match_sjinfo &&
+                               (reversed ||
+                                unique_ified ||
+                                match_sjinfo->jointype == JOIN_FULL))
+                               return false;   /* not implementable as nestloop */
+                       /* check there is a direct reference from rel2 to rel1 */
+                       if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
+                               return false;   /* only indirect refs, so reject */
+                       /* check we won't have a dangerous PHV */
+                       if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+                               return false;   /* might be unable to handle required PHV */
+               }
+               else if (lateral_rev)
+               {
+                       /* has to be implemented as nestloop with rel2 on left */
+                       if (match_sjinfo &&
+                               (!reversed ||
+                                unique_ified ||
+                                match_sjinfo->jointype == JOIN_FULL))
+                               return false;   /* not implementable as nestloop */
+                       /* check there is a direct reference from rel1 to rel2 */
+                       if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
+                               return false;   /* only indirect refs, so reject */
+                       /* check we won't have a dangerous PHV */
+                       if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+                               return false;   /* might be unable to handle required PHV */
+               }
+
+               /*
+                * LATERAL references could also cause problems later on if we accept
+                * this join: if the join's minimum parameterization includes any rels
+                * that would have to be on the inside of an outer join with this join
+                * rel, then it's never going to be possible to build the complete
+                * query using this join.  We should reject this join not only because
+                * it'll save work, but because if we don't, the clauseless-join
+                * heuristics might think that legality of this join means that some
+                * other join rel need not be formed, and that could lead to failure
+                * to find any plan at all.  We have to consider not only rels that
+                * are directly on the inner side of an OJ with the joinrel, but also
+                * ones that are indirectly so, so search to find all such rels.
+                */
+               join_lateral_rels = min_join_parameterization(root, joinrelids,
+                                                                                                         rel1, rel2);
+               if (join_lateral_rels)
+               {
+                       Relids          join_plus_rhs = bms_copy(joinrelids);
+                       bool            more;
+
+                       do
+                       {
+                               more = false;
+                               foreach(l, root->join_info_list)
+                               {
+                                       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+                                       if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
+                                               !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
+                                       {
+                                               join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                                                                         sjinfo->min_righthand);
+                                               more = true;
+                                       }
+                                       /* full joins constrain both sides symmetrically */
+                                       if (sjinfo->jointype == JOIN_FULL &&
+                                               bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
+                                               !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
+                                       {
+                                               join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                                                                               sjinfo->min_lefthand);
+                                               more = true;
+                                       }
+                               }
+                       } while (more);
+                       if (bms_overlap(join_plus_rhs, join_lateral_rels))
+                               return false;   /* will not be able to join to some RHS rel */
+               }
+       }
+
        /* Otherwise, it's a valid join */
        *sjinfo_p = match_sjinfo;
        *reversed_p = reversed;
@@ -990,12 +1125,13 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 
 /*
  * has_join_restriction
- *             Detect whether the specified relation has join-order restrictions
- *             due to being inside an outer join or an IN (sub-SELECT).
+ *             Detect whether the specified relation has join-order restrictions
+ *             due to being inside an outer join or an IN (sub-SELECT), or due to
+ *             participating in any LATERAL references or multi-rel PlaceHolderVars.
  *
  * Essentially, this tests whether have_join_order_restriction() could
  * succeed with this rel and some other one.  It's OK if we sometimes
- * say "true" incorrectly.     (Therefore, we don't bother with the relatively
+ * say "true" incorrectly.  (Therefore, we don't bother with the relatively
  * expensive has_legal_joinclause test.)
  */
 static bool
@@ -1003,6 +1139,18 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
        ListCell   *l;
 
+       if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
+               return true;
+
+       foreach(l, root->placeholder_list)
+       {
+               PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+
+               if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
+                       !bms_equal(rel->relids, phinfo->ph_eval_at))
+                       return true;
+       }
+
        foreach(l, root->join_info_list)
        {
                SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
@@ -1027,14 +1175,11 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 
 /*
  * is_dummy_rel --- has relation been proven empty?
- *
- * If so, it will have a single path that is dummy.
  */
 static bool
 is_dummy_rel(RelOptInfo *rel)
 {
-       return (rel->cheapest_total_path != NULL &&
-                       IS_DUMMY_PATH(rel->cheapest_total_path));
+       return IS_DUMMY_REL(rel);
 }
 
 /*
@@ -1045,7 +1190,7 @@ is_dummy_rel(RelOptInfo *rel)
  * dummy.
  *
  * Also, when called during GEQO join planning, we are in a short-lived
- * memory context.     We must make sure that the dummy path attached to a
+ * memory context.  We must make sure that the dummy path attached to a
  * baserel survives the GEQO cycle, else the baserel is trashed for future
  * GEQO cycles.  On the other hand, when we are marking a joinrel during GEQO,
  * we don't want the dummy path to clutter the main planning context.  Upshot
@@ -1071,27 +1216,30 @@ mark_dummy_rel(RelOptInfo *rel)
        rel->pathlist = NIL;
 
        /* Set up the dummy path */
-       add_path(rel, (Path *) create_append_path(rel, NIL));
+       add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
 
-       /* Set or update cheapest_total_path */
+       /* Set or update cheapest_total_path and related fields */
        set_cheapest(rel);
 
        MemoryContextSwitchTo(oldcontext);
 }
 
 /*
- * restriction_is_constant_false --- is a restrictlist just FALSE?
+ * restriction_is_constant_false --- is a restrictlist just false?
  *
- * In cases where a qual is provably constant FALSE, eval_const_expressions
+ * In cases where a qual is provably constant false, eval_const_expressions
  * will generally have thrown away anything that's ANDed with it.  In outer
  * join situations this will leave us computing cartesian products only to
  * decide there's no match for an outer row, which is pretty stupid.  So,
  * we need to detect the case.
  *
- * If only_pushed_down is TRUE, then consider only pushed-down quals.
+ * If only_pushed_down is true, then consider only quals that are pushed-down
+ * from the point of view of the joinrel.
  */
 static bool
-restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
+restriction_is_constant_false(List *restrictlist,
+                                                         RelOptInfo *joinrel,
+                                                         bool only_pushed_down)
 {
        ListCell   *lc;
 
@@ -1106,7 +1254,7 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
                RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
 
                Assert(IsA(rinfo, RestrictInfo));
-               if (only_pushed_down && !rinfo->is_pushed_down)
+               if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
                        continue;
 
                if (rinfo->clause && IsA(rinfo->clause, Const))