OSDN Git Service

Support PostgreSQL 14
[pghintplan/pg_hint_plan.git] / make_join_rel.c
index 88c9abf..638e500 100644 (file)
 /*-------------------------------------------------------------------------
  *
  * make_join_rel.c
- *       Routines copied from PostgreSQL core distribution.
+ *       Routines copied from PostgreSQL core distribution with some
+ *       modifications.
  *
- * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * src/backend/optimizer/path/joinrels.c
+ *
+ * This file contains the following functions from corresponding files.
+ *
+ *     static functions:
+ *     make_join_rel()
+ *     populate_joinrel_with_paths()
+ *
+ * Portions Copyright (c) 2013-2020, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *-------------------------------------------------------------------------
  */
 
-static bool is_dummy_rel(RelOptInfo *rel);
-static void mark_dummy_rel(RelOptInfo *rel);
-static bool restriction_is_constant_false(List *restrictlist,
-                                                         bool only_pushed_down);
-
 /*
- * join_is_legal
- *        Determine whether a proposed join is legal given the query's
- *        join order constraints; and if it is, determine the join type.
- *
- * Caller must supply not only the two rels, but the union of their relids.
- * (We could simplify the API by computing joinrelids locally, but this
- * would be redundant work in the normal path through make_join_rel.)
- *
- * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
- * else it's set to point to the associated SpecialJoinInfo node.  Also,
- * *reversed_p is set TRUE if the given relations need to be swapped to
- * match the SpecialJoinInfo node.
+ * adjust_rows: tweak estimated row numbers according to the hint.
  */
-static bool
-join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
-                         Relids joinrelids,
-                         SpecialJoinInfo **sjinfo_p, bool *reversed_p)
+static double
+adjust_rows(double rows, RowsHint *hint)
 {
-       SpecialJoinInfo *match_sjinfo;
-       bool            reversed;
-       bool            unique_ified;
-       bool            is_valid_inner;
-       ListCell   *l;
-
-       /*
-        * Ensure output params are set on failure return.      This is just to
-        * suppress uninitialized-variable warnings from overly anal compilers.
-        */
-       *sjinfo_p = NULL;
-       *reversed_p = false;
-
-       /*
-        * If we have any special joins, the proposed join might be illegal; and
-        * in any case we have to determine its join type.      Scan the join info
-        * list for conflicts.
-        */
-       match_sjinfo = NULL;
-       reversed = false;
-       unique_ified = false;
-       is_valid_inner = true;
-
-       foreach(l, root->join_info_list)
-       {
-               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
-
-               /*
-                * This special join is not relevant unless its RHS overlaps the
-                * proposed join.  (Check this first as a fast path for dismissing
-                * most irrelevant SJs quickly.)
-                */
-               if (!bms_overlap(sjinfo->min_righthand, joinrelids))
-                       continue;
-
-               /*
-                * Also, not relevant if proposed join is fully contained within RHS
-                * (ie, we're still building up the RHS).
-                */
-               if (bms_is_subset(joinrelids, sjinfo->min_righthand))
-                       continue;
-
-               /*
-                * Also, not relevant if SJ is already done within either input.
-                */
-               if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
-                       bms_is_subset(sjinfo->min_righthand, rel1->relids))
-                       continue;
-               if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
-                       bms_is_subset(sjinfo->min_righthand, rel2->relids))
-                       continue;
-
-               /*
-                * If it's a semijoin and we already joined the RHS to any other rels
-                * within either input, then we must have unique-ified the RHS at that
-                * point (see below).  Therefore the semijoin is no longer relevant in
-                * this join path.
-                */
-               if (sjinfo->jointype == JOIN_SEMI)
-               {
-                       if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
-                               !bms_equal(sjinfo->syn_righthand, rel1->relids))
-                               continue;
-                       if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
-                               !bms_equal(sjinfo->syn_righthand, rel2->relids))
-                               continue;
-               }
-
-               /*
-                * If one input contains min_lefthand and the other contains
-                * min_righthand, then we can perform the SJ at this join.
-                *
-                * Barf if we get matches to more than one SJ (is that possible?)
-                */
-               if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
-                       bms_is_subset(sjinfo->min_righthand, rel2->relids))
-               {
-                       if (match_sjinfo)
-                               return false;   /* invalid join path */
-                       match_sjinfo = sjinfo;
-                       reversed = false;
-               }
-               else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
-                                bms_is_subset(sjinfo->min_righthand, rel1->relids))
-               {
-                       if (match_sjinfo)
-                               return false;   /* invalid join path */
-                       match_sjinfo = sjinfo;
-                       reversed = true;
-               }
-               else if (sjinfo->jointype == JOIN_SEMI &&
-                                bms_equal(sjinfo->syn_righthand, rel2->relids) &&
-                                create_unique_path(root, rel2, rel2->cheapest_total_path,
-                                                                       sjinfo) != NULL)
-               {
-                       /*----------
-                        * For a semijoin, we can join the RHS to anything else by
-                        * unique-ifying the RHS (if the RHS can be unique-ified).
-                        * We will only get here if we have the full RHS but less
-                        * than min_lefthand on the LHS.
-                        *
-                        * The reason to consider such a join path is exemplified by
-                        *      SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
-                        * If we insist on doing this as a semijoin we will first have
-                        * to form the cartesian product of A*B.  But if we unique-ify
-                        * C then the semijoin becomes a plain innerjoin and we can join
-                        * in any order, eg C to A and then to B.  When C is much smaller
-                        * than A and B this can be a huge win.  So we allow C to be
-                        * joined to just A or just B here, and then make_join_rel has
-                        * to handle the case properly.
-                        *
-                        * Note that actually we'll allow unique-ified C to be joined to
-                        * some other relation D here, too.  That is legal, if usually not
-                        * very sane, and this routine is only concerned with legality not
-                        * with whether the join is good strategy.
-                        *----------
-                        */
-                       if (match_sjinfo)
-                               return false;   /* invalid join path */
-                       match_sjinfo = sjinfo;
-                       reversed = false;
-                       unique_ified = true;
-               }
-               else if (sjinfo->jointype == JOIN_SEMI &&
-                                bms_equal(sjinfo->syn_righthand, rel1->relids) &&
-                                create_unique_path(root, rel1, rel1->cheapest_total_path,
-                                                                       sjinfo) != NULL)
-               {
-                       /* Reversed semijoin case */
-                       if (match_sjinfo)
-                               return false;   /* invalid join path */
-                       match_sjinfo = sjinfo;
-                       reversed = true;
-                       unique_ified = true;
-               }
-               else
-               {
-                       /*----------
-                        * Otherwise, the proposed join overlaps the RHS but isn't
-                        * a valid implementation of this SJ.  It might still be
-                        * a legal join, however.  If both inputs overlap the RHS,
-                        * assume that it's OK.  Since the inputs presumably got past
-                        * this function's checks previously, they can't overlap the
-                        * LHS and their violations of the RHS boundary must represent
-                        * SJs that have been determined to commute with this one.
-                        * We have to allow this to work correctly in cases like
-                        *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
-                        * when the c/d join has been determined to commute with the join
-                        * to a, and hence d is not part of min_righthand for the upper
-                        * join.  It should be legal to join b to c/d but this will appear
-                        * as a violation of the upper join's RHS.
-                        * Furthermore, if one input overlaps the RHS and the other does
-                        * not, we should still allow the join if it is a valid
-                        * implementation of some other SJ.  We have to allow this to
-                        * support the associative identity
-                        *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-                        * since joining B directly to C violates the lower SJ's RHS.
-                        * We assume that make_outerjoininfo() set things up correctly
-                        * so that we'll only match to some SJ if the join is valid.
-                        * Set flag here to check at bottom of loop.
-                        *----------
-                        */
-                       if (sjinfo->jointype != JOIN_SEMI &&
-                               bms_overlap(rel1->relids, sjinfo->min_righthand) &&
-                               bms_overlap(rel2->relids, sjinfo->min_righthand))
-                       {
-                               /* seems OK */
-                               Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
-                       }
-                       else
-                               is_valid_inner = false;
-               }
-       }
-
-       /*
-        * Fail if violated some SJ's RHS and didn't match to another SJ. However,
-        * "matching" to a semijoin we are implementing by unique-ification
-        * doesn't count (think: it's really an inner join).
-        */
-       if (!is_valid_inner &&
-               (match_sjinfo == NULL || unique_ified))
-               return false;                   /* invalid join path */
-
-       /* Otherwise, it's a valid join */
-       *sjinfo_p = match_sjinfo;
-       *reversed_p = reversed;
-       return true;
+       double          result = 0.0;   /* keep compiler quiet */
+
+       if (hint->value_type == RVT_ABSOLUTE)
+               result = hint->rows;
+       else if (hint->value_type == RVT_ADD)
+               result = rows + hint->rows;
+       else if (hint->value_type == RVT_SUB)
+               result =  rows - hint->rows;
+       else if (hint->value_type == RVT_MULTI)
+               result = rows * hint->rows;
+       else
+               Assert(false);  /* unrecognized rows value type */
+
+       hint->base.state = HINT_STATE_USED;
+       if (result < 1.0)
+               ereport(WARNING,
+                               (errmsg("Force estimate to be at least one row, to avoid possible divide-by-zero when interpolating costs : %s",
+                                       hint->base.hint_str)));
+       result = clamp_row_est(result);
+       elog(DEBUG1, "adjusted rows %d to %d", (int) rows, (int) result);
+
+       return result;
 }
 
 
@@ -271,7 +98,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 
        /*
         * If it's a plain inner join, then we won't have found anything in
-        * join_info_list.      Make up a SpecialJoinInfo so that selectivity
+        * join_info_list.  Make up a SpecialJoinInfo so that selectivity
         * estimation functions will know what's being joined.
         */
        if (sjinfo == NULL)
@@ -286,7 +113,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                /* we don't bother trying to make the remaining fields valid */
                sjinfo->lhs_strict = false;
                sjinfo->delay_upper_joins = false;
-               sjinfo->join_quals = NIL;
+               sjinfo->semi_can_btree = false;
+               sjinfo->semi_can_hash = false;
+               sjinfo->semi_operators = NIL;
+               sjinfo->semi_rhs_exprs = NIL;
        }
 
        /*
@@ -296,6 +126,84 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
        joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
                                                         &restrictlist);
 
+       /* !!! START: HERE IS THE PART WHICH IS ADDED FOR PG_HINT_PLAN !!! */
+       {
+               RowsHint   *rows_hint = NULL;
+               int                     i;
+               RowsHint   *justforme = NULL;
+               RowsHint   *domultiply = NULL;
+
+               /* Search for applicable rows hint for this join node */
+               for (i = 0; i < current_hint_state->num_hints[HINT_TYPE_ROWS]; i++)
+               {
+                       rows_hint = current_hint_state->rows_hints[i];
+
+                       /*
+                        * Skip this rows_hint if it is invalid from the first or it
+                        * doesn't target any join rels.
+                        */
+                       if (!rows_hint->joinrelids ||
+                               rows_hint->base.state == HINT_STATE_ERROR)
+                               continue;
+
+                       if (bms_equal(joinrelids, rows_hint->joinrelids))
+                       {
+                               /*
+                                * This joinrel is just the target of this rows_hint, so tweak
+                                * rows estimation according to the hint.
+                                */
+                               justforme = rows_hint;
+                       }
+                       else if (!(bms_is_subset(rows_hint->joinrelids, rel1->relids) ||
+                                          bms_is_subset(rows_hint->joinrelids, rel2->relids)) &&
+                                        bms_is_subset(rows_hint->joinrelids, joinrelids) &&
+                                        rows_hint->value_type == RVT_MULTI)
+                       {
+                               /*
+                                * If the rows_hint's target relids is not a subset of both of
+                                * component rels and is a subset of this joinrel, ths hint's
+                                * targets spread over both component rels. This menas that
+                                * this hint has been never applied so far and this joinrel is
+                                * the first (and only) chance to fire in current join tree.
+                                * Only the multiplication hint has the cumulative nature so we
+                                * apply only RVT_MULTI in this way.
+                                */
+                               domultiply = rows_hint;
+                       }
+               }
+
+               if (justforme)
+               {
+                       /*
+                        * If a hint just for me is found, no other adjust method is
+                        * useles, but this cannot be more than twice becuase this joinrel
+                        * is already adjusted by this hint.
+                        */
+                       if (justforme->base.state == HINT_STATE_NOTUSED)
+                               joinrel->rows = adjust_rows(joinrel->rows, justforme);
+               }
+               else
+               {
+                       if (domultiply)
+                       {
+                               /*
+                                * If we have multiple routes up to this joinrel which are not
+                                * applicable this hint, this multiply hint will applied more
+                                * than twice. But there's no means to know of that,
+                                * re-estimate the row number of this joinrel always just
+                                * before applying the hint. This is a bit different from
+                                * normal planner behavior but it doesn't harm so much.
+                                */
+                               set_joinrel_size_estimates(root, joinrel, rel1, rel2, sjinfo,
+                                                                                  restrictlist);
+                               
+                               joinrel->rows = adjust_rows(joinrel->rows, domultiply);
+                       }
+                       
+               }
+       }
+       /* !!! END: HERE IS THE PART WHICH IS ADDED FOR PG_HINT_PLAN !!! */
+
        /*
         * If we've already proven this join is empty, we needn't consider any
         * more paths for it.
@@ -306,6 +214,28 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                return joinrel;
        }
 
+       /* Add paths to the join relation. */
+       populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
+                                                               restrictlist);
+
+       bms_free(joinrelids);
+
+       return joinrel;
+}
+
+
+/*
+ * populate_joinrel_with_paths
+ *       Add paths to the given joinrel for given pair of joining relations. The
+ *       SpecialJoinInfo provides details about the join and the restrictlist
+ *       contains the join clauses and the other clauses applicable for given pair
+ *       of the joining relations.
+ */
+static void
+populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
+                                                       RelOptInfo *rel2, RelOptInfo *joinrel,
+                                                       SpecialJoinInfo *sjinfo, List *restrictlist)
+{
        /*
         * Consider paths using each rel as both outer and inner.  Depending on
         * the join type, a provably empty outer or inner rel might mean the join
@@ -328,7 +258,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
        {
                case JOIN_INNER:
                        if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-                               restriction_is_constant_false(restrictlist, false))
+                               restriction_is_constant_false(restrictlist, joinrel, false))
                        {
                                mark_dummy_rel(joinrel);
                                break;
@@ -342,12 +272,12 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                        break;
                case JOIN_LEFT:
                        if (is_dummy_rel(rel1) ||
-                               restriction_is_constant_false(restrictlist, true))
+                               restriction_is_constant_false(restrictlist, joinrel, true))
                        {
                                mark_dummy_rel(joinrel);
                                break;
                        }
-                       if (restriction_is_constant_false(restrictlist, false) &&
+                       if (restriction_is_constant_false(restrictlist, joinrel, false) &&
                                bms_is_subset(rel2->relids, sjinfo->syn_righthand))
                                mark_dummy_rel(rel2);
                        add_paths_to_joinrel(root, joinrel, rel1, rel2,
@@ -359,7 +289,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                        break;
                case JOIN_FULL:
                        if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
-                               restriction_is_constant_false(restrictlist, true))
+                               restriction_is_constant_false(restrictlist, joinrel, true))
                        {
                                mark_dummy_rel(joinrel);
                                break;
@@ -395,7 +325,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                bms_is_subset(sjinfo->min_righthand, rel2->relids))
                        {
                                if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-                                       restriction_is_constant_false(restrictlist, false))
+                                       restriction_is_constant_false(restrictlist, joinrel, false))
                                {
                                        mark_dummy_rel(joinrel);
                                        break;
@@ -418,7 +348,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                                                   sjinfo) != NULL)
                        {
                                if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-                                       restriction_is_constant_false(restrictlist, false))
+                                       restriction_is_constant_false(restrictlist, joinrel, false))
                                {
                                        mark_dummy_rel(joinrel);
                                        break;
@@ -433,12 +363,12 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                        break;
                case JOIN_ANTI:
                        if (is_dummy_rel(rel1) ||
-                               restriction_is_constant_false(restrictlist, true))
+                               restriction_is_constant_false(restrictlist, joinrel, true))
                        {
                                mark_dummy_rel(joinrel);
                                break;
                        }
-                       if (restriction_is_constant_false(restrictlist, false) &&
+                       if (restriction_is_constant_false(restrictlist, joinrel, false) &&
                                bms_is_subset(rel2->relids, sjinfo->syn_righthand))
                                mark_dummy_rel(rel2);
                        add_paths_to_joinrel(root, joinrel, rel1, rel2,
@@ -451,107 +381,6 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                        break;
        }
 
-       bms_free(joinrelids);
-
-       return joinrel;
-}
-
-/*
- * is_dummy_rel --- has relation been proven empty?
- *
- * If so, it will have a single path that is dummy.
- */
-static bool
-is_dummy_rel(RelOptInfo *rel)
-{
-       return (rel->cheapest_total_path != NULL &&
-                       IS_DUMMY_PATH(rel->cheapest_total_path));
-}
-
-
-/*
- * Mark a relation as proven empty.
- *
- * During GEQO planning, this can get invoked more than once on the same
- * baserel struct, so it's worth checking to see if the rel is already marked
- * dummy.
- *
- * Also, when called during GEQO join planning, we are in a short-lived
- * memory context.     We must make sure that the dummy path attached to a
- * baserel survives the GEQO cycle, else the baserel is trashed for future
- * GEQO cycles.  On the other hand, when we are marking a joinrel during GEQO,
- * we don't want the dummy path to clutter the main planning context.  Upshot
- * is that the best solution is to explicitly make the dummy path in the same
- * context the given RelOptInfo is in.
- */
-static void
-mark_dummy_rel(RelOptInfo *rel)
-{
-       MemoryContext oldcontext;
-
-       /* Already marked? */
-       if (is_dummy_rel(rel))
-               return;
-
-       /* No, so choose correct context to make the dummy path in */
-       oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
-
-       /* Set dummy size estimate */
-       rel->rows = 0;
-
-       /* Evict any previously chosen paths */
-       rel->pathlist = NIL;
-
-       /* Set up the dummy path */
-       add_path(rel, (Path *) create_append_path(rel, NIL));
-
-       /* Set or update cheapest_total_path */
-       set_cheapest(rel);
-
-       MemoryContextSwitchTo(oldcontext);
-}
-
-
-/*
- * restriction_is_constant_false --- is a restrictlist just FALSE?
- *
- * In cases where a qual is provably constant FALSE, eval_const_expressions
- * will generally have thrown away anything that's ANDed with it.  In outer
- * join situations this will leave us computing cartesian products only to
- * decide there's no match for an outer row, which is pretty stupid.  So,
- * we need to detect the case.
- *
- * If only_pushed_down is TRUE, then consider only pushed-down quals.
- */
-static bool
-restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
-{
-       ListCell   *lc;
-
-       /*
-        * Despite the above comment, the restriction list we see here might
-        * possibly have other members besides the FALSE constant, since other
-        * quals could get "pushed down" to the outer join level.  So we check
-        * each member of the list.
-        */
-       foreach(lc, restrictlist)
-       {
-               RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
-
-               Assert(IsA(rinfo, RestrictInfo));
-               if (only_pushed_down && !rinfo->is_pushed_down)
-                       continue;
-
-               if (rinfo->clause && IsA(rinfo->clause, Const))
-               {
-                       Const      *con = (Const *) rinfo->clause;
-
-                       /* constant NULL is as good as constant FALSE for our purposes */
-                       if (con->constisnull)
-                               return true;
-                       if (!DatumGetBool(con->constvalue))
-                               return true;
-               }
-       }
-       return false;
+       /* Apply partitionwise join technique, if possible. */
+       try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
 }