OSDN Git Service

Extend code that deduces implied equality clauses to detect whether a
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 18 Oct 2001 16:11:42 +0000 (16:11 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 18 Oct 2001 16:11:42 +0000 (16:11 +0000)
clause being added to a particular restriction-clause list is redundant
with those already in the list.  This avoids useless work at runtime,
and (perhaps more importantly) keeps the selectivity estimation routines
from generating too-small estimates of numbers of output rows.
Also some minor improvements in OPTIMIZER_DEBUG displays.

src/backend/nodes/print.c
src/backend/optimizer/README
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/initsplan.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/relnode.c
src/include/optimizer/paths.h

index 932f55a..9834769 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.47 2001/03/22 03:59:32 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.48 2001/10/18 16:11:41 tgl Exp $
  *
  * HISTORY
  *       AUTHOR                        DATE                    MAJOR EVENT
 #include "postgres.h"
 
 #include "access/printtup.h"
+#include "catalog/pg_type.h"
 #include "nodes/print.h"
 #include "optimizer/clauses.h"
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 static char *plannode_type(Plan *p);
 
@@ -188,6 +190,36 @@ print_expr(Node *expr, List *rtable)
                }
                printf("%s.%s", relname, attname);
        }
+       else if (IsA(expr, Const))
+       {
+               Const      *c = (Const *) expr;
+               HeapTuple       typeTup;
+               Oid                     typoutput;
+               Oid                     typelem;
+               char       *outputstr;
+
+               if (c->constisnull)
+               {
+                       printf("NULL");
+                       return;
+               }
+
+               typeTup = SearchSysCache(TYPEOID,
+                                                                ObjectIdGetDatum(c->consttype),
+                                                                0, 0, 0);
+               if (!HeapTupleIsValid(typeTup))
+                       elog(ERROR, "Cache lookup for type %u failed", c->consttype);
+               typoutput = ((Form_pg_type) GETSTRUCT(typeTup))->typoutput;
+               typelem = ((Form_pg_type) GETSTRUCT(typeTup))->typelem;
+               ReleaseSysCache(typeTup);
+
+               outputstr = DatumGetCString(OidFunctionCall3(typoutput,
+                                                                                                        c->constvalue,
+                                                                                                        ObjectIdGetDatum(typelem),
+                                                                                                        Int32GetDatum(-1)));
+               printf("%s", outputstr);
+               pfree(outputstr);
+       }
        else if (IsA(expr, Expr))
        {
                Expr       *e = (Expr *) expr;
@@ -232,7 +264,7 @@ print_pathkeys(List *pathkeys, List *rtable)
                        if (lnext(k))
                                printf(", ");
                }
-               printf(") ");
+               printf(")");
                if (lnext(i))
                        printf(", ");
        }
index 520ab1b..472efbc 100644 (file)
@@ -461,6 +461,23 @@ code than it would otherwise have, and reduces the number of tuples
 processed in join stages, so it's a win to make these deductions even
 if we weren't forced to.
 
+When we generate implied equality constraints, we may find ourselves
+adding redundant clauses to specific relations.  For example, consider
+       SELECT * FROM t1, t2, t3 WHERE t1.a = t2.b AND t2.b = t3.c;
+We will generate the implied clause t1.a = t3.c and add it to the tree.
+This is good since it allows us to consider joining t1 and t3 directly,
+which we otherwise wouldn't do.  But when we reach the stage of joining
+all three relations, we will have redundant join clauses --- eg, if we
+join t1 and t2 first, then the path that joins (t1 t2) to t3 will have
+both t2.b = t3.c and t1.a = t3.c as restriction clauses.  This is bad;
+not only is evaluation of the extra clause useless work at runtime,
+but the selectivity estimator routines will underestimate the number
+of tuples produced since they won't know that the two clauses are
+perfectly redundant.  We fix this by detecting and removing redundant
+clauses as the restriction clause list is built for each join.  (We
+can't do it sooner, since which clauses are redundant will vary depending
+on the join order.)
+
 Yet another implication of all this is that mergejoinable operators
 must form closed equivalence sets.  For example, if "int2 = int4"
 and "int4 = int8" are both marked mergejoinable, then there had better
index 8a6e9b9..ebc303d 100644 (file)
@@ -8,13 +8,16 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.78 2001/07/31 17:56:30 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.79 2001/10/18 16:11:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#ifdef OPTIMIZER_DEBUG
+#include "nodes/print.h"
+#endif
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/geqo.h"
@@ -42,11 +45,6 @@ static void set_subquery_pathlist(Query *root, RelOptInfo *rel,
 static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
                                          List *initial_rels);
 
-#ifdef OPTIMIZER_DEBUG
-static void debug_print_rel(Query *root, RelOptInfo *rel);
-
-#endif
-
 
 /*
  * make_one_rel
@@ -116,6 +114,10 @@ set_base_rel_pathlists(Query *root)
                        /* Plain relation */
                        set_plain_rel_pathlist(root, rel, rte);
                }
+
+#ifdef OPTIMIZER_DEBUG
+               debug_print_rel(root, rel);
+#endif
        }
 }
 
@@ -520,10 +522,22 @@ make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
 #ifdef OPTIMIZER_DEBUG
 
 static void
-print_joinclauses(Query *root, List *clauses)
+print_relids(Relids relids)
+{
+       List       *l;
+
+       foreach(l, relids)
+       {
+               printf("%d", lfirsti(l));
+               if (lnext(l))
+                       printf(" ");
+       }
+}
+
+static void
+print_restrictclauses(Query *root, List *clauses)
 {
        List       *l;
-       extern void print_expr(Node *expr, List *rtable);       /* in print.c */
 
        foreach(l, clauses)
        {
@@ -531,21 +545,17 @@ print_joinclauses(Query *root, List *clauses)
 
                print_expr((Node *) c->clause, root->rtable);
                if (lnext(l))
-                       printf(" ");
+                       printf(", ");
        }
 }
 
 static void
 print_path(Query *root, Path *path, int indent)
 {
-       char       *ptype = NULL;
-       JoinPath   *jp;
-       bool            join = false;
+       const char   *ptype;
+       bool            join;
        int                     i;
 
-       for (i = 0; i < indent; i++)
-               printf("\t");
-
        switch (nodeTag(path))
        {
                case T_Path:
@@ -556,6 +566,10 @@ print_path(Query *root, Path *path, int indent)
                        ptype = "IdxScan";
                        join = false;
                        break;
+               case T_TidPath:
+                       ptype = "TidScan";
+                       join = false;
+                       break;
                case T_NestPath:
                        ptype = "Nestloop";
                        join = true;
@@ -569,82 +583,82 @@ print_path(Query *root, Path *path, int indent)
                        join = true;
                        break;
                default:
+                       ptype = "???Path";
+                       join = false;
                        break;
        }
+
+       for (i = 0; i < indent; i++)
+               printf("\t");
+       printf("%s(", ptype);
+       print_relids(path->parent->relids);
+       printf(") rows=%.0f cost=%.2f..%.2f\n",
+                  path->parent->rows, path->startup_cost, path->total_cost);
+
+       if (path->pathkeys)
+       {
+               for (i = 0; i < indent; i++)
+                       printf("\t");
+               printf("  pathkeys: ");
+               print_pathkeys(path->pathkeys, root->rtable);
+       }
+
        if (join)
        {
-               jp = (JoinPath *) path;
+               JoinPath   *jp = (JoinPath *) path;
 
-               printf("%s rows=%.0f cost=%.2f..%.2f\n",
-                          ptype, path->parent->rows,
-                          path->startup_cost, path->total_cost);
+               for (i = 0; i < indent; i++)
+                       printf("\t");
+               printf("  clauses: ");
+               print_restrictclauses(root, jp->joinrestrictinfo);
+               printf("\n");
 
-               if (path->pathkeys)
+               if (nodeTag(path) == T_MergePath)
                {
-                       for (i = 0; i < indent; i++)
-                               printf("\t");
-                       printf("  pathkeys=");
-                       print_pathkeys(path->pathkeys, root->rtable);
-               }
+                       MergePath  *mp = (MergePath *) path;
 
-               switch (nodeTag(path))
-               {
-                       case T_MergePath:
-                       case T_HashPath:
+                       if (mp->outersortkeys || mp->innersortkeys)
+                       {
                                for (i = 0; i < indent; i++)
                                        printf("\t");
-                               printf("  clauses=(");
-                               print_joinclauses(root, jp->joinrestrictinfo);
-                               printf(")\n");
-
-                               if (nodeTag(path) == T_MergePath)
-                               {
-                                       MergePath  *mp = (MergePath *) path;
-
-                                       if (mp->outersortkeys || mp->innersortkeys)
-                                       {
-                                               for (i = 0; i < indent; i++)
-                                                       printf("\t");
-                                               printf("  sortouter=%d sortinner=%d\n",
-                                                          ((mp->outersortkeys) ? 1 : 0),
-                                                          ((mp->innersortkeys) ? 1 : 0));
-                                       }
-                               }
-                               break;
-                       default:
-                               break;
+                               printf("  sortouter=%d sortinner=%d\n",
+                                          ((mp->outersortkeys) ? 1 : 0),
+                                          ((mp->innersortkeys) ? 1 : 0));
+                       }
                }
+
                print_path(root, jp->outerjoinpath, indent + 1);
                print_path(root, jp->innerjoinpath, indent + 1);
        }
-       else
-       {
-               int                     relid = lfirsti(path->parent->relids);
-
-               printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
-                          ptype, relid, path->parent->rows,
-                          path->startup_cost, path->total_cost);
-
-               if (path->pathkeys)
-               {
-                       for (i = 0; i < indent; i++)
-                               printf("\t");
-                       printf("  pathkeys=");
-                       print_pathkeys(path->pathkeys, root->rtable);
-               }
-       }
 }
 
-static void
+void
 debug_print_rel(Query *root, RelOptInfo *rel)
 {
        List       *l;
 
-       printf("(");
-       foreach(l, rel->relids)
-               printf("%d ", lfirsti(l));
+       printf("RELOPTINFO (");
+       print_relids(rel->relids);
        printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
 
+       if (rel->baserestrictinfo)
+       {
+               printf("\tbaserestrictinfo: ");
+               print_restrictclauses(root, rel->baserestrictinfo);
+               printf("\n");
+       }
+
+       foreach(l, rel->joininfo)
+       {
+               JoinInfo *j = (JoinInfo *) lfirst(l);
+
+               printf("\tjoininfo (");
+               print_relids(j->unjoined_relids);
+               printf("): ");
+               print_restrictclauses(root, j->jinfo_restrictinfo);
+               printf("\n");
+       }
+
        printf("\tpath list:\n");
        foreach(l, rel->pathlist)
                print_path(root, lfirst(l), 1);
@@ -652,6 +666,8 @@ debug_print_rel(Query *root, RelOptInfo *rel)
        print_path(root, rel->cheapest_startup_path, 1);
        printf("\n\tcheapest total path:\n");
        print_path(root, rel->cheapest_total_path, 1);
+       printf("\n");
+       fflush(stdout);
 }
 
 #endif  /* OPTIMIZER_DEBUG */
index 3bde257..8c805e0 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.53 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.54 2001/10/18 16:11:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -374,6 +374,10 @@ make_jointree_rel(Query *root, Node *jtnode)
                 */
                set_cheapest(rel);
 
+#ifdef OPTIMIZER_DEBUG
+               debug_print_rel(root, rel);
+#endif
+
                return rel;
        }
        else
index 4f4b672..9e96429 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.32 2001/03/22 06:16:14 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.33 2001/10/18 16:11:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -705,7 +705,7 @@ make_pathkeys_for_sortclauses(List *sortclauses,
  * This is a worthwhile savings because these routines will be invoked
  * many times when dealing with a many-relation query.
  */
-static void
+void
 cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo)
 {
        Node       *key;
index cf2f798..ff2e13e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.63 2001/06/05 05:26:04 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.64 2001/10/18 16:11:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,10 +40,13 @@ static void mark_baserels_for_outer_join(Query *root, Relids rels,
 static void distribute_qual_to_rels(Query *root, Node *clause,
                                                bool ispusheddown,
                                                bool isouterjoin,
+                                               bool isdeduced,
                                                Relids qualscope);
 static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
                                          Relids join_relids);
 static void add_vars_to_targetlist(Query *root, List *vars);
+static bool qual_is_redundant(Query *root, RestrictInfo *restrictinfo,
+                                                         List *restrictlist);
 static void check_mergejoinable(RestrictInfo *restrictinfo);
 static void check_hashjoinable(RestrictInfo *restrictinfo);
 
@@ -219,7 +222,7 @@ distribute_quals_to_rels(Query *root, Node *jtnode)
                 */
                foreach(qual, (List *) f->quals)
                        distribute_qual_to_rels(root, (Node *) lfirst(qual),
-                                                                       true, false, result);
+                                                                       true, false, false, result);
        }
        else if (IsA(jtnode, JoinExpr))
        {
@@ -279,7 +282,7 @@ distribute_quals_to_rels(Query *root, Node *jtnode)
 
                foreach(qual, (List *) j->quals)
                        distribute_qual_to_rels(root, (Node *) lfirst(qual),
-                                                                       false, isouterjoin, result);
+                                                                       false, isouterjoin, false, result);
        }
        else
                elog(ERROR, "distribute_quals_to_rels: unexpected node type %d",
@@ -339,6 +342,7 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels)
  * 'ispusheddown': if TRUE, force the clause to be marked 'ispusheddown'
  *             (this indicates the clause came from a FromExpr, not a JoinExpr)
  * 'isouterjoin': TRUE if the qual came from an OUTER JOIN's ON-clause
+ * 'isdeduced': TRUE if the qual came from implied-equality deduction
  * 'qualscope': list of baserels the qual's syntactic scope covers
  *
  * 'qualscope' identifies what level of JOIN the qual came from.  For a top
@@ -349,6 +353,7 @@ static void
 distribute_qual_to_rels(Query *root, Node *clause,
                                                bool ispusheddown,
                                                bool isouterjoin,
+                                               bool isdeduced,
                                                Relids qualscope)
 {
        RestrictInfo *restrictinfo = makeNode(RestrictInfo);
@@ -405,8 +410,17 @@ distribute_qual_to_rels(Query *root, Node *clause,
         * clause's own reference list.  At the time we are called, the
         * outerjoinset list of each baserel will show exactly those outer
         * joins that are below the qual in the join tree.
+        *
+        * If the qual came from implied-equality deduction, we can evaluate
+        * the qual at its natural semantic level.
+        * 
         */
-       if (isouterjoin)
+       if (isdeduced)
+       {
+               Assert(sameseti(relids, qualscope));
+               can_be_equijoin = true;
+       }
+       else if (isouterjoin)
        {
                relids = qualscope;
                can_be_equijoin = false;
@@ -461,9 +475,6 @@ distribute_qual_to_rels(Query *root, Node *clause,
                 */
                RelOptInfo *rel = build_base_rel(root, lfirsti(relids));
 
-               rel->baserestrictinfo = lappend(rel->baserestrictinfo,
-                                                                               restrictinfo);
-
                /*
                 * Check for a "mergejoinable" clause even though it's not a join
                 * clause.      This is so that we can recognize that "a.x = a.y"
@@ -474,10 +485,26 @@ distribute_qual_to_rels(Query *root, Node *clause,
                 */
                if (can_be_equijoin)
                        check_mergejoinable(restrictinfo);
+
+               /*
+                * If the clause was deduced from implied equality, check to see
+                * whether it is redundant with restriction clauses we already have
+                * for this rel.  Note we cannot apply this check to user-written
+                * clauses, since we haven't found the canonical pathkey sets yet
+                * while processing user clauses.  (NB: no comparable check is done
+                * in the join-clause case; redundancy will be detected when the
+                * join clause is moved into a join rel's restriction list.)
+                */
+               if (!isdeduced ||
+                       !qual_is_redundant(root, restrictinfo, rel->baserestrictinfo))
+               {
+                       /* Add clause to rel's restriction list */
+                       rel->baserestrictinfo = lappend(rel->baserestrictinfo,
+                                                                                       restrictinfo);
+               }
        }
        else if (relids != NIL)
        {
-
                /*
                 * 'clause' is a join clause, since there is more than one rel in
                 * the relid list.      Set additional RestrictInfo fields for
@@ -524,9 +551,11 @@ distribute_qual_to_rels(Query *root, Node *clause,
         * the two sides represent equivalent PathKeyItems for path keys: any
         * path that is sorted by one side will also be sorted by the other
         * (as soon as the two rels are joined, that is).  Record the key
-        * equivalence for future use.
+        * equivalence for future use.  (We can skip this for a deduced clause,
+        * since the keys are already known equivalent in that case.)
         */
-       if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid)
+       if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid &&
+               !isdeduced)
                add_equijoined_keys(root, restrictinfo);
 }
 
@@ -684,18 +713,106 @@ process_implied_equality(Query *root, Node *item1, Node *item2,
 
        /*
         * Note: we mark the qual "pushed down" to ensure that it can never be
-        * taken for an original JOIN/ON clause.  We also claim it is an
-        * outer- join clause, which it isn't, but that keeps
-        * distribute_qual_to_rels from examining the outerjoinsets of the
-        * relevant rels (which are no longer of interest, but could keep the
-        * qual from being pushed down to where it should be).  It'll also
-        * save a useless call to add_equijoined keys...
+        * taken for an original JOIN/ON clause.
         */
        distribute_qual_to_rels(root, (Node *) clause,
-                                                       true, true,
+                                                       true, false, true,
                                                        pull_varnos((Node *) clause));
 }
 
+/*
+ * qual_is_redundant
+ *       Detect whether an implied-equality qual that turns out to be a
+ *       restriction clause for a single base relation is redundant with
+ *       already-known restriction clauses for that rel.  This occurs with,
+ *       for example,
+ *                             SELECT * FROM tab WHERE f1 = f2 AND f2 = f3;
+ *       We need to suppress the redundant condition to avoid computing
+ *       too-small selectivity, not to mention wasting time at execution.
+ */
+static bool
+qual_is_redundant(Query *root,
+                                 RestrictInfo *restrictinfo,
+                                 List *restrictlist)
+{
+       List   *oldquals;
+       List   *olditem;
+       Node   *newleft;
+       Node   *newright;
+       List   *equalvars;
+       bool    someadded;
+
+       /*
+        * Set cached pathkeys.  NB: it is okay to do this now because this
+        * routine is only invoked while we are generating implied equalities.
+        * Therefore, the equi_key_list is already complete and so we can
+        * correctly determine canonical pathkeys.
+        */
+       cache_mergeclause_pathkeys(root, restrictinfo);
+       /* If different, say "not redundant" (should never happen) */
+       if (restrictinfo->left_pathkey != restrictinfo->right_pathkey)
+               return false;
+       /*
+        * Scan existing quals to find those referencing same pathkeys.
+        * Usually there will be few, if any, so build a list of just the
+        * interesting ones.
+        */
+       oldquals = NIL;
+       foreach(olditem, restrictlist)
+       {
+               RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+
+               if (oldrinfo->mergejoinoperator != InvalidOid)
+               {
+                       cache_mergeclause_pathkeys(root, oldrinfo);
+                       if (restrictinfo->left_pathkey == oldrinfo->left_pathkey &&
+                               restrictinfo->right_pathkey == oldrinfo->right_pathkey)
+                               oldquals = lcons(oldrinfo, oldquals);
+               }
+       }
+       if (oldquals == NIL)
+               return false;
+       /*
+        * Now, we want to develop a list of Vars that are known equal to the
+        * left side of the new qual.  We traverse the old-quals list repeatedly
+        * to transitively expand the Vars list.  If at any point we find we
+        * can reach the right-side Var of the new qual, we are done.  We give
+        * up when we can't expand the equalvars list any more.
+        */
+       newleft = (Node *) get_leftop(restrictinfo->clause);
+       newright = (Node *) get_rightop(restrictinfo->clause);
+       equalvars = makeList1(newleft);
+       do {
+               someadded = false;
+               foreach(olditem, oldquals)
+               {
+                       RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+                       Node *oldleft = (Node *) get_leftop(oldrinfo->clause);
+                       Node *oldright = (Node *) get_rightop(oldrinfo->clause);
+                       Node *newguy = NULL;
+
+                       if (member(oldleft, equalvars))
+                               newguy = oldright;
+                       else if (member(oldright, equalvars))
+                               newguy = oldleft;
+                       else
+                               continue;
+                       if (equal(newguy, newright))
+                               return true;    /* we proved new clause is redundant */
+                       equalvars = lcons(newguy, equalvars);
+                       someadded = true;
+                       /*
+                        * Remove this qual from list, since we don't need it anymore.
+                        * Note this doesn't break the foreach() loop, since lremove
+                        * doesn't touch the next-link of the removed cons cell.
+                        */
+                       oldquals = lremove(oldrinfo, oldquals);
+               }
+       } while (someadded);
+
+       return false;                           /* it's not redundant */
+}
+
 
 /*****************************************************************************
  *
index a2fa883..48e4168 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.108 2001/06/05 05:26:04 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.109 2001/10/18 16:11:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,9 @@
 
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
+#ifdef OPTIMIZER_DEBUG
+#include "nodes/print.h"
+#endif
 #include "optimizer/clauses.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
index 86d9231..3b85f32 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.33 2001/05/20 20:28:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.34 2001/10/18 16:11:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
 #include "optimizer/plancat.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"
 
 static RelOptInfo *make_base_rel(Query *root, int relid);
 static List *new_join_tlist(List *tlist, int first_resdomno);
-static List *build_joinrel_restrictlist(RelOptInfo *joinrel,
-                                                  RelOptInfo *outer_rel,
-                                                  RelOptInfo *inner_rel);
+static List *build_joinrel_restrictlist(Query *root,
+                                                                               RelOptInfo *joinrel,
+                                                                               RelOptInfo *outer_rel,
+                                                                               RelOptInfo *inner_rel);
 static void build_joinrel_joinlist(RelOptInfo *joinrel,
                                           RelOptInfo *outer_rel,
                                           RelOptInfo *inner_rel);
@@ -274,7 +276,8 @@ build_join_rel(Query *root,
                 * particular pair of component relations.
                 */
                if (restrictlist_ptr)
-                       *restrictlist_ptr = build_joinrel_restrictlist(joinrel,
+                       *restrictlist_ptr = build_joinrel_restrictlist(root,
+                                                                                                                  joinrel,
                                                                                                                   outer_rel,
                                                                                                                   inner_rel);
                return joinrel;
@@ -326,7 +329,10 @@ build_join_rel(Query *root,
         * caller might or might not need the restrictlist, but I need it
         * anyway for set_joinrel_size_estimates().)
         */
-       restrictlist = build_joinrel_restrictlist(joinrel, outer_rel, inner_rel);
+       restrictlist = build_joinrel_restrictlist(root,
+                                                                                         joinrel,
+                                                                                         outer_rel,
+                                                                                         inner_rel);
        if (restrictlist_ptr)
                *restrictlist_ptr = restrictlist;
        build_joinrel_joinlist(joinrel, outer_rel, inner_rel);
@@ -407,6 +413,11 @@ new_join_tlist(List *tlist,
  *       join paths made from this pair of sub-relations.      (It will not need to
  *       be considered further up the join tree.)
  *
+ *       When building a restriction list, we eliminate redundant clauses.
+ *       We don't try to do that for join clause lists, since the join clauses
+ *       aren't really doing anything, just waiting to become part of higher
+ *       levels' restriction lists.
+ *
  * 'joinrel' is a join relation node
  * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
  *             to form joinrel.
@@ -421,19 +432,80 @@ new_join_tlist(List *tlist,
  * the original nodes in the lists made for the join relation.
  */
 static List *
-build_joinrel_restrictlist(RelOptInfo *joinrel,
+build_joinrel_restrictlist(Query *root,
+                                                  RelOptInfo *joinrel,
                                                   RelOptInfo *outer_rel,
                                                   RelOptInfo *inner_rel)
 {
+       List   *result = NIL;
+       List   *rlist;
+       List   *item;
+
+       /*
+        * Collect all the clauses that syntactically belong at this level.
+        */
+       rlist = nconc(subbuild_joinrel_restrictlist(joinrel,
+                                                                                               outer_rel->joininfo),
+                                 subbuild_joinrel_restrictlist(joinrel,
+                                                                                               inner_rel->joininfo));
 
        /*
-        * We must eliminate duplicates, since we will see the same clauses
-        * arriving from both input relations...
+        * Eliminate duplicate and redundant clauses.
+        *
+        * We must eliminate duplicates, since we will see many of the same
+        * clauses arriving from both input relations.  Also, if a clause is
+        * a mergejoinable clause, it's possible that it is redundant with
+        * previous clauses (see optimizer/README for discussion).  We detect
+        * that case and omit the redundant clause from the result list.
+        *
+        * We can detect redundant mergejoinable clauses very cheaply by using
+        * their left and right pathkeys, which uniquely identify the sets of
+        * equijoined variables in question.  All the members of a pathkey set
+        * that are in the left relation have already been forced to be equal;
+        * likewise for those in the right relation.  So, we need to have only
+        * one clause that checks equality between any set member on the left and
+        * any member on the right; by transitivity, all the rest are then equal.
         */
-       return set_union(subbuild_joinrel_restrictlist(joinrel,
-                                                                                                  outer_rel->joininfo),
-                                        subbuild_joinrel_restrictlist(joinrel,
-                                                                                                  inner_rel->joininfo));
+       foreach(item, rlist)
+       {
+               RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
+
+               /* eliminate duplicates */
+               if (member(rinfo, result))
+                       continue;
+
+               /* check for redundant merge clauses */
+               if (rinfo->mergejoinoperator != InvalidOid)
+               {
+                       bool    redundant = false;
+                       List   *olditem;
+
+                       cache_mergeclause_pathkeys(root, rinfo);
+
+                       foreach(olditem, result)
+                       {
+                               RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem);
+
+                               if (oldrinfo->mergejoinoperator != InvalidOid &&
+                                       rinfo->left_pathkey == oldrinfo->left_pathkey &&
+                                       rinfo->right_pathkey == oldrinfo->right_pathkey)
+                               {
+                                       redundant = true;
+                                       break;
+                               }
+                       }
+
+                       if (redundant)
+                               continue;
+               }
+
+               /* otherwise, add it to result list */
+               result = lappend(result, rinfo);
+       }
+
+       freeList(rlist);
+
+       return result;
 }
 
 static void
@@ -458,7 +530,6 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
 
                if (is_subseti(joininfo->unjoined_relids, joinrel->relids))
                {
-
                        /*
                         * Clauses in this JoinInfo list become restriction clauses
                         * for the joinrel, since they refer to no outside rels.
@@ -471,7 +542,6 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
                }
                else
                {
-
                        /*
                         * These clauses are still join clauses at this level, so we
                         * ignore them in this routine.
@@ -497,7 +567,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
                                                                                          joinrel->relids);
                if (new_unjoined_relids == NIL)
                {
-
                        /*
                         * Clauses in this JoinInfo list become restriction clauses
                         * for the joinrel, since they refer to no outside rels. So we
@@ -506,7 +575,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
                }
                else
                {
-
                        /*
                         * These clauses are still join clauses at this level, so find
                         * or make the appropriate JoinInfo item for the joinrel, and
index 08fe2cc..1dcb2cc 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: paths.h,v 1.55 2001/08/21 16:36:06 tgl Exp $
+ * $Id: paths.h,v 1.56 2001/10/18 16:11:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,6 +31,10 @@ extern int   geqo_rels;
 extern RelOptInfo *make_one_rel(Query *root);
 extern RelOptInfo *make_fromexpr_rel(Query *root, FromExpr *from);
 
+#ifdef OPTIMIZER_DEBUG
+extern void debug_print_rel(Query *root, RelOptInfo *rel);
+#endif
+
 /*
  * indxpath.c
  *       routines to generate index paths
@@ -111,6 +115,8 @@ extern List *build_join_pathkeys(Query *root,
                                        List *outer_pathkeys);
 extern List *make_pathkeys_for_sortclauses(List *sortclauses,
                                                          List *tlist);
+extern void cache_mergeclause_pathkeys(Query *root,
+                                                                          RestrictInfo *restrictinfo);
 extern List *find_mergeclauses_for_pathkeys(Query *root,
                                                           List *pathkeys,
                                                           List *restrictinfos);