OSDN Git Service

First cut at unifying regular selectivity estimation with indexscan
author: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 23 Jan 2000 02:07:00 +0000 (02:07 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 23 Jan 2000 02:07:00 +0000 (02:07 +0000)
selectivity estimation wasn't right.  This is better...

src/backend/optimizer/path/clausesel.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/backend/utils/adt/selfuncs.c
src/include/optimizer/cost.h

index a25dd68..d3a494f 100644 (file)
@@ -1,13 +1,13 @@
 /*-------------------------------------------------------------------------
  *
  * clausesel.c
- *       Routines to compute and set clause selectivities
+ *       Routines to compute clause selectivities
  *
  * Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.27 2000/01/09 00:26:31 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
  ****************************************************************************/
 
 /*
- * restrictlist_selec -
+ * restrictlist_selectivity -
  *       Compute the selectivity of an implicitly-ANDed list of RestrictInfo
  *       clauses.
  *
- * This is the same as clauselist_selec except for the form of the input.
+ * This is the same as clauselist_selectivity except for the representation
+ * of the clause list.
  */
 Selectivity
-restrictlist_selec(Query *root, List *restrictinfo_list)
+restrictlist_selectivity(Query *root,
+                                                List *restrictinfo_list,
+                                                int varRelid)
 {
        List       *clauselist = get_actual_clauses(restrictinfo_list);
        Selectivity     result;
 
-       result = clauselist_selec(root, clauselist);
+       result = clauselist_selectivity(root, clauselist, varRelid);
        freeList(clauselist);
        return result;
 }
 
 /*
- * clauselist_selec -
+ * clauselist_selectivity -
  *       Compute the selectivity of an implicitly-ANDed list of boolean
- *       expression clauses.
+ *       expression clauses.  The list can be empty, in which case 1.0
+ *       must be returned.
+ *
+ * See clause_selectivity() for the meaning of the varRelid parameter.
  */
 Selectivity
-clauselist_selec(Query *root, List *clauses)
+clauselist_selectivity(Query *root,
+                                          List *clauses,
+                                          int varRelid)
 {
        Selectivity             s1 = 1.0;
        List               *clause;
 
        /* Use the product of the selectivities of the subclauses.
-        * XXX this is probably too optimistic, since the subclauses
+        * XXX this is too optimistic, since the subclauses
         * are very likely not independent...
         */
        foreach(clause, clauses)
        {
-               Selectivity     s2 = compute_clause_selec(root, (Node *) lfirst(clause));
+               Selectivity     s2 = clause_selectivity(root,
+                                                                                       (Node *) lfirst(clause),
+                                                                                       varRelid);
                s1 = s1 * s2;
        }
        return s1;
 }
 
 /*
- * compute_clause_selec -
+ * clause_selectivity -
  *       Compute the selectivity of a general boolean expression clause.
+ *
+ * varRelid is either 0 or a rangetable index.
+ *
+ * When varRelid is not 0, only variables belonging to that relation are
+ * considered in computing selectivity; other vars are treated as constants
+ * of unknown values.  This is appropriate for estimating the selectivity of
+ * a join clause that is being used as a restriction clause in a scan of a
+ * nestloop join's inner relation --- varRelid should then be the ID of the
+ * inner relation.
+ *
+ * When varRelid is 0, all variables are treated as variables.  This
+ * is appropriate for ordinary join clauses and restriction clauses.
  */
 Selectivity
-compute_clause_selec(Query *root, Node *clause)
+clause_selectivity(Query *root,
+                                  Node *clause,
+                                  int varRelid)
 {
        Selectivity             s1 = 1.0;       /* default for any unhandled clause type */
 
@@ -88,13 +112,16 @@ compute_clause_selec(Query *root, Node *clause)
                 * didn't want to have to do system cache look ups to find out all
                 * of that info.
                 */
-               s1 = restriction_selectivity(F_EQSEL,
-                                                                        BooleanEqualOperator,
-                                                                        getrelid(((Var *) clause)->varno,
-                                                                                         root->rtable),
-                                                                        ((Var *) clause)->varattno,
-                                                                        Int8GetDatum(true),
-                                                                        SEL_CONSTANT | SEL_RIGHT);
+               Index   varno = ((Var *) clause)->varno;
+
+               if (varRelid == 0 || varRelid == varno)
+                       s1 = restriction_selectivity(F_EQSEL,
+                                                                                BooleanEqualOperator,
+                                                                                getrelid(varno, root->rtable),
+                                                                                ((Var *) clause)->varattno,
+                                                                                Int8GetDatum(true),
+                                                                                SEL_CONSTANT | SEL_RIGHT);
+               /* an outer-relation bool var is taken as always true... */
        }
        else if (IsA(clause, Param))
        {
@@ -109,12 +136,16 @@ compute_clause_selec(Query *root, Node *clause)
        else if (not_clause(clause))
        {
                /* inverse of the selectivity of the underlying clause */
-               s1 = 1.0 - compute_clause_selec(root,
-                                                                               (Node *) get_notclausearg((Expr *) clause));
+               s1 = 1.0 - clause_selectivity(root,
+                                                                         (Node*) get_notclausearg((Expr*) clause),
+                                                                         varRelid);
        }
        else if (and_clause(clause))
        {
-               s1 = clauselist_selec(root, ((Expr *) clause)->args);
+               /* share code with clauselist_selectivity() */
+               s1 = clauselist_selectivity(root,
+                                                                       ((Expr *) clause)->args,
+                                                                       varRelid);
        }
        else if (or_clause(clause))
        {
@@ -127,50 +158,37 @@ compute_clause_selec(Query *root, Node *clause)
                s1 = 0.0;
                foreach(arg, ((Expr *) clause)->args)
                {
-                       Selectivity     s2 = compute_clause_selec(root, (Node *) lfirst(arg));
+                       Selectivity     s2 = clause_selectivity(root,
+                                                                                               (Node *) lfirst(arg),
+                                                                                               varRelid);
                        s1 = s1 + s2 - s1 * s2;
                }
        }
        else if (is_opclause(clause))
        {
-               if (NumRelids(clause) == 1)
-               {
-                       /* The opclause is not a join clause, since there is only one
-                        * relid in the clause.  The clause selectivity will be based on
-                        * the operator selectivity and operand values.
-                        */
-                       Oid                     opno = ((Oper *) ((Expr *) clause)->oper)->opno;
-                       RegProcedure oprrest = get_oprrest(opno);
+               Oid                     opno = ((Oper *) ((Expr *) clause)->oper)->opno;
+               bool            is_join_clause;
 
+               if (varRelid != 0)
+               {
                        /*
-                        * if the oprrest procedure is missing for whatever reason, use a
-                        * selectivity of 0.5
+                        * If we are considering a nestloop join then all clauses
+                        * are restriction clauses, since we are only interested in
+                        * the one relation.
                         */
-                       if (!oprrest)
-                               s1 = (Selectivity) 0.5;
-                       else
-                       {
-                               int                     relidx;
-                               AttrNumber      attno;
-                               Datum           constval;
-                               int                     flag;
-                               Oid                     reloid;
-
-                               get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
-                               reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
-                               s1 = restriction_selectivity(oprrest, opno,
-                                                                                        reloid, attno,
-                                                                                        constval, flag);
-                       }
+                       is_join_clause = false;
                }
                else
                {
                        /*
-                        * The clause must be a join clause.  The clause selectivity will
-                        * be based on the relations to be scanned and the attributes they
-                        * are to be joined on.
+                        * Otherwise, it's a join if there's more than one relation used.
                         */
-                       Oid                     opno = ((Oper *) ((Expr *) clause)->oper)->opno;
+                       is_join_clause = (NumRelids(clause) > 1);
+               }
+
+               if (is_join_clause)
+               {
+                       /* Estimate selectivity for a join clause. */
                        RegProcedure oprjoin = get_oprjoin(opno);
 
                        /*
@@ -196,6 +214,33 @@ compute_clause_selec(Query *root, Node *clause)
                                                                          reloid2, attno2);
                        }
                }
+               else
+               {
+                       /* Estimate selectivity for a restriction clause. */
+                       RegProcedure oprrest = get_oprrest(opno);
+
+                       /*
+                        * if the oprrest procedure is missing for whatever reason, use a
+                        * selectivity of 0.5
+                        */
+                       if (!oprrest)
+                               s1 = (Selectivity) 0.5;
+                       else
+                       {
+                               int                     relidx;
+                               AttrNumber      attno;
+                               Datum           constval;
+                               int                     flag;
+                               Oid                     reloid;
+
+                               get_relattval(clause, varRelid,
+                                                         &relidx, &attno, &constval, &flag);
+                               reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
+                               s1 = restriction_selectivity(oprrest, opno,
+                                                                                        reloid, attno,
+                                                                                        constval, flag);
+                       }
+               }
        }
        else if (is_funcclause(clause))
        {
index 5c0f54a..ca2d586 100644 (file)
@@ -18,7 +18,7 @@
  * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.48 2000/01/22 23:50:14 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.49 2000/01/23 02:06:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -459,7 +459,10 @@ set_rel_rows_width(Query *root, RelOptInfo *rel)
        /* Should only be applied to base relations */
        Assert(length(rel->relids) == 1);
 
-       rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo);
+       rel->rows = rel->tuples *
+               restrictlist_selectivity(root,
+                                                                rel->restrictinfo,
+                                                                lfirsti(rel->relids));
        Assert(rel->rows >= 0);
 
        set_rel_width(root, rel);
@@ -479,8 +482,10 @@ set_joinrel_rows_width(Query *root, RelOptInfo *rel,
        temp = joinpath->outerjoinpath->parent->rows *
                joinpath->innerjoinpath->parent->rows;
 
-       /* apply restrictivity */
-       temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo);
+       /* apply join restrictivity */
+       temp *= restrictlist_selectivity(root,
+                                                                        joinpath->path.parent->restrictinfo,
+                                                                        0);
 
        Assert(temp >= 0);
        rel->rows = temp;
index fd87a89..da3e74a 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.79 2000/01/15 02:59:30 petere Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.80 2000/01/23 02:07:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -305,6 +305,7 @@ create_indexscan_node(Query *root,
                                          List *scan_clauses)
 {
        List       *indxqual = best_path->indexqual;
+       Index           baserelid;
        List       *qpqual;
        List       *fixed_indxqual;
        List       *ixid;
@@ -314,6 +315,7 @@ create_indexscan_node(Query *root,
 
        /* there should be exactly one base rel involved... */
        Assert(length(best_path->path.parent->relids) == 1);
+       baserelid = lfirsti(best_path->path.parent->relids);
 
        /* check to see if any of the indices are lossy */
        foreach(ixid, best_path->indexid)
@@ -382,7 +384,9 @@ create_indexscan_node(Query *root,
                {
                        /* recompute output row estimate using all available quals */
                        plan_rows = best_path->path.parent->tuples *
-                               clauselist_selec(root, lcons(indxqual_expr, qpqual));
+                               clauselist_selectivity(root,
+                                                                          lcons(indxqual_expr, qpqual),
+                                                                          baserelid);
                }
 
                if (lossy)
@@ -401,7 +405,9 @@ create_indexscan_node(Query *root,
                {
                        /* recompute output row estimate using all available quals */
                        plan_rows = best_path->path.parent->tuples *
-                               clauselist_selec(root, nconc(listCopy(indxqual_list), qpqual));
+                               clauselist_selectivity(root,
+                                                                          nconc(listCopy(indxqual_list), qpqual),
+                                                                          baserelid);
                }
 
                if (lossy)
@@ -417,7 +423,7 @@ create_indexscan_node(Query *root,
 
        scan_node = make_indexscan(tlist,
                                                           qpqual,
-                                                          lfirsti(best_path->path.parent->relids),
+                                                          baserelid,
                                                           best_path->indexid,
                                                           fixed_indxqual,
                                                           indxqual);
index f1c458b..6af241f 100644 (file)
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.49 2000/01/22 23:50:20 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.50 2000/01/23 02:06:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -725,7 +725,8 @@ genericcostestimate(Query *root, RelOptInfo *rel,
        double numIndexPages;
 
        /* Estimate the fraction of main-table tuples that will be visited */
-    *indexSelectivity = clauselist_selec(root, indexQuals);
+    *indexSelectivity = clauselist_selectivity(root, indexQuals,
+                                                                                          lfirsti(rel->relids));
 
        /* Estimate the number of index tuples that will be visited */
        numIndexTuples = *indexSelectivity * index->tuples;
index 9c16fc8..fd6daee 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.26 2000/01/22 23:50:26 tgl Exp $
+ * $Id: cost.h,v 1.27 2000/01/23 02:06:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "nodes/relation.h"
 
+/* defaults for costsize.c's Cost parameters */
+/* NB: cost-estimation code should use the variables, not the constants! */
+#define CPU_PAGE_WEIGHT  0.033
+#define CPU_INDEX_PAGE_WEIGHT  0.017
+
 /* defaults for function attributes used for expensive function calculations */
 #define BYTE_PCT 100
 #define PERBYTE_CPU 0
 #define PERCALL_CPU 0
 #define OUTIN_RATIO 100
-/* defaults for costsize.c's Cost parameters */
-/* NB: cost-estimation code should use the variables, not the constants! */
-#define CPU_PAGE_WEIGHT  0.033
-#define CPU_INDEX_PAGE_WEIGHT  0.017
 
 
 /*
@@ -61,8 +62,14 @@ extern void set_joinrel_rows_width(Query *root, RelOptInfo *rel,
  * prototypes for clausesel.c
  *       routines to compute clause selectivities
  */
-extern Selectivity restrictlist_selec(Query *root, List *restrictinfo_list);
-extern Selectivity clauselist_selec(Query *root, List *clauses);
-extern Selectivity compute_clause_selec(Query *root, Node *clause);
+extern Selectivity restrictlist_selectivity(Query *root,
+                                                                                       List *restrictinfo_list,
+                                                                                       int varRelid);
+extern Selectivity clauselist_selectivity(Query *root,
+                                                                                 List *clauses,
+                                                                                 int varRelid);
+extern Selectivity clause_selectivity(Query *root,
+                                                                         Node *clause,
+                                                                         int varRelid);
 
 #endif  /* COST_H */