OSDN Git Service

Teach the planner to treat a partial unique index as proving a variable is
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 15 Feb 2009 20:16:21 +0000 (20:16 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 15 Feb 2009 20:16:21 +0000 (20:16 +0000)
unique for a particular query, if the index predicate is satisfied.  This
requires a bit of reordering of operations so that we check the predicates
before doing any selectivity estimates, but shouldn't really cause any
noticeable slowdown.  Per a comment from Michal Politowski.

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/orindxpath.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/selfuncs.c

index b94b8a5..4e0d9ca 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.179 2009/01/01 17:23:43 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.180 2009/02/15 20:16:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -225,19 +225,25 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
                return;
        }
 
+       /*
+        * Test any partial indexes of rel for applicability.  We must do this
+        * first since partial unique indexes can affect size estimates.
+        */
+       check_partial_indexes(root, rel);
+
        /* Mark rel with estimated output rows, width, etc */
        set_baserel_size_estimates(root, rel);
 
-       /* Test any partial indexes of rel for applicability */
-       check_partial_indexes(root, rel);
-
        /*
         * Check to see if we can extract any restriction conditions from join
         * quals that are OR-of-AND structures.  If so, add them to the rel's
-        * restriction list, and recompute the size estimates.
+        * restriction list, and redo the above steps.
         */
        if (create_or_index_quals(root, rel))
+       {
+               check_partial_indexes(root, rel);
                set_baserel_size_estimates(root, rel);
+       }
 
        /*
         * Generate paths and add them to the rel's pathlist.
index b3482b4..6f27a19 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.235 2009/01/01 17:23:43 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.236 2009/02/15 20:16:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,8 +1367,12 @@ match_rowcompare_to_indexcol(IndexOptInfo *index,
 
 /*
  * check_partial_indexes
- *             Check each partial index of the relation, and mark it predOK or not
- *             depending on whether the predicate is satisfied for this query.
+ *             Check each partial index of the relation, and mark it predOK if
+ *             the index's predicate is satisfied for this query.
+ *
+ * Note: it is possible for this to get re-run after adding more restrictions
+ * to the rel; so we might be able to prove more indexes OK.  We assume that
+ * adding more restrictions can't make an index not OK.
  */
 void
 check_partial_indexes(PlannerInfo *root, RelOptInfo *rel)
@@ -1383,6 +1387,9 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel)
                if (index->indpred == NIL)
                        continue;                       /* ignore non-partial indexes */
 
+               if (index->predOK)
+                       continue;                       /* don't repeat work if already proven OK */
+
                index->predOK = predicate_implied_by(index->indpred,
                                                                                         restrictinfo_list);
        }
index 638078e..2f8a673 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.87 2009/02/06 23:43:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.88 2009/02/15 20:16:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -181,6 +181,6 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
                /* It isn't an outer join clause, so no need to adjust outer_selec */
        }
 
-       /* Tell caller to recompute rel's rows estimate */
+       /* Tell caller to recompute partial index status and rowcount estimate */
        return true;
 }
index f4f8ac8..e880e66 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.154 2009/01/07 22:40:49 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.155 2009/02/15 20:16:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -939,15 +939,16 @@ has_unique_index(RelOptInfo *rel, AttrNumber attno)
 
                /*
                 * Note: ignore partial indexes, since they don't allow us to conclude
-                * that all attr values are distinct.  We don't take any interest in
-                * expressional indexes either. Also, a multicolumn unique index
-                * doesn't allow us to conclude that just the specified attr is
-                * unique.
+                * that all attr values are distinct, *unless* they are marked predOK
+                * which means we know the index's predicate is satisfied by the query.
+                * We don't take any interest in expressional indexes either. Also, a
+                * multicolumn unique index doesn't allow us to conclude that just the
+                * specified attr is unique.
                 */
                if (index->unique &&
                        index->ncolumns == 1 &&
                        index->indexkeys[0] == attno &&
-                       index->indpred == NIL)
+                       (index->indpred == NIL || index->predOK))
                        return true;
        }
        return false;
index 7c3f773..e4b1acb 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.258 2009/01/01 17:23:50 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.259 2009/02/15 20:16:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -234,6 +234,15 @@ var_eq_const(VariableStatData *vardata, Oid operator,
        if (constisnull)
                return 0.0;
 
+       /*
+        * If we matched the var to a unique index, assume there is exactly one
+        * match regardless of anything else.  (This is slightly bogus, since
+        * the index's equality operator might be different from ours, but it's
+        * more likely to be right than ignoring the information.)
+        */
+       if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+               return 1.0 / vardata->rel->tuples;
+
        if (HeapTupleIsValid(vardata->statsTuple))
        {
                Form_pg_statistic stats;
@@ -357,6 +366,15 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
 {
        double          selec;
 
+       /*
+        * If we matched the var to a unique index, assume there is exactly one
+        * match regardless of anything else.  (This is slightly bogus, since
+        * the index's equality operator might be different from ours, but it's
+        * more likely to be right than ignoring the information.)
+        */
+       if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+               return 1.0 / vardata->rel->tuples;
+
        if (HeapTupleIsValid(vardata->statsTuple))
        {
                Form_pg_statistic stats;
@@ -3969,6 +3987,8 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
  *     atttype, atttypmod: type data to pass to get_attstatsslot().  This is
  *             commonly the same as the exposed type of the variable argument,
  *             but can be different in binary-compatible-type cases.
+ *     isunique: TRUE if we were able to match the var to a unique index,
+ *             implying its values are unique for this query.
  *
  * Caller is responsible for doing ReleaseVariableStats() before exiting.
  */
@@ -4005,6 +4025,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                vardata->rel = find_base_rel(root, var->varno);
                vardata->atttype = var->vartype;
                vardata->atttypmod = var->vartypmod;
+               vardata->isunique = has_unique_index(vardata->rel, var->varattno);
 
                rte = root->simple_rte_array[var->varno];
 
@@ -4121,13 +4142,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                        if (indexpr_item == NULL)
                                continue;               /* no expressions here... */
 
-                       /*
-                        * Ignore partial indexes since they probably don't reflect
-                        * whole-relation statistics.  Possibly reconsider this later.
-                        */
-                       if (index->indpred)
-                               continue;
-
                        for (pos = 0; pos < index->ncolumns; pos++)
                        {
                                if (index->indexkeys[pos] == 0)
@@ -4147,9 +4161,19 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                                                 */
                                                if (index->unique &&
                                                        index->ncolumns == 1 &&
-                                                       index->indpred == NIL)
+                                                       (index->indpred == NIL || index->predOK))
                                                        vardata->isunique = true;
-                                               /* Has it got stats? */
+
+                                               /*
+                                                * Has it got stats?  We only consider stats for
+                                                * non-partial indexes, since partial indexes
+                                                * probably don't reflect whole-relation statistics;
+                                                * the above check for uniqueness is the only info
+                                                * we take from a partial index.
+                                                *
+                                                * An index stats hook, however, must make its own
+                                                * decisions about what to do with partial indexes.
+                                                */
                                                if (get_index_stats_hook &&
                                                        (*get_index_stats_hook) (root, index->indexoid,
                                                                                                         pos + 1, vardata))
@@ -4163,7 +4187,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                                                                !vardata->freefunc)
                                                                elog(ERROR, "no function provided to release variable stats with");
                                                }
-                                               else
+                                               else if (index->indpred == NIL)
                                                {
                                                        vardata->statsTuple =
                                                                SearchSysCache(STATRELATT,
@@ -4254,19 +4278,12 @@ get_variable_numdistinct(VariableStatData *vardata)
 
        /*
         * If there is a unique index for the variable, assume it is unique no
-        * matter what pg_statistic says (the statistics could be out of date).
-        * Can skip search if we already think it's unique.
+        * matter what pg_statistic says; the statistics could be out of date,
+        * or we might have found a partial unique index that proves the var
+        * is unique for this query.
         */
-       if (stadistinct != -1.0)
-       {
-               if (vardata->isunique)
-                       stadistinct = -1.0;
-               else if (vardata->var && IsA(vardata->var, Var) &&
-                                vardata->rel &&
-                                has_unique_index(vardata->rel,
-                                                                 ((Var *) vardata->var)->varattno))
-                       stadistinct = -1.0;
-       }
+       if (vardata->isunique)
+               stadistinct = -1.0;
 
        /*
         * If we had an absolute estimate, use that.