From ce6e31de9c99430256468d059eb226ea7a267376 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 15 Feb 2009 20:16:21 +0000 Subject: [PATCH] Teach the planner to treat a partial unique index as proving a variable is unique for a particular query, if the index predicate is satisfied. This requires a bit of reordering of operations so that we check the predicates before doing any selectivity estimates, but shouldn't really cause any noticeable slowdown. Per a comment from Michal Politowski. --- src/backend/optimizer/path/allpaths.c | 16 ++++++--- src/backend/optimizer/path/indxpath.c | 13 +++++-- src/backend/optimizer/path/orindxpath.c | 4 +-- src/backend/optimizer/util/plancat.c | 13 +++---- src/backend/utils/adt/selfuncs.c | 63 +++++++++++++++++++++------------ 5 files changed, 70 insertions(+), 39 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b94b8a5811..4e0d9ca4e5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.179 2009/01/01 17:23:43 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.180 2009/02/15 20:16:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -225,19 +225,25 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) return; } + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_partial_indexes(root, rel); + /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); - /* Test any partial indexes of rel for applicability */ - check_partial_indexes(root, rel); - /* * Check to see if we can extract any restriction conditions from join * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and recompute the size estimates. + * restriction list, and redo the above steps. */ if (create_or_index_quals(root, rel)) + { + check_partial_indexes(root, rel); set_baserel_size_estimates(root, rel); + } /* * Generate paths and add them to the rel's pathlist. diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index b3482b4b5b..6f27a19182 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.235 2009/01/01 17:23:43 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.236 2009/02/15 20:16:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1367,8 +1367,12 @@ match_rowcompare_to_indexcol(IndexOptInfo *index, /* * check_partial_indexes - * Check each partial index of the relation, and mark it predOK or not - * depending on whether the predicate is satisfied for this query. + * Check each partial index of the relation, and mark it predOK if + * the index's predicate is satisfied for this query. + * + * Note: it is possible for this to get re-run after adding more restrictions + * to the rel; so we might be able to prove more indexes OK. We assume that + * adding more restrictions can't make an index not OK. */ void check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) @@ -1383,6 +1387,9 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) if (index->indpred == NIL) continue; /* ignore non-partial indexes */ + if (index->predOK) + continue; /* don't repeat work if already proven OK */ + index->predOK = predicate_implied_by(index->indpred, restrictinfo_list); } diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index 638078e169..2f8a67394e 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.87 2009/02/06 23:43:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.88 2009/02/15 20:16:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -181,6 +181,6 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) /* It isn't an outer join clause, so no need to adjust outer_selec */ } - /* Tell caller to recompute rel's rows estimate */ + /* Tell caller to recompute partial index status and rowcount estimate */ return true; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index f4f8ac8a76..e880e668cb 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.154 2009/01/07 22:40:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.155 2009/02/15 20:16:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -939,15 +939,16 @@ has_unique_index(RelOptInfo *rel, AttrNumber attno) /* * Note: ignore partial indexes, since they don't allow us to conclude - * that all attr values are distinct. We don't take any interest in - * expressional indexes either. Also, a multicolumn unique index - * doesn't allow us to conclude that just the specified attr is - * unique. + * that all attr values are distinct, *unless* they are marked predOK + * which means we know the index's predicate is satisfied by the query. + * We don't take any interest in expressional indexes either. Also, a + * multicolumn unique index doesn't allow us to conclude that just the + * specified attr is unique. */ if (index->unique && index->ncolumns == 1 && index->indexkeys[0] == attno && - index->indpred == NIL) + (index->indpred == NIL || index->predOK)) return true; } return false; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7c3f77308b..e4b1acb1f8 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.258 2009/01/01 17:23:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.259 2009/02/15 20:16:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -234,6 +234,15 @@ var_eq_const(VariableStatData *vardata, Oid operator, if (constisnull) return 0.0; + /* + * If we matched the var to a unique index, assume there is exactly one + * match regardless of anything else. (This is slightly bogus, since + * the index's equality operator might be different from ours, but it's + * more likely to be right than ignoring the information.) + */ + if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) + return 1.0 / vardata->rel->tuples; + if (HeapTupleIsValid(vardata->statsTuple)) { Form_pg_statistic stats; @@ -357,6 +366,15 @@ var_eq_non_const(VariableStatData *vardata, Oid operator, { double selec; + /* + * If we matched the var to a unique index, assume there is exactly one + * match regardless of anything else. (This is slightly bogus, since + * the index's equality operator might be different from ours, but it's + * more likely to be right than ignoring the information.) + */ + if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) + return 1.0 / vardata->rel->tuples; + if (HeapTupleIsValid(vardata->statsTuple)) { Form_pg_statistic stats; @@ -3969,6 +3987,8 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, * atttype, atttypmod: type data to pass to get_attstatsslot(). This is * commonly the same as the exposed type of the variable argument, * but can be different in binary-compatible-type cases. + * isunique: TRUE if we were able to match the var to a unique index, + * implying its values are unique for this query. * * Caller is responsible for doing ReleaseVariableStats() before exiting. */ @@ -4005,6 +4025,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->rel = find_base_rel(root, var->varno); vardata->atttype = var->vartype; vardata->atttypmod = var->vartypmod; + vardata->isunique = has_unique_index(vardata->rel, var->varattno); rte = root->simple_rte_array[var->varno]; @@ -4121,13 +4142,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, if (indexpr_item == NULL) continue; /* no expressions here... */ - /* - * Ignore partial indexes since they probably don't reflect - * whole-relation statistics. Possibly reconsider this later. - */ - if (index->indpred) - continue; - for (pos = 0; pos < index->ncolumns; pos++) { if (index->indexkeys[pos] == 0) @@ -4147,9 +4161,19 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, */ if (index->unique && index->ncolumns == 1 && - index->indpred == NIL) + (index->indpred == NIL || index->predOK)) vardata->isunique = true; - /* Has it got stats? */ + + /* + * Has it got stats? We only consider stats for + * non-partial indexes, since partial indexes + * probably don't reflect whole-relation statistics; + * the above check for uniqueness is the only info + * we take from a partial index. + * + * An index stats hook, however, must make its own + * decisions about what to do with partial indexes. + */ if (get_index_stats_hook && (*get_index_stats_hook) (root, index->indexoid, pos + 1, vardata)) @@ -4163,7 +4187,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, !vardata->freefunc) elog(ERROR, "no function provided to release variable stats with"); } - else + else if (index->indpred == NIL) { vardata->statsTuple = SearchSysCache(STATRELATT, @@ -4254,19 +4278,12 @@ get_variable_numdistinct(VariableStatData *vardata) /* * If there is a unique index for the variable, assume it is unique no - * matter what pg_statistic says (the statistics could be out of date). - * Can skip search if we already think it's unique. + * matter what pg_statistic says; the statistics could be out of date, + * or we might have found a partial unique index that proves the var + * is unique for this query. */ - if (stadistinct != -1.0) - { - if (vardata->isunique) - stadistinct = -1.0; - else if (vardata->var && IsA(vardata->var, Var) && - vardata->rel && - has_unique_index(vardata->rel, - ((Var *) vardata->var)->varattno)) - stadistinct = -1.0; - } + if (vardata->isunique) + stadistinct = -1.0; /* * If we had an absolute estimate, use that. -- 2.11.0