From: Tom Lane Date: Sun, 23 Jan 2000 02:07:00 +0000 (+0000) Subject: First cut at unifying regular selectivity estimation with indexscan X-Git-Tag: REL9_0_0~24075 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=8449df8a67864b5009fa078cf21884461f4ed4e6;p=pg-rex%2Fsyncrep.git First cut at unifying regular selectivity estimation with indexscan selectivity estimation wasn't right. This is better... --- diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index a25dd68da3..d3a494f9bc 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * clausesel.c - * Routines to compute and set clause selectivities + * Routines to compute clause selectivities * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.27 2000/01/09 00:26:31 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,52 +28,76 @@ ****************************************************************************/ /* - * restrictlist_selec - + * restrictlist_selectivity - * Compute the selectivity of an implicitly-ANDed list of RestrictInfo * clauses. * - * This is the same as clauselist_selec except for the form of the input. + * This is the same as clauselist_selectivity except for the representation + * of the clause list. */ Selectivity -restrictlist_selec(Query *root, List *restrictinfo_list) +restrictlist_selectivity(Query *root, + List *restrictinfo_list, + int varRelid) { List *clauselist = get_actual_clauses(restrictinfo_list); Selectivity result; - result = clauselist_selec(root, clauselist); + result = clauselist_selectivity(root, clauselist, varRelid); freeList(clauselist); return result; } /* - * clauselist_selec - + * clauselist_selectivity - * Compute the selectivity of an implicitly-ANDed list of boolean - * expression clauses. + * expression clauses. The list can be empty, in which case 1.0 + * must be returned. + * + * See clause_selectivity() for the meaning of the varRelid parameter. */ Selectivity -clauselist_selec(Query *root, List *clauses) +clauselist_selectivity(Query *root, + List *clauses, + int varRelid) { Selectivity s1 = 1.0; List *clause; /* Use the product of the selectivities of the subclauses. - * XXX this is probably too optimistic, since the subclauses + * XXX this is too optimistic, since the subclauses * are very likely not independent... */ foreach(clause, clauses) { - Selectivity s2 = compute_clause_selec(root, (Node *) lfirst(clause)); + Selectivity s2 = clause_selectivity(root, + (Node *) lfirst(clause), + varRelid); s1 = s1 * s2; } return s1; } /* - * compute_clause_selec - + * clause_selectivity - * Compute the selectivity of a general boolean expression clause. + * + * varRelid is either 0 or a rangetable index. + * + * When varRelid is not 0, only variables belonging to that relation are + * considered in computing selectivity; other vars are treated as constants + * of unknown values. This is appropriate for estimating the selectivity of + * a join clause that is being used as a restriction clause in a scan of a + * nestloop join's inner relation --- varRelid should then be the ID of the + * inner relation. + * + * When varRelid is 0, all variables are treated as variables. This + * is appropriate for ordinary join clauses and restriction clauses. */ Selectivity -compute_clause_selec(Query *root, Node *clause) +clause_selectivity(Query *root, + Node *clause, + int varRelid) { Selectivity s1 = 1.0; /* default for any unhandled clause type */ @@ -88,13 +112,16 @@ compute_clause_selec(Query *root, Node *clause) * didn't want to have to do system cache look ups to find out all * of that info. */ - s1 = restriction_selectivity(F_EQSEL, - BooleanEqualOperator, - getrelid(((Var *) clause)->varno, - root->rtable), - ((Var *) clause)->varattno, - Int8GetDatum(true), - SEL_CONSTANT | SEL_RIGHT); + Index varno = ((Var *) clause)->varno; + + if (varRelid == 0 || varRelid == varno) + s1 = restriction_selectivity(F_EQSEL, + BooleanEqualOperator, + getrelid(varno, root->rtable), + ((Var *) clause)->varattno, + Int8GetDatum(true), + SEL_CONSTANT | SEL_RIGHT); + /* an outer-relation bool var is taken as always true... */ } else if (IsA(clause, Param)) { @@ -109,12 +136,16 @@ compute_clause_selec(Query *root, Node *clause) else if (not_clause(clause)) { /* inverse of the selectivity of the underlying clause */ - s1 = 1.0 - compute_clause_selec(root, - (Node *) get_notclausearg((Expr *) clause)); + s1 = 1.0 - clause_selectivity(root, + (Node*) get_notclausearg((Expr*) clause), + varRelid); } else if (and_clause(clause)) { - s1 = clauselist_selec(root, ((Expr *) clause)->args); + /* share code with clauselist_selectivity() */ + s1 = clauselist_selectivity(root, + ((Expr *) clause)->args, + varRelid); } else if (or_clause(clause)) { @@ -127,50 +158,37 @@ compute_clause_selec(Query *root, Node *clause) s1 = 0.0; foreach(arg, ((Expr *) clause)->args) { - Selectivity s2 = compute_clause_selec(root, (Node *) lfirst(arg)); + Selectivity s2 = clause_selectivity(root, + (Node *) lfirst(arg), + varRelid); s1 = s1 + s2 - s1 * s2; } } else if (is_opclause(clause)) { - if (NumRelids(clause) == 1) - { - /* The opclause is not a join clause, since there is only one - * relid in the clause. The clause selectivity will be based on - * the operator selectivity and operand values. - */ - Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; - RegProcedure oprrest = get_oprrest(opno); + Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; + bool is_join_clause; + if (varRelid != 0) + { /* - * if the oprrest procedure is missing for whatever reason, use a - * selectivity of 0.5 + * If we are considering a nestloop join then all clauses + * are restriction clauses, since we are only interested in + * the one relation. */ - if (!oprrest) - s1 = (Selectivity) 0.5; - else - { - int relidx; - AttrNumber attno; - Datum constval; - int flag; - Oid reloid; - - get_relattval(clause, 0, &relidx, &attno, &constval, &flag); - reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid; - s1 = restriction_selectivity(oprrest, opno, - reloid, attno, - constval, flag); - } + is_join_clause = false; } else { /* - * The clause must be a join clause. The clause selectivity will - * be based on the relations to be scanned and the attributes they - * are to be joined on. + * Otherwise, it's a join if there's more than one relation used. */ - Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; + is_join_clause = (NumRelids(clause) > 1); + } + + if (is_join_clause) + { + /* Estimate selectivity for a join clause. */ RegProcedure oprjoin = get_oprjoin(opno); /* @@ -196,6 +214,33 @@ compute_clause_selec(Query *root, Node *clause) reloid2, attno2); } } + else + { + /* Estimate selectivity for a restriction clause. */ + RegProcedure oprrest = get_oprrest(opno); + + /* + * if the oprrest procedure is missing for whatever reason, use a + * selectivity of 0.5 + */ + if (!oprrest) + s1 = (Selectivity) 0.5; + else + { + int relidx; + AttrNumber attno; + Datum constval; + int flag; + Oid reloid; + + get_relattval(clause, varRelid, + &relidx, &attno, &constval, &flag); + reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid; + s1 = restriction_selectivity(oprrest, opno, + reloid, attno, + constval, flag); + } + } } else if (is_funcclause(clause)) { diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 5c0f54a73e..ca2d586757 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -18,7 +18,7 @@ * Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.48 2000/01/22 23:50:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.49 2000/01/23 02:06:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -459,7 +459,10 @@ set_rel_rows_width(Query *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(length(rel->relids) == 1); - rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo); + rel->rows = rel->tuples * + restrictlist_selectivity(root, + rel->restrictinfo, + lfirsti(rel->relids)); Assert(rel->rows >= 0); set_rel_width(root, rel); @@ -479,8 +482,10 @@ set_joinrel_rows_width(Query *root, RelOptInfo *rel, temp = joinpath->outerjoinpath->parent->rows * joinpath->innerjoinpath->parent->rows; - /* apply restrictivity */ - temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo); + /* apply join restrictivity */ + temp *= restrictlist_selectivity(root, + joinpath->path.parent->restrictinfo, + 0); Assert(temp >= 0); rel->rows = temp; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index fd87a89968..da3e74af71 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.79 2000/01/15 02:59:30 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.80 2000/01/23 02:07:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -305,6 +305,7 @@ create_indexscan_node(Query *root, List *scan_clauses) { List *indxqual = best_path->indexqual; + Index baserelid; List *qpqual; List *fixed_indxqual; List *ixid; @@ -314,6 +315,7 @@ create_indexscan_node(Query *root, /* there should be exactly one base rel involved... */ Assert(length(best_path->path.parent->relids) == 1); + baserelid = lfirsti(best_path->path.parent->relids); /* check to see if any of the indices are lossy */ foreach(ixid, best_path->indexid) @@ -382,7 +384,9 @@ create_indexscan_node(Query *root, { /* recompute output row estimate using all available quals */ plan_rows = best_path->path.parent->tuples * - clauselist_selec(root, lcons(indxqual_expr, qpqual)); + clauselist_selectivity(root, + lcons(indxqual_expr, qpqual), + baserelid); } if (lossy) @@ -401,7 +405,9 @@ create_indexscan_node(Query *root, { /* recompute output row estimate using all available quals */ plan_rows = best_path->path.parent->tuples * - clauselist_selec(root, nconc(listCopy(indxqual_list), qpqual)); + clauselist_selectivity(root, + nconc(listCopy(indxqual_list), qpqual), + baserelid); } if (lossy) @@ -417,7 +423,7 @@ create_indexscan_node(Query *root, scan_node = make_indexscan(tlist, qpqual, - lfirsti(best_path->path.parent->relids), + baserelid, best_path->indexid, fixed_indxqual, indxqual); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f1c458b761..6af241f9a1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.49 2000/01/22 23:50:20 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.50 2000/01/23 02:06:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -725,7 +725,8 @@ genericcostestimate(Query *root, RelOptInfo *rel, double numIndexPages; /* Estimate the fraction of main-table tuples that will be visited */ - *indexSelectivity = clauselist_selec(root, indexQuals); + *indexSelectivity = clauselist_selectivity(root, indexQuals, + lfirsti(rel->relids)); /* Estimate the number of index tuples that will be visited */ numIndexTuples = *indexSelectivity * index->tuples; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 9c16fc8fea..fd6daee1b1 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.26 2000/01/22 23:50:26 tgl Exp $ + * $Id: cost.h,v 1.27 2000/01/23 02:06:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,15 +15,16 @@ #include "nodes/relation.h" +/* defaults for costsize.c's Cost parameters */ +/* NB: cost-estimation code should use the variables, not the constants! */ +#define CPU_PAGE_WEIGHT 0.033 +#define CPU_INDEX_PAGE_WEIGHT 0.017 + /* defaults for function attributes used for expensive function calculations */ #define BYTE_PCT 100 #define PERBYTE_CPU 0 #define PERCALL_CPU 0 #define OUTIN_RATIO 100 -/* defaults for costsize.c's Cost parameters */ -/* NB: cost-estimation code should use the variables, not the constants! */ -#define CPU_PAGE_WEIGHT 0.033 -#define CPU_INDEX_PAGE_WEIGHT 0.017 /* @@ -61,8 +62,14 @@ extern void set_joinrel_rows_width(Query *root, RelOptInfo *rel, * prototypes for clausesel.c * routines to compute clause selectivities */ -extern Selectivity restrictlist_selec(Query *root, List *restrictinfo_list); -extern Selectivity clauselist_selec(Query *root, List *clauses); -extern Selectivity compute_clause_selec(Query *root, Node *clause); +extern Selectivity restrictlist_selectivity(Query *root, + List *restrictinfo_list, + int varRelid); +extern Selectivity clauselist_selectivity(Query *root, + List *clauses, + int varRelid); +extern Selectivity clause_selectivity(Query *root, + Node *clause, + int varRelid); #endif /* COST_H */