1 /*-------------------------------------------------------------------------
4 * Routines to compute clause selectivities
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.96 2009/01/01 17:23:43 momjian Exp $
13 *-------------------------------------------------------------------------
17 #include "catalog/pg_operator.h"
18 #include "nodes/makefuncs.h"
19 #include "optimizer/clauses.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/pathnode.h"
22 #include "optimizer/plancat.h"
23 #include "parser/parsetree.h"
24 #include "utils/fmgroids.h"
25 #include "utils/lsyscache.h"
26 #include "utils/selfuncs.h"
30 * Data structure for accumulating info about possible range-query
31 * clause pairs in clauselist_selectivity.
33 typedef struct RangeQueryClause
35 struct RangeQueryClause *next; /* next in linked list */
36 Node *var; /* The common variable of the clauses */
37 bool have_lobound; /* found a low-bound clause yet? */
38 bool have_hibound; /* found a high-bound clause yet? */
39 Selectivity lobound; /* Selectivity of a var > something clause */
40 Selectivity hibound; /* Selectivity of a var < something clause */
43 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44 bool varonleft, bool isLTsel, Selectivity s2);
47 /****************************************************************************
48 * ROUTINES TO COMPUTE SELECTIVITIES
49 ****************************************************************************/
52 * clauselist_selectivity -
53 * Compute the selectivity of an implicitly-ANDed list of boolean
54 * expression clauses. The list can be empty, in which case 1.0
55 * must be returned. List elements may be either RestrictInfos
56 * or bare expression clauses --- the former is preferred since
57 * it allows caching of results.
59 * See clause_selectivity() for the meaning of the additional parameters.
61 * Our basic approach is to take the product of the selectivities of the
62 * subclauses. However, that's only right if the subclauses have independent
63 * probabilities, and in reality they are often NOT independent. So,
64 * we want to be smarter where we can.
66 * Currently, the only extra smarts we have is to recognize "range queries",
67 * such as "x > 34 AND x < 42". Clauses are recognized as possible range
68 * query components if they are restriction opclauses whose operators have
69 * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
70 * We pair up clauses of this form that refer to the same variable. An
71 * unpairable clause of this kind is simply multiplied into the selectivity
72 * product in the normal way. But when we find a pair, we know that the
73 * selectivities represent the relative positions of the low and high bounds
74 * within the column's range, so instead of figuring the selectivity as
75 * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
76 * see that hisel is the fraction of the range below the high bound, while
77 * losel is the fraction above the low bound; so hisel can be interpreted
78 * directly as a 0..1 value but we need to convert losel to 1-losel before
79 * interpreting it as a value. Then the available range is 1-losel to hisel.
80 * However, this calculation double-excludes nulls, so really we need
81 * hisel + losel + null_frac - 1.)
83 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
84 * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
85 * yields an impossible (negative) result.
87 * A free side-effect is that we can recognize redundant inequalities such
88 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
90 * Of course this is all very dependent on the behavior of
91 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
94 clauselist_selectivity(PlannerInfo *root,
98 SpecialJoinInfo *sjinfo)
100 Selectivity s1 = 1.0;
101 RangeQueryClause *rqlist = NULL;
105 * If there's exactly one clause, then no use in trying to match up
106 * pairs, so just go directly to clause_selectivity().
108 if (list_length(clauses) == 1)
109 return clause_selectivity(root, (Node *) linitial(clauses),
110 varRelid, jointype, sjinfo);
113 * Initial scan over clauses. Anything that doesn't look like a potential
114 * rangequery clause gets multiplied into s1 and forgotten. Anything that
115 * does gets inserted into an rqlist entry.
119 Node *clause = (Node *) lfirst(l);
123 /* Always compute the selectivity using clause_selectivity */
124 s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
127 * Check for being passed a RestrictInfo.
129 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
130 * 0.0; just use that rather than looking for range pairs.
132 if (IsA(clause, RestrictInfo))
134 rinfo = (RestrictInfo *) clause;
135 if (rinfo->pseudoconstant)
140 clause = (Node *) rinfo->clause;
146 * See if it looks like a restriction clause with a pseudoconstant on
147 * one side. (Anything more complicated than that might not behave in
148 * the simple way we are expecting.) Most of the tests here can be
149 * done more efficiently with rinfo than without.
151 if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
153 OpExpr *expr = (OpExpr *) clause;
154 bool varonleft = true;
159 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
160 (is_pseudo_constant_clause_relids(lsecond(expr->args),
161 rinfo->right_relids) ||
163 is_pseudo_constant_clause_relids(linitial(expr->args),
164 rinfo->left_relids)));
168 ok = (NumRelids(clause) == 1) &&
169 (is_pseudo_constant_clause(lsecond(expr->args)) ||
171 is_pseudo_constant_clause(linitial(expr->args))));
177 * If it's not a "<" or ">" operator, just merge the
178 * selectivity in generically. But if it's the right oprrest,
179 * add the clause to rqlist for later processing.
181 switch (get_oprrest(expr->opno))
184 addRangeClause(&rqlist, clause,
185 varonleft, true, s2);
188 addRangeClause(&rqlist, clause,
189 varonleft, false, s2);
192 /* Just merge the selectivity in generically */
196 continue; /* drop to loop bottom */
200 /* Not the right form, so treat it generically. */
205 * Now scan the rangequery pair list.
207 while (rqlist != NULL)
209 RangeQueryClause *rqnext;
211 if (rqlist->have_lobound && rqlist->have_hibound)
213 /* Successfully matched a pair of range clauses */
217 * Exact equality to the default value probably means the
218 * selectivity function punted. This is not airtight but should
221 if (rqlist->hibound == DEFAULT_INEQ_SEL ||
222 rqlist->lobound == DEFAULT_INEQ_SEL)
224 s2 = DEFAULT_RANGE_INEQ_SEL;
228 s2 = rqlist->hibound + rqlist->lobound - 1.0;
230 /* Adjust for double-exclusion of NULLs */
231 s2 += nulltestsel(root, IS_NULL, rqlist->var,
232 varRelid, jointype, sjinfo);
235 * A zero or slightly negative s2 should be converted into a
236 * small positive value; we probably are dealing with a very
237 * tight range and got a bogus result due to roundoff errors.
238 * However, if s2 is very negative, then we probably have
239 * default selectivity estimates on one or both sides of the
240 * range that we failed to recognize above for some reason.
247 * No data available --- use a default estimate that
248 * is small, but not real small.
250 s2 = DEFAULT_RANGE_INEQ_SEL;
255 * It's just roundoff error; use a small positive
262 /* Merge in the selectivity of the pair of clauses */
267 /* Only found one of a pair, merge it in generically */
268 if (rqlist->have_lobound)
269 s1 *= rqlist->lobound;
271 s1 *= rqlist->hibound;
273 /* release storage and advance */
274 rqnext = rqlist->next;
283 * addRangeClause --- add a new range clause for clauselist_selectivity
285 * Here is where we try to match up pairs of range-query clauses
288 addRangeClause(RangeQueryClause **rqlist, Node *clause,
289 bool varonleft, bool isLTsel, Selectivity s2)
291 RangeQueryClause *rqelem;
297 var = get_leftop((Expr *) clause);
298 is_lobound = !isLTsel; /* x < something is high bound */
302 var = get_rightop((Expr *) clause);
303 is_lobound = isLTsel; /* something < x is low bound */
306 for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
309 * We use full equal() here because the "var" might be a function of
310 * one or more attributes of the same relation...
312 if (!equal(var, rqelem->var))
314 /* Found the right group to put this clause in */
317 if (!rqelem->have_lobound)
319 rqelem->have_lobound = true;
320 rqelem->lobound = s2;
326 * We have found two similar clauses, such as
328 * Keep only the more restrictive one.
331 if (rqelem->lobound > s2)
332 rqelem->lobound = s2;
337 if (!rqelem->have_hibound)
339 rqelem->have_hibound = true;
340 rqelem->hibound = s2;
346 * We have found two similar clauses, such as
348 * Keep only the more restrictive one.
351 if (rqelem->hibound > s2)
352 rqelem->hibound = s2;
358 /* No matching var found, so make a new clause-pair data structure */
359 rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
363 rqelem->have_lobound = true;
364 rqelem->have_hibound = false;
365 rqelem->lobound = s2;
369 rqelem->have_lobound = false;
370 rqelem->have_hibound = true;
371 rqelem->hibound = s2;
373 rqelem->next = *rqlist;
378 * bms_is_subset_singleton
380 * Same result as bms_is_subset(s, bms_make_singleton(x)),
381 * but a little faster and doesn't leak memory.
383 * Is this of use anywhere else? If so move to bitmapset.c ...
386 bms_is_subset_singleton(const Bitmapset *s, int x)
388 switch (bms_membership(s))
393 return bms_is_member(x, s);
397 /* can't get here... */
402 * treat_as_join_clause -
403 * Decide whether an operator clause is to be handled by the
404 * restriction or join estimator. Subroutine for clause_selectivity().
407 treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
408 int varRelid, SpecialJoinInfo *sjinfo)
413 * Caller is forcing restriction mode (eg, because we are examining
414 * an inner indexscan qual).
418 else if (sjinfo == NULL)
421 * It must be a restriction clause, since it's being evaluated at
429 * Otherwise, it's a join if there's more than one relation used.
430 * We can optimize this calculation if an rinfo was passed.
432 * XXX Since we know the clause is being evaluated at a join,
433 * the only way it could be single-relation is if it was delayed
434 * by outer joins. Although we can make use of the restriction
435 * qual estimators anyway, it seems likely that we ought to account
436 * for the probability of injected nulls somehow.
439 return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
441 return (NumRelids(clause) > 1);
447 * clause_selectivity -
448 * Compute the selectivity of a general boolean expression clause.
450 * The clause can be either a RestrictInfo or a plain expression. If it's
451 * a RestrictInfo, we try to cache the selectivity for possible re-use,
452 * so passing RestrictInfos is preferred.
454 * varRelid is either 0 or a rangetable index.
456 * When varRelid is not 0, only variables belonging to that relation are
457 * considered in computing selectivity; other vars are treated as constants
458 * of unknown values. This is appropriate for estimating the selectivity of
459 * a join clause that is being used as a restriction clause in a scan of a
460 * nestloop join's inner relation --- varRelid should then be the ID of the
463 * When varRelid is 0, all variables are treated as variables. This
464 * is appropriate for ordinary join clauses and restriction clauses.
466 * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
467 * if the clause isn't a join clause.
469 * sjinfo is NULL for a non-join clause, otherwise it provides additional
470 * context information about the join being performed. There are some
472 * 1. For a special (not INNER) join, sjinfo is always a member of
473 * root->join_info_list.
474 * 2. For an INNER join, sjinfo is just a transient struct, and only the
475 * relids and jointype fields in it can be trusted.
476 * It is possible for jointype to be different from sjinfo->jointype.
477 * This indicates we are considering a variant join: either with
478 * the LHS and RHS switched, or with one input unique-ified.
480 * Note: when passing nonzero varRelid, it's normally appropriate to set
481 * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
482 * join clause; because we aren't treating it as a join clause.
485 clause_selectivity(PlannerInfo *root,
489 SpecialJoinInfo *sjinfo)
491 Selectivity s1 = 0.5; /* default for any unhandled clause type */
492 RestrictInfo *rinfo = NULL;
493 bool cacheable = false;
495 if (clause == NULL) /* can this still happen? */
498 if (IsA(clause, RestrictInfo))
500 rinfo = (RestrictInfo *) clause;
503 * If the clause is marked pseudoconstant, then it will be used as a
504 * gating qual and should not affect selectivity estimates; hence
505 * return 1.0. The only exception is that a constant FALSE may be
506 * taken as having selectivity 0.0, since it will surely mean no rows
507 * out of the plan. This case is simple enough that we need not
508 * bother caching the result.
510 if (rinfo->pseudoconstant)
512 if (!IsA(rinfo->clause, Const))
513 return (Selectivity) 1.0;
517 * If the clause is marked redundant, always return 1.0.
519 if (rinfo->this_selec > 1)
520 return (Selectivity) 1.0;
523 * If possible, cache the result of the selectivity calculation for
524 * the clause. We can cache if varRelid is zero or the clause
525 * contains only vars of that relid --- otherwise varRelid will affect
526 * the result, so mustn't cache.
529 bms_is_subset_singleton(rinfo->clause_relids, varRelid))
531 /* Cacheable --- do we already have the result? */
532 if (rinfo->this_selec >= 0)
533 return rinfo->this_selec;
538 * Proceed with examination of contained clause. If the clause is an
539 * OR-clause, we want to look at the variant with sub-RestrictInfos,
540 * so that per-subclause selectivities can be cached.
543 clause = (Node *) rinfo->orclause;
545 clause = (Node *) rinfo->clause;
548 if (IsA(clause, Var))
550 Var *var = (Var *) clause;
553 * We probably shouldn't ever see an uplevel Var here, but if we do,
554 * return the default selectivity...
556 if (var->varlevelsup == 0 &&
557 (varRelid == 0 || varRelid == (int) var->varno))
560 * A Var at the top of a clause must be a bool Var. This is
561 * equivalent to the clause reln.attribute = 't', so we
562 * compute the selectivity as if that is what we have.
564 s1 = restriction_selectivity(root,
565 BooleanEqualOperator,
572 else if (IsA(clause, Const))
574 /* bool constant is pretty easy... */
575 Const *con = (Const *) clause;
577 s1 = con->constisnull ? 0.0 :
578 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
580 else if (IsA(clause, Param))
582 /* see if we can replace the Param */
583 Node *subst = estimate_expression_value(root, clause);
585 if (IsA(subst, Const))
587 /* bool constant is pretty easy... */
588 Const *con = (Const *) subst;
590 s1 = con->constisnull ? 0.0 :
591 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
595 /* XXX any way to do better than default? */
598 else if (not_clause(clause))
600 /* inverse of the selectivity of the underlying clause */
601 s1 = 1.0 - clause_selectivity(root,
602 (Node *) get_notclausearg((Expr *) clause),
607 else if (and_clause(clause))
609 /* share code with clauselist_selectivity() */
610 s1 = clauselist_selectivity(root,
611 ((BoolExpr *) clause)->args,
616 else if (or_clause(clause))
619 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
620 * account for the probable overlap of selected tuple sets.
622 * XXX is this too conservative?
627 foreach(arg, ((BoolExpr *) clause)->args)
629 Selectivity s2 = clause_selectivity(root,
630 (Node *) lfirst(arg),
635 s1 = s1 + s2 - s1 * s2;
638 else if (is_opclause(clause) || IsA(clause, DistinctExpr))
640 Oid opno = ((OpExpr *) clause)->opno;
642 if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
644 /* Estimate selectivity for a join clause. */
645 s1 = join_selectivity(root, opno,
646 ((OpExpr *) clause)->args,
652 /* Estimate selectivity for a restriction clause. */
653 s1 = restriction_selectivity(root, opno,
654 ((OpExpr *) clause)->args,
659 * DistinctExpr has the same representation as OpExpr, but the
660 * contained operator is "=" not "<>", so we must negate the result.
661 * This estimation method doesn't give the right behavior for nulls,
662 * but it's better than doing nothing.
664 if (IsA(clause, DistinctExpr))
667 else if (is_funcclause(clause))
670 * This is not an operator, so we guess at the selectivity. THIS IS A
671 * HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE
672 * SELECTIVITIES THEMSELVES. -- JMH 7/9/92
674 s1 = (Selectivity) 0.3333333;
677 else if (IsA(clause, SubPlan) ||
678 IsA(clause, AlternativeSubPlan))
681 * Just for the moment! FIX ME! - vadim 02/04/98
683 s1 = (Selectivity) 0.5;
686 else if (IsA(clause, ScalarArrayOpExpr))
688 /* Use node specific selectivity calculation function */
689 s1 = scalararraysel(root,
690 (ScalarArrayOpExpr *) clause,
691 treat_as_join_clause(clause, rinfo,
697 else if (IsA(clause, RowCompareExpr))
699 /* Use node specific selectivity calculation function */
700 s1 = rowcomparesel(root,
701 (RowCompareExpr *) clause,
706 else if (IsA(clause, NullTest))
708 /* Use node specific selectivity calculation function */
709 s1 = nulltestsel(root,
710 ((NullTest *) clause)->nulltesttype,
711 (Node *) ((NullTest *) clause)->arg,
716 else if (IsA(clause, BooleanTest))
718 /* Use node specific selectivity calculation function */
719 s1 = booltestsel(root,
720 ((BooleanTest *) clause)->booltesttype,
721 (Node *) ((BooleanTest *) clause)->arg,
726 else if (IsA(clause, CurrentOfExpr))
728 /* CURRENT OF selects at most one row of its table */
729 CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
730 RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
732 if (crel->tuples > 0)
733 s1 = 1.0 / crel->tuples;
735 else if (IsA(clause, RelabelType))
737 /* Not sure this case is needed, but it can't hurt */
738 s1 = clause_selectivity(root,
739 (Node *) ((RelabelType *) clause)->arg,
744 else if (IsA(clause, CoerceToDomain))
746 /* Not sure this case is needed, but it can't hurt */
747 s1 = clause_selectivity(root,
748 (Node *) ((CoerceToDomain *) clause)->arg,
754 /* Cache the result if possible */
756 rinfo->this_selec = s1;
758 #ifdef SELECTIVITY_DEBUG
759 elog(DEBUG4, "clause_selectivity: s1 %f", s1);
760 #endif /* SELECTIVITY_DEBUG */