OSDN Git Service

Fix best_inner_indexscan to return both the cheapest-total-cost and
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 22 May 2007 01:40:33 +0000 (01:40 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 22 May 2007 01:40:33 +0000 (01:40 +0000)
cheapest-startup-cost innerjoin indexscans, and make joinpath.c consider
both of these (when different) as the inside of a nestloop join.  The
original design was based on the assumption that indexscan paths always
have negligible startup cost, and so total cost is the only important
figure of merit; an assumption that's obviously broken by bitmap
indexscans.  This oversight could lead to choosing poor plans in cases
where fast-start behavior is more important than total cost, such as
LIMIT and IN queries.  8.1-vintage brain fade exposed by an example from
Chuck D.

src/backend/nodes/outfuncs.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/joinpath.c
src/include/nodes/relation.h
src/include/optimizer/paths.h

index 58d8ee6..4bd923a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.306 2007/04/27 22:05:47 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.307 2007/05/22 01:40:33 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1440,7 +1440,8 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
        WRITE_NODE_TYPE("INNERINDEXSCANINFO");
        WRITE_BITMAPSET_FIELD(other_relids);
        WRITE_BOOL_FIELD(isouterjoin);
-       WRITE_NODE_FIELD(best_innerpath);
+       WRITE_NODE_FIELD(cheapest_startup_innerpath);
+       WRITE_NODE_FIELD(cheapest_total_innerpath);
 }
 
 static void
index 268aa92..b9439e6 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.221 2007/04/17 20:03:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.222 2007/05/22 01:40:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1599,26 +1599,26 @@ eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
 
 /*
  * best_inner_indexscan
- *       Finds the best available inner indexscan for a nestloop join
+ *       Finds the best available inner indexscans for a nestloop join
  *       with the given rel on the inside and the given outer_rel outside.
- *       May return NULL if there are no possible inner indexscans.
  *
- * We ignore ordering considerations (since a nestloop's inner scan's order
- * is uninteresting).  Also, we consider only total cost when deciding which
- * of two possible paths is better --- this assumes that all indexpaths have
- * negligible startup cost.  (True today, but someday we might have to think
- * harder.)  Therefore, there is only one dimension of comparison and so it's
- * sufficient to return a single "best" path.
+ * *cheapest_startup gets the path with least startup cost
+ * *cheapest_total gets the path with least total cost (often the same path)
+ * Both are set to NULL if there are no possible inner indexscans.
+ *
+ * We ignore ordering considerations, since a nestloop's inner scan's order
+ * is uninteresting.  Hence startup cost and total cost are the only figures
+ * of merit to consider.
  *
  * Note: create_index_paths() must have been run previously for this rel,
- * else the result will always be NULL.
+ * else the results will always be NULL.
  */
-Path *
+void
 best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
-                                        RelOptInfo *outer_rel, JoinType jointype)
+                                        RelOptInfo *outer_rel, JoinType jointype,
+                                        Path **cheapest_startup, Path **cheapest_total)
 {
        Relids          outer_relids;
-       Path       *cheapest;
        bool            isouterjoin;
        List       *clause_list;
        List       *indexpaths;
@@ -1627,6 +1627,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
        InnerIndexscanInfo *info;
        MemoryContext oldcontext;
 
+       /* Initialize results for failure returns */
+       *cheapest_startup = *cheapest_total = NULL;
+
        /*
         * Nestloop only supports inner, left, and IN joins.
         */
@@ -1641,14 +1644,14 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
                        isouterjoin = true;
                        break;
                default:
-                       return NULL;
+                       return;
        }
 
        /*
         * If there are no indexable joinclauses for this rel, exit quickly.
         */
        if (bms_is_empty(rel->index_outer_relids))
-               return NULL;
+               return;
 
        /*
         * Otherwise, we have to do path selection in the main planning context,
@@ -1668,17 +1671,17 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
        {
                bms_free(outer_relids);
                MemoryContextSwitchTo(oldcontext);
-               return NULL;
+               return;
        }
 
        /*
         * Look to see if we already computed the result for this set of relevant
         * outerrels.  (We include the isouterjoin status in the cache lookup key
         * for safety.  In practice I suspect this is not necessary because it
-        * should always be the same for a given innerrel.)
+        * should always be the same for a given combination of rels.)
         *
         * NOTE: because we cache on outer_relids rather than outer_rel->relids,
-        * we will report the same path and hence path cost for joins with
+        * we will report the same paths and hence path cost for joins with
         * different sets of irrelevant rels on the outside.  Now that cost_index
         * is sensitive to outer_rel->rows, this is not really right.  However the
         * error is probably not large.  Is it worth establishing a separate cache
@@ -1692,7 +1695,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
                {
                        bms_free(outer_relids);
                        MemoryContextSwitchTo(oldcontext);
-                       return info->best_innerpath;
+                       *cheapest_startup = info->cheapest_startup_innerpath;
+                       *cheapest_total = info->cheapest_total_innerpath;
+                       return;
                }
        }
 
@@ -1755,28 +1760,32 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
        }
 
        /*
-        * Now choose the cheapest member of indexpaths.
+        * Now choose the cheapest members of indexpaths.
         */
-       cheapest = NULL;
-       foreach(l, indexpaths)
+       if (indexpaths != NIL)
        {
-               Path       *path = (Path *) lfirst(l);
+               *cheapest_startup = *cheapest_total = (Path *) linitial(indexpaths);
+
+               for_each_cell(l, lnext(list_head(indexpaths)))
+               {
+                       Path       *path = (Path *) lfirst(l);
 
-               if (cheapest == NULL ||
-                       compare_path_costs(path, cheapest, TOTAL_COST) < 0)
-                       cheapest = path;
+                       if (compare_path_costs(path, *cheapest_startup, STARTUP_COST) < 0)
+                               *cheapest_startup = path;
+                       if (compare_path_costs(path, *cheapest_total, TOTAL_COST) < 0)
+                               *cheapest_total = path;
+               }
        }
 
-       /* Cache the result --- whether positive or negative */
+       /* Cache the results --- whether positive or negative */
        info = makeNode(InnerIndexscanInfo);
        info->other_relids = outer_relids;
        info->isouterjoin = isouterjoin;
-       info->best_innerpath = cheapest;
+       info->cheapest_startup_innerpath = *cheapest_startup;
+       info->cheapest_total_innerpath = *cheapest_total;
        rel->index_inner_paths = lcons(info, rel->index_inner_paths);
 
        MemoryContextSwitchTo(oldcontext);
-
-       return cheapest;
 }
 
 /*
index 3a68d79..3671d69 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.112 2007/05/22 01:40:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -286,10 +286,11 @@ sort_inner_and_outer(PlannerInfo *root,
  *       only outer paths that are already ordered well enough for merging).
  *
  * We always generate a nestloop path for each available outer path.
- * In fact we may generate as many as four: one on the cheapest-total-cost
+ * In fact we may generate as many as five: one on the cheapest-total-cost
  * inner path, one on the same with materialization, one on the
- * cheapest-startup-cost inner path (if different),
- * and one on the best inner-indexscan path (if any).
+ * cheapest-startup-cost inner path (if different), one on the
+ * cheapest-total inner-indexscan path (if any), and one on the
+ * cheapest-startup inner-indexscan path (if different).
  *
  * We also consider mergejoins if mergejoin clauses are available.     We have
  * two ways to generate the inner path for a mergejoin: sort the cheapest
@@ -325,7 +326,8 @@ match_unsorted_outer(PlannerInfo *root,
        Path       *inner_cheapest_startup = innerrel->cheapest_startup_path;
        Path       *inner_cheapest_total = innerrel->cheapest_total_path;
        Path       *matpath = NULL;
-       Path       *bestinnerjoin = NULL;
+       Path       *index_cheapest_startup = NULL;
+       Path       *index_cheapest_total = NULL;
        ListCell   *l;
 
        /*
@@ -383,17 +385,20 @@ match_unsorted_outer(PlannerInfo *root,
                                create_material_path(innerrel, inner_cheapest_total);
 
                /*
-                * Get the best innerjoin indexpath (if any) for this outer rel. It's
-                * the same for all outer paths.
+                * Get the best innerjoin indexpaths (if any) for this outer rel.
+                * They're the same for all outer paths.
                 */
                if (innerrel->reloptkind != RELOPT_JOINREL)
                {
                        if (IsA(inner_cheapest_total, AppendPath))
-                               bestinnerjoin = best_appendrel_indexscan(root, innerrel,
-                                                                                                                outerrel, jointype);
+                               index_cheapest_total = best_appendrel_indexscan(root,
+                                                                                                                               innerrel,
+                                                                                                                               outerrel,
+                                                                                                                               jointype);
                        else if (innerrel->rtekind == RTE_RELATION)
-                               bestinnerjoin = best_inner_indexscan(root, innerrel,
-                                                                                                        outerrel, jointype);
+                               best_inner_indexscan(root, innerrel, outerrel, jointype,
+                                                                        &index_cheapest_startup,
+                                                                        &index_cheapest_total);
                }
        }
 
@@ -435,8 +440,8 @@ match_unsorted_outer(PlannerInfo *root,
                         * Always consider a nestloop join with this outer and
                         * cheapest-total-cost inner.  When appropriate, also consider
                         * using the materialized form of the cheapest inner, the
-                        * cheapest-startup-cost inner path, and the best innerjoin
-                        * indexpath.
+                        * cheapest-startup-cost inner path, and the cheapest innerjoin
+                        * indexpaths.
                         */
                        add_path(joinrel, (Path *)
                                         create_nestloop_path(root,
@@ -464,13 +469,23 @@ match_unsorted_outer(PlannerInfo *root,
                                                                                          inner_cheapest_startup,
                                                                                          restrictlist,
                                                                                          merge_pathkeys));
-                       if (bestinnerjoin != NULL)
+                       if (index_cheapest_total != NULL)
                                add_path(joinrel, (Path *)
                                                 create_nestloop_path(root,
                                                                                          joinrel,
                                                                                          jointype,
                                                                                          outerpath,
-                                                                                         bestinnerjoin,
+                                                                                         index_cheapest_total,
+                                                                                         restrictlist,
+                                                                                         merge_pathkeys));
+                       if (index_cheapest_startup != NULL &&
+                               index_cheapest_startup != index_cheapest_total)
+                               add_path(joinrel, (Path *)
+                                                create_nestloop_path(root,
+                                                                                         joinrel,
+                                                                                         jointype,
+                                                                                         outerpath,
+                                                                                         index_cheapest_startup,
                                                                                          restrictlist,
                                                                                          merge_pathkeys));
                }
@@ -789,6 +804,9 @@ hash_inner_and_outer(PlannerInfo *root,
  *       with the given append relation on the inside and the given outer_rel
  *       outside.      Returns an AppendPath comprising the best inner scans, or
  *       NULL if there are no possible inner indexscans.
+ *
+ * Note that we currently consider only cheapest-total-cost.  It's not
+ * very clear what cheapest-startup-cost might mean for an AppendPath.
  */
 static Path *
 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
@@ -804,7 +822,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
                AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
                int                     childRTindex;
                RelOptInfo *childrel;
-               Path       *bestinnerjoin;
+               Path       *index_cheapest_startup;
+               Path       *index_cheapest_total;
 
                /* append_rel_list contains all append rels; ignore others */
                if (appinfo->parent_relid != parentRTindex)
@@ -824,10 +843,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
                        continue;                       /* OK, we can ignore it */
 
                /*
-                * Get the best innerjoin indexpath (if any) for this child rel.
+                * Get the best innerjoin indexpaths (if any) for this child rel.
                 */
-               bestinnerjoin = best_inner_indexscan(root, childrel,
-                                                                                        outer_rel, jointype);
+               best_inner_indexscan(root, childrel, outer_rel, jointype,
+                                                        &index_cheapest_startup, &index_cheapest_total);
 
                /*
                 * If no luck on an indexpath for this rel, we'll still consider an
@@ -835,12 +854,12 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
                 * find at least one indexpath, else there's not going to be any
                 * improvement over the base path for the appendrel.
                 */
-               if (bestinnerjoin)
+               if (index_cheapest_total)
                        found_indexscan = true;
                else
-                       bestinnerjoin = childrel->cheapest_total_path;
+                       index_cheapest_total = childrel->cheapest_total_path;
 
-               append_paths = lappend(append_paths, bestinnerjoin);
+               append_paths = lappend(append_paths, index_cheapest_total);
        }
 
        if (!found_indexscan)
index 57af937..c7225a1 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.141 2007/04/21 21:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.142 2007/05/22 01:40:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -996,20 +996,20 @@ typedef struct MergeScanSelCache
  * relation includes all other relids appearing in those joinclauses.
  * The set of usable joinclauses, and thus the best inner indexscan,
  * thus varies depending on which outer relation we consider; so we have
- * to recompute the best such path for every join.     To avoid lots of
+ * to recompute the best such paths for every join.  To avoid lots of
  * redundant computation, we cache the results of such searches.  For
  * each relation we compute the set of possible otherrelids (all relids
  * appearing in joinquals that could become indexquals for this table).
  * Two outer relations whose relids have the same intersection with this
  * set will have the same set of available joinclauses and thus the same
- * best inner indexscan for the inner relation.  By taking the intersection
+ * best inner indexscans for the inner relation.  By taking the intersection
  * before scanning the cache, we avoid recomputing when considering
  * join rels that differ only by the inclusion of irrelevant other rels.
  *
  * The search key also includes a bool showing whether the join being
  * considered is an outer join.  Since we constrain the join order for
  * outer joins, I believe that this bool can only have one possible value
- * for any particular base relation; but store it anyway to avoid confusion.
+ * for any particular lookup key; but store it anyway to avoid confusion.
  */
 
 typedef struct InnerIndexscanInfo
@@ -1018,8 +1018,9 @@ typedef struct InnerIndexscanInfo
        /* The lookup key: */
        Relids          other_relids;   /* a set of relevant other relids */
        bool            isouterjoin;    /* true if join is outer */
-       /* Best path for this lookup key: */
-       Path       *best_innerpath; /* best inner indexscan, or NULL if none */
+       /* Best paths for this lookup key (NULL if no available indexscans): */
+       Path       *cheapest_startup_innerpath; /* cheapest startup cost */
+       Path       *cheapest_total_innerpath;   /* cheapest total cost */
 } InnerIndexscanInfo;
 
 /*
index c7b8d8c..7199a54 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.97 2007/04/15 20:09:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.98 2007/05/22 01:40:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,9 @@ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
 extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
                                                 List *clauses, List *outer_clauses,
                                                 RelOptInfo *outer_rel);
-extern Path *best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
-                                        RelOptInfo *outer_rel, JoinType jointype);
+extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
+                                        RelOptInfo *outer_rel, JoinType jointype,
+                                        Path **cheapest_startup, Path **cheapest_total);
 extern List *group_clauses_by_indexkey(IndexOptInfo *index,
                                                  List *clauses, List *outer_clauses,
                                                  Relids outer_relids,