Be more realistic about plans involving Materialize nodes: take their cost into account while planning.

author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 30 Nov 2002 05:21:03 +0000 (05:21 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 30 Nov 2002 05:21:03 +0000 (05:21 +0000)
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c

index 7ceb7cd..1d7bf67 100644 (file)
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- *     $Id: execAmi.c,v 1.64 2002/06/20 20:29:27 momjian Exp $
+ *     $Id: execAmi.c,v 1.65 2002/11/30 05:21:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -170,14 +170,10 @@ ExecReScan(Plan *node, ExprContext *exprCtxt, Plan *parent)
         }
  }
  
-/* ----------------------------------------------------------------
- *             ExecMarkPos
- *
- *             Marks the current scan position.
+/*
+ * ExecMarkPos
   *
- *             XXX Needs to be extended to include all the node types,
- *             or at least all the ones that can be directly below a mergejoin.
- * ----------------------------------------------------------------
+ * Marks the current scan position.
   */
  void
  ExecMarkPos(Plan *node)
@@ -192,6 +188,10 @@ ExecMarkPos(Plan *node)
                         ExecIndexMarkPos((IndexScan *) node);
                         break;
  
+               case T_TidScan:
+                       ExecTidMarkPos((TidScan *) node);
+                       break;
+
                 case T_FunctionScan:
                         ExecFunctionMarkPos((FunctionScan *) node);
                         break;
@@ -204,10 +204,6 @@ ExecMarkPos(Plan *node)
                         ExecSortMarkPos((Sort *) node);
                         break;
  
-               case T_TidScan:
-                       ExecTidMarkPos((TidScan *) node);
-                       break;
-
                 default:
                         /* don't make hard error unless caller asks to restore... */
                         elog(LOG, "ExecMarkPos: node type %d not supported",
@@ -216,14 +212,10 @@ ExecMarkPos(Plan *node)
         }
  }
  
-/* ----------------------------------------------------------------
- *             ExecRestrPos
- *
- *             restores the scan position previously saved with ExecMarkPos()
+/*
+ * ExecRestrPos
   *
- *             XXX Needs to be extended to include all the node types,
- *             or at least all the ones that can be directly below a mergejoin.
- * ----------------------------------------------------------------
+ * restores the scan position previously saved with ExecMarkPos()
   */
  void
  ExecRestrPos(Plan *node)
@@ -238,6 +230,10 @@ ExecRestrPos(Plan *node)
                         ExecIndexRestrPos((IndexScan *) node);
                         break;
  
+               case T_TidScan:
+                       ExecTidRestrPos((TidScan *) node);
+                       break;
+
                 case T_FunctionScan:
                         ExecFunctionRestrPos((FunctionScan *) node);
                         break;
@@ -256,3 +252,29 @@ ExecRestrPos(Plan *node)
                         break;
         }
  }
+
+/*
+ * ExecSupportsMarkRestore - does a plan type support mark/restore?
+ *
+ * XXX Ideally, all plan node types would support mark/restore, and this
+ * wouldn't be needed.  For now, this had better match the routines above.
+ */
+bool
+ExecSupportsMarkRestore(NodeTag plantype)
+{
+       switch (plantype)
+       {
+               case T_SeqScan:
+               case T_IndexScan:
+               case T_TidScan:
+               case T_FunctionScan:
+               case T_Material:
+               case T_Sort:
+                       return true;
+
+               default:
+                       break;
+       }
+
+       return false;
+}
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c

index 074a527..b6848d6 100644 (file)
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.37 2002/09/04 20:31:18 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.38 2002/11/30 05:21:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,9 +19,8 @@
   *             ExecInitSeqScan                 creates and initializes a seqscan node.
   *             ExecEndSeqScan                  releases any storage allocated.
   *             ExecSeqReScan                   rescans the relation
- *             ExecMarkPos                             marks scan position
- *             ExecRestrPos                    restores scan position
- *
+ *             ExecSeqMarkPos                  marks scan position
+ *             ExecSeqRestrPos                 restores scan position
   */
  #include "postgres.h"
  
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c

index f5e4776..0fb7c6d 100644 (file)
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -8,19 +8,19 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeTidscan.c,v 1.26 2002/09/04 20:31:18 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeTidscan.c,v 1.27 2002/11/30 05:21:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  /*
   * INTERFACE ROUTINES
   *
- *             ExecTidScan             scans a relation using tids
+ *             ExecTidScan                     scans a relation using tids
   *             ExecInitTidScan         creates and initializes state info.
   *             ExecTidReScan           rescans the tid relation.
   *             ExecEndTidScan          releases all storage.
   *             ExecTidMarkPos          marks scan position.
- *
+ *             ExecTidRestrPos         restores scan position.
   */
  #include "postgres.h"
  
@@ -345,7 +345,6 @@ ExecTidMarkPos(TidScan *node)
         tidstate->tss_MarkTidPtr = tidstate->tss_TidPtr;
  }
  
-#ifdef NOT_USED
  /* ----------------------------------------------------------------
   *             ExecTidRestrPos
   *
@@ -363,7 +362,6 @@ ExecTidRestrPos(TidScan *node)
         tidstate = node->tidstate;
         tidstate->tss_TidPtr = tidstate->tss_MarkTidPtr;
  }
-#endif
  
  /* ----------------------------------------------------------------
   *             ExecInitTidScan
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 7798913..d11b5ed 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.224 2002/11/30 00:08:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.225 2002/11/30 05:21:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1143,6 +1143,27 @@ _copyResultPath(ResultPath *from)
  }
  
  /*
+ * _copyMaterialPath
+ */
+static MaterialPath *
+_copyMaterialPath(MaterialPath *from)
+{
+       MaterialPath    *newnode = makeNode(MaterialPath);
+
+       /*
+        * copy node superclass fields
+        */
+       CopyPathFields((Path *) from, (Path *) newnode);
+
+       /*
+        * copy remainder of node
+        */
+       COPY_NODE_FIELD(subpath);
+
+       return newnode;
+}
+
+/*
   * CopyJoinPathFields
   *
   *             This function copies the fields of the JoinPath node.  It is used by
@@ -2739,6 +2760,9 @@ copyObject(void *from)
                 case T_RelOptInfo:
                         retval = _copyRelOptInfo(from);
                         break;
+               case T_IndexOptInfo:
+                       retval = _copyIndexOptInfo(from);
+                       break;
                 case T_Path:
                         retval = _copyPath(from);
                         break;
@@ -2754,6 +2778,9 @@ copyObject(void *from)
                 case T_ResultPath:
                         retval = _copyResultPath(from);
                         break;
+               case T_MaterialPath:
+                       retval = _copyMaterialPath(from);
+                       break;
                 case T_NestPath:
                         retval = _copyNestPath(from);
                         break;
@@ -2772,9 +2799,6 @@ copyObject(void *from)
                 case T_JoinInfo:
                         retval = _copyJoinInfo(from);
                         break;
-               case T_IndexOptInfo:
-                       retval = _copyIndexOptInfo(from);
-                       break;
                 case T_InnerIndexscanInfo:
                         retval = _copyInnerIndexscanInfo(from);
                         break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 83c2cb4..c5a492d 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.169 2002/11/25 21:29:36 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.170 2002/11/30 05:21:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -457,6 +457,16 @@ _equalResultPath(ResultPath *a, ResultPath *b)
  }
  
  static bool
+_equalMaterialPath(MaterialPath *a, MaterialPath *b)
+{
+       if (!_equalPath((Path *) a, (Path *) b))
+               return false;
+       COMPARE_NODE_FIELD(subpath);
+
+       return true;
+}
+
+static bool
  _equalJoinPath(JoinPath *a, JoinPath *b)
  {
         if (!_equalPath((Path *) a, (Path *) b))
@@ -1704,12 +1714,27 @@ equal(void *a, void *b)
                 case T_RelOptInfo:
                         retval = _equalRelOptInfo(a, b);
                         break;
+               case T_IndexOptInfo:
+                       retval = _equalIndexOptInfo(a, b);
+                       break;
                 case T_Path:
                         retval = _equalPath(a, b);
                         break;
                 case T_IndexPath:
                         retval = _equalIndexPath(a, b);
                         break;
+               case T_TidPath:
+                       retval = _equalTidPath(a, b);
+                       break;
+               case T_AppendPath:
+                       retval = _equalAppendPath(a, b);
+                       break;
+               case T_ResultPath:
+                       retval = _equalResultPath(a, b);
+                       break;
+               case T_MaterialPath:
+                       retval = _equalMaterialPath(a, b);
+                       break;
                 case T_NestPath:
                         retval = _equalNestPath(a, b);
                         break;
@@ -1731,18 +1756,6 @@ equal(void *a, void *b)
                 case T_InnerIndexscanInfo:
                         retval = _equalInnerIndexscanInfo(a, b);
                         break;
-               case T_TidPath:
-                       retval = _equalTidPath(a, b);
-                       break;
-               case T_AppendPath:
-                       retval = _equalAppendPath(a, b);
-                       break;
-               case T_ResultPath:
-                       retval = _equalResultPath(a, b);
-                       break;
-               case T_IndexOptInfo:
-                       retval = _equalIndexOptInfo(a, b);
-                       break;
  
                 case T_List:
                         {
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 528148f..3a07433 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.184 2002/11/30 00:08:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.185 2002/11/30 05:21:02 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1011,6 +1011,16 @@ _outResultPath(StringInfo str, ResultPath *node)
  }
  
  static void
+_outMaterialPath(StringInfo str, MaterialPath *node)
+{
+       WRITE_NODE_TYPE("MATERIALPATH");
+
+       _outPathInfo(str, (Path *) node);
+
+       WRITE_NODE_FIELD(subpath);
+}
+
+static void
  _outNestPath(StringInfo str, NestPath *node)
  {
         WRITE_NODE_TYPE("NESTPATH");
@@ -1557,6 +1567,9 @@ _outNode(StringInfo str, void *obj)
                         case T_ResultPath:
                                 _outResultPath(str, obj);
                                 break;
+                       case T_MaterialPath:
+                               _outMaterialPath(str, obj);
+                               break;
                         case T_NestPath:
                                 _outNestPath(str, obj);
                                 break;
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README

index 698b831..f4d64eb 100644 (file)
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -259,7 +259,8 @@ RelOptInfo      - a relation or joined relations
    IndexPath     - index scans
    TidPath       - scan by CTID
    AppendPath    - append multiple subpaths together
-  ResultPath    - a Result plan (used for variable-free tlist or qual)
+  ResultPath    - a Result plan node (used for variable-free tlist or qual)
+  MaterialPath  - a Material plan node
    NestPath      - nested-loop joins
    MergePath     - merge joins
    HashPath      - hash joins
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 1076413..c0b3ab4 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.92 2002/11/13 00:39:47 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -724,33 +724,34 @@ static void
  print_path(Query *root, Path *path, int indent)
  {
         const char *ptype;
-       bool            join;
+       bool            join = false;
+       Path       *subpath = NULL;
         int                     i;
  
         switch (nodeTag(path))
         {
                 case T_Path:
                         ptype = "SeqScan";
-                       join = false;
                         break;
                 case T_IndexPath:
                         ptype = "IdxScan";
-                       join = false;
                         break;
                 case T_TidPath:
                         ptype = "TidScan";
-                       join = false;
                         break;
                 case T_AppendPath:
                         ptype = "Append";
-                       join = false;
                         break;
                 case T_ResultPath:
                         ptype = "Result";
-                       join = false;
+                       subpath = ((ResultPath *) path)->subpath;
+                       break;
+               case T_MaterialPath:
+                       ptype = "Material";
+                       subpath = ((MaterialPath *) path)->subpath;
                         break;
                 case T_NestPath:
-                       ptype = "Nestloop";
+                       ptype = "NestLoop";
                         join = true;
                         break;
                 case T_MergePath:
@@ -763,7 +764,6 @@ print_path(Query *root, Path *path, int indent)
                         break;
                 default:
                         ptype = "???Path";
-                       join = false;
                         break;
         }
  
@@ -814,6 +814,9 @@ print_path(Query *root, Path *path, int indent)
                 print_path(root, jp->outerjoinpath, indent + 1);
                 print_path(root, jp->innerjoinpath, indent + 1);
         }
+
+       if (subpath)
+               print_path(root, subpath, indent + 1);
  }
  
  void
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index fbdeea4..1db310f 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -230,7 +230,7 @@ cost_index(Path *path, Query *root,
         Assert(length(baserel->relids) == 1);
         Assert(baserel->rtekind == RTE_RELATION);
  
-       if (!enable_indexscan && !is_injoin)
+       if (!enable_indexscan)
                 startup_cost += disable_cost;
  
         /*
@@ -514,6 +514,43 @@ cost_sort(Path *path, Query *root,
  }
  
  /*
+ * cost_material
+ *       Determines and returns the cost of materializing a relation, including
+ *       the cost of reading the input data.
+ *
+ * If the total volume of data to materialize exceeds SortMem, we will need
+ * to write it to disk, so the cost is much higher in that case.
+ */
+void
+cost_material(Path *path,
+                         Cost input_cost, double tuples, int width)
+{
+       Cost            startup_cost = input_cost;
+       Cost            run_cost = 0;
+       double          nbytes = relation_byte_size(tuples, width);
+       long            sortmembytes = SortMem * 1024L;
+
+       /* disk costs */
+       if (nbytes > sortmembytes)
+       {
+               double          npages = ceil(nbytes / BLCKSZ);
+
+               /* We'll write during startup and read during retrieval */
+               startup_cost += npages;
+               run_cost += npages;
+       }
+
+       /*
+        * Also charge a small amount per extracted tuple.  We use cpu_tuple_cost
+        * so that it doesn't appear worthwhile to materialize a bare seqscan.
+        */
+       run_cost += cpu_tuple_cost * tuples;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
+}
+
+/*
   * cost_agg
   *             Determines and returns the cost of performing an Agg plan node,
   *             including the cost of its input.
@@ -630,19 +667,17 @@ cost_nestloop(Path *path, Query *root,
          * before we can start returning tuples, so the join's startup cost is
          * their sum.  What's not so clear is whether the inner path's
          * startup_cost must be paid again on each rescan of the inner path.
-        * This is not true if the inner path is materialized, but probably is
-        * true otherwise.      Since we don't yet have clean handling of the
-        * decision whether to materialize a path, we can't tell here which
-        * will happen.  As a compromise, charge 50% of the inner startup cost
-        * for each restart.
+        * This is not true if the inner path is materialized or is a hashjoin,
+        * but probably is true otherwise.
          */
         startup_cost += outer_path->startup_cost + inner_path->startup_cost;
         run_cost += outer_path->total_cost - outer_path->startup_cost;
         run_cost += outer_path->parent->rows *
                 (inner_path->total_cost - inner_path->startup_cost);
-       if (outer_path->parent->rows > 1)
-               run_cost += (outer_path->parent->rows - 1) *
-                       inner_path->startup_cost * 0.5;
+       if (!(IsA(inner_path, MaterialPath) ||
+                 IsA(inner_path, HashPath)) &&
+               outer_path->parent->rows > 1)
+               run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;
  
         /*
          * Number of tuples processed (not number emitted!).  If inner path is
@@ -1544,7 +1579,7 @@ set_rel_width(Query *root, RelOptInfo *rel)
  static double
  relation_byte_size(double tuples, int width)
  {
-       return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData)));
+       return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
  }
  
  /*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index 6069a34..65d0d8f 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.74 2002/11/30 05:21:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -281,8 +281,9 @@ sort_inner_and_outer(Query *root,
   *       only outer paths that are already ordered well enough for merging).
   *
   * We always generate a nestloop path for each available outer path.
- * In fact we may generate as many as three: one on the cheapest-total-cost
- * inner path, one on the cheapest-startup-cost inner path (if different),
+ * In fact we may generate as many as four: one on the cheapest-total-cost
+ * inner path, one on the same with materialization, one on the
+ * cheapest-startup-cost inner path (if different), 
   * and one on the best inner-indexscan path (if any).
   *
   * We also consider mergejoins if mergejoin clauses are available.     We have
@@ -315,7 +316,8 @@ match_unsorted_outer(Query *root,
  {
         bool            nestjoinOK;
         bool            useallclauses;
-       Path       *bestinnerjoin;
+       Path       *matpath = NULL;
+       Path       *bestinnerjoin = NULL;
         List       *i;
  
         /*
@@ -345,12 +347,26 @@ match_unsorted_outer(Query *root,
                         break;
         }
  
-       /*
-        * Get the best innerjoin indexpath (if any) for this outer rel. It's
-        * the same for all outer paths.
-        */
-       bestinnerjoin = best_inner_indexscan(root, innerrel,
-                                                                                outerrel->relids, jointype);
+       if (nestjoinOK)
+       {
+               /*
+                * If the cheapest inner path is a join or seqscan, we should consider
+                * materializing it.  (This is a heuristic: we could consider it
+                * always, but for inner indexscans it's probably a waste of time.)
+                */
+               if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
+                         IsA(innerrel->cheapest_total_path, TidPath)))
+                       matpath = (Path *)
+                               create_material_path(innerrel, 
+                                                                        innerrel->cheapest_total_path);
+
+               /*
+                * Get the best innerjoin indexpath (if any) for this outer rel. It's
+                * the same for all outer paths.
+                */
+               bestinnerjoin = best_inner_indexscan(root, innerrel,
+                                                                                        outerrel->relids, jointype);
+       }
  
         foreach(i, outerrel->pathlist)
         {
@@ -376,8 +392,9 @@ match_unsorted_outer(Query *root,
                 {
                         /*
                          * Always consider a nestloop join with this outer and
-                        * cheapest-total-cost inner.   Consider nestloops using the
-                        * cheapest-startup-cost inner as well, and the best innerjoin
+                        * cheapest-total-cost inner.  When appropriate, also consider
+                        * using the materialized form of the cheapest inner, the
+                        * cheapest-startup-cost inner path, and the best innerjoin
                          * indexpath.
                          */
                         add_path(joinrel, (Path *)
@@ -388,6 +405,15 @@ match_unsorted_outer(Query *root,
                                                                                   innerrel->cheapest_total_path,
                                                                                   restrictlist,
                                                                                   merge_pathkeys));
+                       if (matpath != NULL)
+                               add_path(joinrel, (Path *)
+                                                create_nestloop_path(root,
+                                                                                         joinrel,
+                                                                                         jointype,
+                                                                                         outerpath,
+                                                                                         matpath,
+                                                                                         restrictlist,
+                                                                                         merge_pathkeys));
                         if (innerrel->cheapest_startup_path !=
                                 innerrel->cheapest_total_path)
                                 add_path(joinrel, (Path *)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index d43e327..148bd86 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.125 2002/11/30 00:08:17 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.126 2002/11/30 05:21:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -35,6 +35,7 @@ static Scan *create_scan_plan(Query *root, Path *best_path);
  static Join *create_join_plan(Query *root, JoinPath *best_path);
  static Append *create_append_plan(Query *root, AppendPath *best_path);
  static Result *create_result_plan(Query *root, ResultPath *best_path);
+static Material *create_material_plan(Query *root, MaterialPath *best_path);
  static SeqScan *create_seqscan_plan(Path *best_path, List *tlist,
                                         List *scan_clauses);
  static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path,
@@ -141,6 +142,10 @@ create_plan(Query *root, Path *best_path)
                         plan = (Plan *) create_result_plan(root,
                                                                                            (ResultPath *) best_path);
                         break;
+               case T_Material:
+                       plan = (Plan *) create_material_plan(root,
+                                                                                                (MaterialPath *) best_path);
+                       break;
                 default:
                         elog(ERROR, "create_plan: unknown pathtype %d",
                                  best_path->pathtype);
@@ -383,6 +388,28 @@ create_result_plan(Query *root, ResultPath *best_path)
         return plan;
  }
  
+/*
+ * create_material_plan
+ *       Create a Material plan for 'best_path' and (recursively) plans
+ *       for its subpaths.
+ *
+ *       Returns a Plan node.
+ */
+static Material *
+create_material_plan(Query *root, MaterialPath *best_path)
+{
+       Material   *plan;
+       Plan       *subplan;
+
+       subplan = create_plan(root, best_path->subpath);
+
+       plan = make_material(best_path->path.parent->targetlist, subplan);
+
+       copy_path_costsize(&plan->plan, (Path *) best_path);
+
+       return plan;
+}
+
  
  /*****************************************************************************
   *
@@ -739,18 +766,6 @@ create_nestloop_plan(Query *root,
                                                                                          inner_tlist,
                                                                                          innerscan->scan.scanrelid);
         }
-       else if (IsA_Join(inner_plan))
-       {
-               /*
-                * Materialize the inner join for speed reasons.
-                *
-                * XXX It is probably *not* always fastest to materialize an inner
-                * join --- how can we estimate whether this is a good thing to
-                * do?
-                */
-               inner_plan = (Plan *) make_material(inner_tlist,
-                                                                                       inner_plan);
-       }
  
         /*
          * Set quals to contain INNER/OUTER var references.
@@ -843,44 +858,6 @@ create_mergejoin_plan(Query *root,
                                                                         best_path->innersortkeys);
  
         /*
-        * The executor requires the inner side of a mergejoin to support
-        * "mark" and "restore" operations.  Not all plan types do, so we must
-        * be careful not to generate an invalid plan.  If necessary, an
-        * invalid inner plan can be handled by inserting a Materialize node.
-        *
-        * Since the inner side must be ordered, and only Sorts and IndexScans
-        * can create order to begin with, you might think there's no problem
-        * --- but you'd be wrong.  Nestloop and merge joins can *preserve*
-        * the order of their inputs, so they can be selected as the input of
-        * a mergejoin, and that won't work in the present executor.
-        *
-        * Doing this here is a bit of a kluge since the cost of the Materialize
-        * wasn't taken into account in our earlier decisions.  But
-        * Materialize is hard to estimate a cost for, and the above
-        * consideration shows that this is a rare case anyway, so this seems
-        * an acceptable way to proceed.
-        *
-        * This check must agree with ExecMarkPos/ExecRestrPos in
-        * executor/execAmi.c!
-        */
-       switch (nodeTag(inner_plan))
-       {
-               case T_SeqScan:
-               case T_IndexScan:
-               case T_FunctionScan:
-               case T_Material:
-               case T_Sort:
-                       /* OK, these inner plans support mark/restore */
-                       break;
-
-               default:
-                       /* Ooops, need to materialize the inner plan */
-                       inner_plan = (Plan *) make_material(inner_tlist,
-                                                                                               inner_plan);
-                       break;
-       }
-
-       /*
          * Now we can build the mergejoin node.
          */
         join_plan = make_mergejoin(tlist,
@@ -1668,15 +1645,7 @@ make_material(List *tlist, Plan *lefttree)
         Material   *node = makeNode(Material);
         Plan       *plan = &node->plan;
  
-       copy_plan_costsize(plan, lefttree);
-
-       /*
-        * For plausibility, make startup & total costs equal total cost of
-        * input plan; this only affects EXPLAIN display not decisions.
-        *
-        * XXX shouldn't we charge some additional cost for materialization?
-        */
-       plan->startup_cost = plan->total_cost;
+       /* cost should be inserted by caller */
         plan->state = (EState *) NULL;
         plan->targetlist = tlist;
         plan->qual = NIL;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index 61476a6..e4bbd29 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.57 2002/11/30 00:08:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.58 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -328,9 +328,17 @@ make_subplan(SubLink *slink)
                         if (use_material)
                         {
                                 Plan       *matplan;
+                               Path            matpath; /* dummy for result of cost_material */
  
                                 matplan = (Plan *) make_material(plan->targetlist, plan);
-                               /* kluge --- see comments above */
+                               /* need to calculate costs */
+                               cost_material(&matpath,
+                                                         plan->total_cost,
+                                                         plan->plan_rows,
+                                                         plan->plan_width);
+                               matplan->startup_cost = matpath.startup_cost;
+                               matplan->total_cost = matpath.total_cost;
+                               /* parameter kluge --- see comments above */
                                 matplan->extParam = listCopy(plan->extParam);
                                 matplan->locParam = listCopy(plan->locParam);
                                 node->plan = plan = matplan;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 9822735..84896b9 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.81 2002/11/30 00:08:20 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.82 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,6 +16,7 @@
  
  #include <math.h>
  
+#include "executor/executor.h"
  #include "nodes/plannodes.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
@@ -450,6 +451,7 @@ create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual)
         pathnode->subpath = subpath;
         pathnode->constantqual = constantqual;
  
+       /* Ideally should define cost_result(), but I'm too lazy */
         if (subpath)
         {
                 pathnode->path.startup_cost = subpath->startup_cost;
@@ -465,6 +467,31 @@ create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual)
  }
  
  /*
+ * create_material_path
+ *       Creates a MaterialPath for 'rel' that wraps 'subpath', fills in its
+ *       cost estimate via cost_material(), and returns the new pathnode.
+ */
+MaterialPath *
+create_material_path(RelOptInfo *rel, Path *subpath)
+{
+       MaterialPath *pathnode = makeNode(MaterialPath);
+
+       pathnode->path.pathtype = T_Material;
+       pathnode->path.parent = rel;
+
+       pathnode->path.pathkeys = subpath->pathkeys;    /* same as input path */
+
+       pathnode->subpath = subpath;
+
+       cost_material(&pathnode->path,
+                                 subpath->total_cost,          /* input_cost */
+                                 rel->rows,                    /* tuples */
+                                 rel->width);                  /* width */
+
+       return pathnode;
+}
+
+/*
   * create_subqueryscan_path
   *       Creates a path corresponding to a sequential scan of a subquery,
   *       returning the pathnode.
@@ -583,6 +610,21 @@ create_mergejoin_path(Query *root,
         if (innersortkeys &&
                 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
                 innersortkeys = NIL;
+       /*
+        * If we are not sorting the inner path, we may need a materialize
+        * node to ensure it can be marked/restored.  (Sort does support
+        * mark/restore, so no materialize is needed in that case.)
+        *
+        * Since the inner side must be ordered, and only Sorts and IndexScans
+        * can create order to begin with, you might think there's no problem
+        * --- but you'd be wrong.  Nestloop and merge joins can *preserve*
+        * the order of their inputs, so they can be selected as the input of
+        * a mergejoin, and they don't support mark/restore at present.
+        */
+       if (innersortkeys == NIL &&
+               !ExecSupportsMarkRestore(inner_path->pathtype))
+               inner_path = (Path *)
+                       create_material_path(inner_path->parent, inner_path);
  
         pathnode->jpath.path.pathtype = T_MergeJoin;
         pathnode->jpath.path.parent = joinrel;
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index a12c31b..312c56f 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: executor.h,v 1.78 2002/09/04 20:31:42 momjian Exp $
+ * $Id: executor.h,v 1.79 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -33,6 +33,7 @@
  extern void ExecReScan(Plan *node, ExprContext *exprCtxt, Plan *parent);
  extern void ExecMarkPos(Plan *node);
  extern void ExecRestrPos(Plan *node);
+extern bool ExecSupportsMarkRestore(NodeTag plantype);
  
  /*
   * prototypes from functions in execJunk.c
diff --git a/src/include/executor/nodeTidscan.h b/src/include/executor/nodeTidscan.h

index b164100..72e6c3e 100644 (file)
--- a/src/include/executor/nodeTidscan.h
+++ b/src/include/executor/nodeTidscan.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeTidscan.h,v 1.9 2002/06/20 20:29:49 momjian Exp $
+ * $Id: nodeTidscan.h,v 1.10 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,6 +20,7 @@ extern TupleTableSlot *ExecTidScan(TidScan *node);
  extern void ExecTidReScan(TidScan *node, ExprContext *exprCtxt, Plan *parent);
  extern void ExecEndTidScan(TidScan *node);
  extern void ExecTidMarkPos(TidScan *node);
+extern void ExecTidRestrPos(TidScan *node);
  extern bool ExecInitTidScan(TidScan *node, EState *estate, Plan *parent);
  extern int     ExecCountSlotsTidScan(TidScan *node);
  extern void ExecTidReScan(TidScan *node, ExprContext *exprCtxt, Plan *parent);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index d2b984d..a9cd095 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodes.h,v 1.124 2002/11/24 21:52:14 tgl Exp $
+ * $Id: nodes.h,v 1.125 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -75,6 +75,7 @@ typedef enum NodeTag
          * TAGS FOR PLANNER NODES (relation.h)
          */
         T_RelOptInfo = 200,
+       T_IndexOptInfo,
         T_Path,
         T_IndexPath,
         T_NestPath,
@@ -83,10 +84,10 @@ typedef enum NodeTag
         T_TidPath,
         T_AppendPath,
         T_ResultPath,
+       T_MaterialPath,
         T_PathKeyItem,
         T_RestrictInfo,
         T_JoinInfo,
-       T_IndexOptInfo,
         T_InnerIndexscanInfo,
  
         /*
@@ -289,17 +290,6 @@ extern Node *newNodeMacroHolder;
  #define IsA(nodeptr,_type_)            (nodeTag(nodeptr) == T_##_type_)
  
  /* ----------------------------------------------------------------
- *                                       IsA functions (no inheritance any more)
- * ----------------------------------------------------------------
- */
-#define IsA_JoinPath(jp) \
-       (IsA(jp, NestPath) || IsA(jp, MergePath) || IsA(jp, HashPath))
-
-#define IsA_Join(jp) \
-       (IsA(jp, Join) || IsA(jp, NestLoop) || \
-        IsA(jp, MergeJoin) || IsA(jp, HashJoin))
-
-/* ----------------------------------------------------------------
   *                                       extern declarations follow
   * ----------------------------------------------------------------
   */
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 4c06224..5f6ad68 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: relation.h,v 1.71 2002/11/30 00:08:22 tgl Exp $
+ * $Id: relation.h,v 1.72 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -406,6 +406,18 @@ typedef struct ResultPath
  } ResultPath;
  
  /*
+ * MaterialPath represents use of a Material plan node, i.e., caching of
+ * the output of its subpath.  This is used when the subpath is expensive
+ * and needs to be scanned repeatedly, or when we need mark/restore ability
+ * and the subpath doesn't have it.
+ */
+typedef struct MaterialPath
+{
+       Path            path;           /* this node's own Path fields */
+       Path       *subpath;            /* path whose output is cached */
+} MaterialPath;
+
+/*
   * All join-type paths share these fields.
   */
  
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index fd9a3c1..5bb5092 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: cost.h,v 1.48 2002/11/21 00:42:19 tgl Exp $
+ * $Id: cost.h,v 1.49 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -60,6 +60,8 @@ extern void cost_functionscan(Path *path, Query *root,
                                   RelOptInfo *baserel);
  extern void cost_sort(Path *path, Query *root,
                   List *pathkeys, Cost input_cost, double tuples, int width);
+extern void cost_material(Path *path,
+                                                 Cost input_cost, double tuples, int width);
  extern void cost_agg(Path *path, Query *root,
                                          AggStrategy aggstrategy, int numAggs,
                                          int numGroupCols, double numGroups,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 61c433e..65abeb0 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pathnode.h,v 1.45 2002/11/06 00:00:45 tgl Exp $
+ * $Id: pathnode.h,v 1.46 2002/11/30 05:21:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -37,6 +37,7 @@ extern TidPath *create_tidscan_path(Query *root, RelOptInfo *rel,
  extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
  extern ResultPath *create_result_path(RelOptInfo *rel, Path *subpath,
                                                                           List *constantqual);
+extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
  extern Path *create_subqueryscan_path(RelOptInfo *rel);
  extern Path *create_functionscan_path(Query *root, RelOptInfo *rel);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 30 Nov 2002 05:21:03 +0000 (05:21 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 30 Nov 2002 05:21:03 +0000 (05:21 +0000)
src/backend/executor/execAmi.c		patch \| blob \| history
src/backend/executor/nodeSeqscan.c		patch \| blob \| history
src/backend/executor/nodeTidscan.c		patch \| blob \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/README		patch \| blob \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/include/executor/executor.h		patch \| blob \| history
src/include/executor/nodeTidscan.h		patch \| blob \| history
src/include/nodes/nodes.h		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history
src/include/optimizer/pathnode.h		patch \| blob \| history