OSDN Git Service

Extend CTE patch to support recursive UNION (ie, without ALL). The
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 7 Oct 2008 19:27:04 +0000 (19:27 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 7 Oct 2008 19:27:04 +0000 (19:27 +0000)
implementation uses an in-memory hash table, so it will poop out for very
large recursive results ... but the performance characteristics of a
sort-based implementation would be pretty unpleasant too.

13 files changed:
doc/src/sgml/queries.sgml
doc/src/sgml/ref/select.sgml
src/backend/executor/nodeRecursiveunion.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/prep/prepunion.c
src/backend/parser/parse_cte.c
src/include/nodes/execnodes.h
src/include/nodes/plannodes.h
src/include/optimizer/planmain.h
src/test/regress/expected/with.out
src/test/regress/sql/with.sql

index b3d72ce..c2e3807 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.46 2008/10/04 21:56:52 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.47 2008/10/07 19:27:03 tgl Exp $ -->
 
 <chapter id="queries">
  <title>Queries</title>
@@ -1519,7 +1519,8 @@ SELECT sum(n) FROM t;
 </programlisting>
 
    The general form of a recursive <literal>WITH</> query is always a
-   <firstterm>non-recursive term</>, then <literal>UNION ALL</>, then a
+   <firstterm>non-recursive term</>, then <literal>UNION</> (or
+   <literal>UNION ALL</>), then a
    <firstterm>recursive term</>, where only the recursive term can contain
    a reference to the query's own output.  Such a query is executed as
    follows:
@@ -1530,9 +1531,10 @@ SELECT sum(n) FROM t;
 
    <step performance="required">
     <para>
-     Evaluate the non-recursive term.  Include all its output rows in the
-     result of the recursive query, and also place them in a temporary
-     <firstterm>working table</>.
+     Evaluate the non-recursive term.  For <literal>UNION</> (but not
+     <literal>UNION ALL</>), discard duplicate rows.  Include all remaining
+     rows in the result of the recursive query, and also place them in a
+     temporary <firstterm>working table</>.
     </para>
    </step>
 
@@ -1544,9 +1546,11 @@ SELECT sum(n) FROM t;
      <step performance="required">
       <para>
        Evaluate the recursive term, substituting the current contents of
-       the working table for the recursive self-reference.  Include all its
-       output rows in the result of the recursive query, and also place them
-       in a temporary <firstterm>intermediate table</>.
+       the working table for the recursive self-reference.
+       For <literal>UNION</> (but not <literal>UNION ALL</>), discard
+       duplicate rows and rows that duplicate any previous result row.
+       Include all remaining rows in the result of the recursive query, and
+       also place them in a temporary <firstterm>intermediate table</>.
       </para>
      </step>
 
@@ -1598,10 +1602,13 @@ GROUP BY sub_part
   <para>
    When working with recursive queries it is important to be sure that
    the recursive part of the query will eventually return no tuples,
-   or else the query will loop indefinitely.  A useful trick for
-   development purposes is to place a <literal>LIMIT</> in the parent
-   query.  For example, this query would loop forever without the
-   <literal>LIMIT</>:
+   or else the query will loop indefinitely.  Sometimes, using
+   <literal>UNION</> instead of <literal>UNION ALL</> can accomplish this
+   by discarding rows that duplicate previous output rows; this catches
+   cycles that would otherwise repeat.  A useful trick for testing queries
+   when you are not certain if they might loop is to place a <literal>LIMIT</>
+   in the parent query.  For example, this query would loop forever without
+   the <literal>LIMIT</>:
 
 <programlisting>
 WITH RECURSIVE t(n) AS (
@@ -1614,7 +1621,7 @@ SELECT n FROM t LIMIT 100;
 
    This works because <productname>PostgreSQL</productname>'s implementation
    evaluates only as many rows of a <literal>WITH</> query as are actually
-   demanded by the parent query.  Using this trick in production is not
+   fetched by the parent query.  Using this trick in production is not
    recommended, because other systems might work differently.
   </para>
 
index e72d9c1..f73ca6e 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.105 2008/10/04 21:56:52 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.106 2008/10/07 19:27:04 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -202,10 +202,10 @@ and <replaceable class="parameter">with_query</replaceable> is:
     subquery to reference itself by name.  Such a subquery must have
     the form
 <synopsis>
-<replaceable class="parameter">non_recursive_term</replaceable> UNION ALL <replaceable class="parameter">recursive_term</replaceable>
+<replaceable class="parameter">non_recursive_term</replaceable> UNION [ ALL ] <replaceable class="parameter">recursive_term</replaceable>
 </synopsis>
     where the recursive self-reference must appear on the right-hand
-    side of <literal>UNION ALL</>.  Only one recursive self-reference
+    side of the <literal>UNION</>.  Only one recursive self-reference
     is permitted per query.
    </para>
 
@@ -1234,7 +1234,7 @@ SELECT distance, employee_name FROM employee_recursive;
 </programlisting>
 
    Notice the typical form of recursive queries:
-   an initial condition, followed by <literal>UNION ALL</literal>,
+   an initial condition, followed by <literal>UNION</literal>,
    followed by the recursive part of the query. Be sure that the
    recursive part of the query will eventually return no tuples, or
    else the query will loop indefinitely.  (See <xref linkend="queries-with">
index 7136a62..81ac703 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeRecursiveunion.c,v 1.1 2008/10/04 21:56:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeRecursiveunion.c,v 1.2 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "executor/execdebug.h"
 #include "executor/nodeRecursiveunion.h"
 #include "miscadmin.h"
+#include "utils/memutils.h"
+
+
+/*
+ * To implement UNION (without ALL), we need a hashtable that stores tuples
+ * already seen.  The hash key is computed from the grouping columns.
+ */
+typedef struct RUHashEntryData *RUHashEntry;
+
+typedef struct RUHashEntryData
+{
+       TupleHashEntryData shared;      /* common header for hash table entries */
+} RUHashEntryData;
+
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(RecursiveUnionState *rustate)
+{
+       RecursiveUnion     *node = (RecursiveUnion *) rustate->ps.plan;
+
+       Assert(node->numCols > 0);
+       Assert(node->numGroups > 0);
+
+       rustate->hashtable = BuildTupleHashTable(node->numCols,
+                                                                                        node->dupColIdx,
+                                                                                        rustate->eqfunctions,
+                                                                                        rustate->hashfunctions,
+                                                                                        node->numGroups,
+                                                                                        sizeof(RUHashEntryData),
+                                                                                        rustate->tableContext,
+                                                                                        rustate->tempContext);
+}
 
 
 /* ----------------------------------------------------------------
@@ -44,49 +79,85 @@ ExecRecursiveUnion(RecursiveUnionState *node)
        PlanState  *innerPlan = innerPlanState(node);
        RecursiveUnion *plan = (RecursiveUnion *) node->ps.plan;
        TupleTableSlot *slot;
+       RUHashEntry entry;
+       bool            isnew;
 
        /* 1. Evaluate non-recursive term */
        if (!node->recursing)
        {
-               slot = ExecProcNode(outerPlan);
-               if (!TupIsNull(slot))
+               for (;;)
                {
+                       slot = ExecProcNode(outerPlan);
+                       if (TupIsNull(slot))
+                               break;
+                       if (plan->numCols > 0)
+                       {
+                               /* Find or build hashtable entry for this tuple's group */
+                               entry = (RUHashEntry)
+                                       LookupTupleHashEntry(node->hashtable, slot, &isnew);
+                               /* Must reset temp context after each hashtable lookup */
+                               MemoryContextReset(node->tempContext);
+                               /* Ignore tuple if already seen */
+                               if (!isnew)
+                                       continue;
+                       }
+                       /* Each non-duplicate tuple goes to the working table ... */
                        tuplestore_puttupleslot(node->working_table, slot);
+                       /* ... and to the caller */
                        return slot;
                }
                node->recursing = true;
        }
 
-retry:
        /* 2. Execute recursive term */
-       slot = ExecProcNode(innerPlan);
-       if (TupIsNull(slot))
+       for (;;)
        {
-               if (node->intermediate_empty)
-                       return NULL;
+               slot = ExecProcNode(innerPlan);
+               if (TupIsNull(slot))
+               {
+                       /* Done if there's nothing in the intermediate table */
+                       if (node->intermediate_empty)
+                               break;
 
-               /* done with old working table ... */
-               tuplestore_end(node->working_table);
+                       /* done with old working table ... */
+                       tuplestore_end(node->working_table);
 
-               /* intermediate table becomes working table */
-               node->working_table = node->intermediate_table;
+                       /* intermediate table becomes working table */
+                       node->working_table = node->intermediate_table;
 
-               /* create new empty intermediate table */
-               node->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
-               node->intermediate_empty = true;
+                       /* create new empty intermediate table */
+                       node->intermediate_table = tuplestore_begin_heap(false, false,
+                                                                                                                        work_mem);
+                       node->intermediate_empty = true;
 
-               /* and reset the inner plan */
-               innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
-                                                                                        plan->wtParam);
-               goto retry;
-       }
-       else
-       {
+                       /* reset the recursive term */
+                       innerPlan->chgParam = bms_add_member(innerPlan->chgParam,
+                                                                                                plan->wtParam);
+
+                       /* and continue fetching from recursive term */
+                       continue;
+               }
+
+               if (plan->numCols > 0)
+               {
+                       /* Find or build hashtable entry for this tuple's group */
+                       entry = (RUHashEntry)
+                               LookupTupleHashEntry(node->hashtable, slot, &isnew);
+                       /* Must reset temp context after each hashtable lookup */
+                       MemoryContextReset(node->tempContext);
+                       /* Ignore tuple if already seen */
+                       if (!isnew)
+                               continue;
+               }
+
+               /* Else, tuple is good; stash it in intermediate table ... */
                node->intermediate_empty = false;
                tuplestore_puttupleslot(node->intermediate_table, slot);
-       }
+               /* ... and return it */
+               return slot;
+       }
 
-       return slot;
+       return NULL;
 }
 
 /* ----------------------------------------------------------------
@@ -109,6 +180,12 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
        rustate->ps.plan = (Plan *) node;
        rustate->ps.state = estate;
 
+       rustate->eqfunctions = NULL;
+       rustate->hashfunctions = NULL;
+       rustate->hashtable = NULL;
+       rustate->tempContext = NULL;
+       rustate->tableContext = NULL;
+
        /* initialize processing state */
        rustate->recursing = false;
        rustate->intermediate_empty = true;
@@ -116,6 +193,28 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
        rustate->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
 
        /*
+        * If hashing, we need a per-tuple memory context for comparisons, and a
+        * longer-lived context to store the hash table.  The table can't just be
+        * kept in the per-query context because we want to be able to throw it
+        * away when rescanning.
+        */
+       if (node->numCols > 0)
+       {
+               rustate->tempContext =
+                       AllocSetContextCreate(CurrentMemoryContext,
+                                                                 "RecursiveUnion",
+                                                                 ALLOCSET_DEFAULT_MINSIZE,
+                                                                 ALLOCSET_DEFAULT_INITSIZE,
+                                                                 ALLOCSET_DEFAULT_MAXSIZE);
+               rustate->tableContext =
+                       AllocSetContextCreate(CurrentMemoryContext,
+                                                                 "RecursiveUnion hash table",
+                                                                 ALLOCSET_DEFAULT_MINSIZE,
+                                                                 ALLOCSET_DEFAULT_INITSIZE,
+                                                                 ALLOCSET_DEFAULT_MAXSIZE);
+       }
+
+       /*
         * Make the state structure available to descendant WorkTableScan nodes
         * via the Param slot reserved for it.
         */
@@ -154,6 +253,19 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
        outerPlanState(rustate) = ExecInitNode(outerPlan(node), estate, eflags);
        innerPlanState(rustate) = ExecInitNode(innerPlan(node), estate, eflags);
 
+       /*
+        * If hashing, precompute fmgr lookup data for inner loop, and create
+        * the hash table.
+        */
+       if (node->numCols > 0)
+       {
+               execTuplesHashPrepare(node->numCols,
+                                                         node->dupOperators,
+                                                         &rustate->eqfunctions,
+                                                         &rustate->hashfunctions);
+               build_hash_table(rustate);
+       }
+
        return rustate;
 }
 
@@ -178,6 +290,12 @@ ExecEndRecursiveUnion(RecursiveUnionState *node)
        tuplestore_end(node->working_table);
        tuplestore_end(node->intermediate_table);
 
+       /* free subsidiary stuff including hashtable */
+       if (node->tempContext)
+               MemoryContextDelete(node->tempContext);
+       if (node->tableContext)
+               MemoryContextDelete(node->tableContext);
+
        /*
         * clean out the upper tuple table
         */
@@ -217,6 +335,14 @@ ExecRecursiveUnionReScan(RecursiveUnionState *node, ExprContext *exprCtxt)
        if (outerPlan->chgParam == NULL)
                ExecReScan(outerPlan, exprCtxt);
 
+       /* Release any hashtable storage */
+       if (node->tableContext)
+               MemoryContextResetAndDeleteChildren(node->tableContext);
+
+       /* And rebuild empty hashtable if needed */
+       if (plan->numCols > 0)
+               build_hash_table(node);
+
        /* reset processing state */
        node->recursing = false;
        node->intermediate_empty = true;
index 1e7eb60..f0a4915 100644 (file)
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.407 2008/10/06 17:39:25 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.408 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -193,6 +193,13 @@ _copyRecursiveUnion(RecursiveUnion *from)
         * copy remainder of node
         */
        COPY_SCALAR_FIELD(wtParam);
+       COPY_SCALAR_FIELD(numCols);
+       if (from->numCols > 0)
+       {
+               COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
+               COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+       }
+       COPY_SCALAR_FIELD(numGroups);
 
        return newnode;
 }
index b25ef4b..75a04ce 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.341 2008/10/06 17:39:26 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.342 2008/10/07 19:27:04 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -334,11 +334,24 @@ _outAppend(StringInfo str, Append *node)
 static void
 _outRecursiveUnion(StringInfo str, RecursiveUnion *node)
 {
+       int i;
+
        WRITE_NODE_TYPE("RECURSIVEUNION");
 
        _outPlanInfo(str, (Plan *) node);
 
        WRITE_INT_FIELD(wtParam);
+       WRITE_INT_FIELD(numCols);
+
+       appendStringInfo(str, " :dupColIdx");
+       for (i = 0; i < node->numCols; i++)
+               appendStringInfo(str, " %d", node->dupColIdx[i]);
+
+       appendStringInfo(str, " :dupOperators");
+       for (i = 0; i < node->numCols; i++)
+               appendStringInfo(str, " %u", node->dupOperators[i]);
+
+       WRITE_LONG_FIELD(numGroups);
 }
 
 static void
index aabbf64..f812c93 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.249 2008/10/04 21:56:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.250 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2545,10 +2545,13 @@ RecursiveUnion *
 make_recursive_union(List *tlist,
                                         Plan *lefttree,
                                         Plan *righttree,
-                                        int wtParam)
+                                        int wtParam,
+                                        List *distinctList,
+                                        long numGroups)
 {
        RecursiveUnion *node = makeNode(RecursiveUnion);
        Plan       *plan = &node->plan;
+       int                     numCols = list_length(distinctList);
 
        cost_recursive_union(plan, lefttree, righttree);
 
@@ -2558,6 +2561,37 @@ make_recursive_union(List *tlist,
        plan->righttree = righttree;
        node->wtParam = wtParam;
 
+       /*
+        * convert SortGroupClause list into arrays of attr indexes and equality
+        * operators, as wanted by executor
+        */
+       node->numCols = numCols;
+       if (numCols > 0)
+       {
+               int                     keyno = 0;
+               AttrNumber *dupColIdx;
+               Oid                *dupOperators;
+               ListCell   *slitem;
+
+               dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+               dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+
+               foreach(slitem, distinctList)
+               {
+                       SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
+                       TargetEntry *tle = get_sortgroupclause_tle(sortcl,
+                                                                                                          plan->targetlist);
+
+                       dupColIdx[keyno] = tle->resno;
+                       dupOperators[keyno] = sortcl->eqop;
+                       Assert(OidIsValid(dupOperators[keyno]));
+                       keyno++;
+               }
+               node->dupColIdx = dupColIdx;
+               node->dupOperators = dupOperators;
+       }
+       node->numGroups = numGroups;
+
        return node;
 }
 
index e8b7850..2e22d09 100644 (file)
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.157 2008/10/06 17:39:26 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.158 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -318,10 +318,12 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root,
        Plan       *lplan;
        Plan       *rplan;
        List       *tlist;
+       List       *groupList;
+       long            numGroups;
 
        /* Parser should have rejected other cases */
-       if (setOp->op != SETOP_UNION || !setOp->all)
-               elog(ERROR, "only UNION ALL queries can be recursive");
+       if (setOp->op != SETOP_UNION)
+               elog(ERROR, "only UNION queries can be recursive");
        /* Worktable ID should be assigned */
        Assert(root->wt_param_id >= 0);
 
@@ -347,12 +349,45 @@ generate_recursion_plan(SetOperationStmt *setOp, PlannerInfo *root,
                                                                  refnames_tlist);
 
        /*
+        * If UNION, identify the grouping operators
+        */
+       if (setOp->all)
+       {
+               groupList = NIL;
+               numGroups = 0;
+       }
+       else
+       {
+               double  dNumGroups;
+
+               /* Identify the grouping semantics */
+               groupList = generate_setop_grouplist(setOp, tlist);
+
+               /* We only support hashing here */
+               if (!grouping_is_hashable(groupList))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("could not implement recursive UNION"),
+                                        errdetail("All column datatypes must be hashable.")));
+
+               /*
+                * For the moment, take the number of distinct groups as equal to
+                * the total input size, ie, the worst case.
+                */
+               dNumGroups = lplan->plan_rows + rplan->plan_rows * 10;
+
+               /* Also convert to long int --- but 'ware overflow! */
+               numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
+       }
+
+       /*
         * And make the plan node.
         */
        plan = (Plan *) make_recursive_union(tlist, lplan, rplan,
-                                                                                root->wt_param_id);
+                                                                                root->wt_param_id,
+                                                                                groupList, numGroups);
 
-       *sortClauses = NIL;                     /* result of UNION ALL is always unsorted */
+       *sortClauses = NIL;                     /* RecursiveUnion result is always unsorted */
 
        return plan;
 }
index 29111ac..3971dac 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.2 2008/10/05 22:50:55 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_cte.c,v 2.3 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -601,11 +601,11 @@ checkWellFormedRecursion(CteState *cstate)
                if (!cte->cterecursive)
                        continue;
 
-               /* Must have top-level UNION ALL */
-               if (stmt->op != SETOP_UNION || !stmt->all)
+               /* Must have top-level UNION */
+               if (stmt->op != SETOP_UNION)
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_RECURSION),
-                                        errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION ALL recursive-term",
+                                        errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term",
                                                        cte->ctename),
                                         parser_errposition(cstate->pstate, cte->location)));
 
@@ -628,7 +628,7 @@ checkWellFormedRecursion(CteState *cstate)
                        elog(ERROR, "missing recursive reference");
 
                /*
-                * Disallow ORDER BY and similar decoration atop the UNION ALL.
+                * Disallow ORDER BY and similar decoration atop the UNION.
                 * These don't make sense because it's impossible to figure out what
                 * they mean when we have only part of the recursive query's results.
                 * (If we did allow them, we'd have to check for recursive references
index 02c9c8f..c10204a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.189 2008/10/04 21:56:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.190 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -964,6 +964,12 @@ typedef struct RecursiveUnionState
        bool            intermediate_empty;
        Tuplestorestate *working_table;
        Tuplestorestate *intermediate_table;
+       /* Remaining fields are unused in UNION ALL case */
+       FmgrInfo   *eqfunctions;        /* per-grouping-field equality fns */
+       FmgrInfo   *hashfunctions;      /* per-grouping-field hash fns */
+       MemoryContext tempContext;      /* short-term context for comparisons */
+       TupleHashTable hashtable;       /* hash table for tuples already seen */
+       MemoryContext tableContext;     /* memory context containing hash table */
 } RecursiveUnionState;
 
 /* ----------------
index cdf06b0..16c25fd 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.104 2008/10/04 21:56:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.105 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -194,6 +194,12 @@ typedef struct RecursiveUnion
 {
        Plan            plan;
        int                     wtParam;                /* ID of Param representing work table */
+       /* Remaining fields are zero/null in UNION ALL case */
+       int                     numCols;                /* number of columns to check for
+                                                                * duplicate-ness */
+       AttrNumber *dupColIdx;          /* their indexes in the target list */
+       Oid                *dupOperators;       /* equality operators to compare with */
+       long            numGroups;              /* estimated number of groups in input */
 } RecursiveUnion;
 
 /* ----------------
index 1e8017f..641ebe4 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.113 2008/10/04 21:56:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.114 2008/10/07 19:27:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,7 +43,8 @@ extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
                                  Index scanrelid, Plan *subplan, List *subrtable);
 extern Append *make_append(List *appendplans, bool isTarget, List *tlist);
 extern RecursiveUnion *make_recursive_union(List *tlist,
-                          Plan *lefttree, Plan *righttree, int wtParam);
+                          Plan *lefttree, Plan *righttree, int wtParam,
+                          List *distinctList, long numGroups);
 extern Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree,
                                                List *pathkeys, double limit_tuples);
 extern Sort *make_sort_from_sortclauses(PlannerInfo *root, List *sortcls,
index 5b45ac8..4760aa9 100644 (file)
@@ -49,6 +49,18 @@ SELECT * FROM t;
  5
 (5 rows)
 
+-- This is an infinite loop with UNION ALL, but not with UNION
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT 10-n FROM t)
+SELECT * FROM t;
+ n 
+---
+ 1
+ 9
+(2 rows)
+
 -- This'd be an infinite loop, but outside query reads only as much as needed
 WITH RECURSIVE t(n) AS (
     VALUES (1)
@@ -69,6 +81,26 @@ SELECT * FROM t LIMIT 10;
  10
 (10 rows)
 
+-- UNION case should have same property
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT n+1 FROM t)
+SELECT * FROM t LIMIT 10;
+ n  
+----
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
 -- Test behavior with an unknown-type literal in the WITH
 WITH q AS (SELECT 'foo' AS x)
 SELECT x, x IS OF (unknown) as is_unknown FROM q;
@@ -510,38 +542,32 @@ WITH RECURSIVE
 --
 -- error cases
 --
--- UNION (should be supported someday)
-WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
-       SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
-LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
-                       ^
 -- INTERSECT
 WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x)
        SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x...
                        ^
 WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT ALL SELECT n+1 FROM x)
        SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT ALL SELECT n+1 FR...
                        ^
 -- EXCEPT
 WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT SELECT n+1 FROM x)
        SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT SELECT n+1 FROM x)
                        ^
 WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT ALL SELECT n+1 FROM x)
        SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT 1 EXCEPT ALL SELECT n+1 FROM ...
                        ^
 -- no non-recursive term
 WITH RECURSIVE x(n) AS (SELECT n FROM x)
        SELECT * FROM x;
-ERROR:  recursive query "x" does not have the form non-recursive-term UNION ALL recursive-term
+ERROR:  recursive query "x" does not have the form non-recursive-term UNION [ALL] recursive-term
 LINE 1: WITH RECURSIVE x(n) AS (SELECT n FROM x)
                        ^
 -- recursive term in the left hand side (strictly speaking, should allow this)
index c3ff5e2..60c545d 100644 (file)
@@ -31,6 +31,13 @@ UNION ALL
 )
 SELECT * FROM t;
 
+-- This is an infinite loop with UNION ALL, but not with UNION
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT 10-n FROM t)
+SELECT * FROM t;
+
 -- This'd be an infinite loop, but outside query reads only as much as needed
 WITH RECURSIVE t(n) AS (
     VALUES (1)
@@ -38,6 +45,13 @@ UNION ALL
     SELECT n+1 FROM t)
 SELECT * FROM t LIMIT 10;
 
+-- UNION case should have same property
+WITH RECURSIVE t(n) AS (
+    SELECT 1
+UNION
+    SELECT n+1 FROM t)
+SELECT * FROM t LIMIT 10;
+
 -- Test behavior with an unknown-type literal in the WITH
 WITH q AS (SELECT 'foo' AS x)
 SELECT x, x IS OF (unknown) as is_unknown FROM q;
@@ -265,10 +279,6 @@ WITH RECURSIVE
 -- error cases
 --
 
--- UNION (should be supported someday)
-WITH RECURSIVE x(n) AS (SELECT 1 UNION SELECT n+1 FROM x)
-       SELECT * FROM x;
-
 -- INTERSECT
 WITH RECURSIVE x(n) AS (SELECT 1 INTERSECT SELECT n+1 FROM x)
        SELECT * FROM x;