1 /*-------------------------------------------------------------------------
4 * Routines to handle aggregate nodes.
6 * ExecAgg evaluates each aggregate in the following steps:
8 * transvalue = initcond
9 * foreach input_value do
10 * transvalue = transfunc(transvalue, input_value)
11 * result = finalfunc(transvalue)
13 * If a finalfunc is not supplied then the result is just the ending
14 * value of transvalue.
16 * If transfunc is marked "strict" in pg_proc and initcond is NULL,
17 * then the first non-NULL input_value is assigned directly to transvalue,
18 * and transfunc isn't applied until the second non-NULL input_value.
19 * The agg's input type and transtype must be the same in this case!
21 * If transfunc is marked "strict" then NULL input_values are skipped,
22 * keeping the previous transvalue. If transfunc is not strict then it
23 * is called for every input tuple and must deal with NULL initcond
24 * or NULL input_value for itself.
26 * If finalfunc is marked "strict" then it is not called when the
27 * ending transvalue is NULL, instead a NULL result is created
28 * automatically (this is just the usual handling of strict functions,
29 * of course). A non-strict finalfunc can make its own choice of
30 * what to return for a NULL ending transvalue.
32 * We compute aggregate input expressions and run the transition functions
33 * in a temporary econtext (aggstate->tmpcontext). This is reset at
34 * least once per input tuple, so when the transvalue datatype is
35 * pass-by-reference, we have to be careful to copy it into a longer-lived
36 * memory context, and free the prior value to avoid memory leakage.
37 * We store transvalues in the memory context aggstate->aggcontext,
38 * which is also used for the hashtable structures in AGG_HASHED mode.
39 * The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext)
40 * is used to run finalize functions and compute the output tuple;
41 * this context can be reset once per output tuple.
44 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
45 * Portions Copyright (c) 1994, Regents of the University of California
48 * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.100 2002/12/13 19:45:52 tgl Exp $
50 *-------------------------------------------------------------------------
55 #include "access/heapam.h"
56 #include "catalog/pg_aggregate.h"
57 #include "catalog/pg_operator.h"
58 #include "executor/executor.h"
59 #include "executor/nodeAgg.h"
60 #include "executor/nodeGroup.h"
61 #include "executor/nodeHash.h"
62 #include "miscadmin.h"
63 #include "optimizer/clauses.h"
64 #include "parser/parse_coerce.h"
65 #include "parser/parse_expr.h"
66 #include "parser/parse_oper.h"
67 #include "utils/acl.h"
68 #include "utils/builtins.h"
69 #include "utils/lsyscache.h"
70 #include "utils/syscache.h"
71 #include "utils/tuplesort.h"
72 #include "utils/datum.h"
76 * AggStatePerAggData - per-aggregate working state for the Agg scan
78 typedef struct AggStatePerAggData
81 * These values are set up during ExecInitAgg() and do not change
85 /* Links to Aggref expr and state nodes this working state is for */
86 AggrefExprState *aggrefstate;
89 /* Oids of transfer functions */
91 Oid finalfn_oid; /* may be InvalidOid */
94 * fmgr lookup data for transfer functions --- only valid when
95 * corresponding oid is not InvalidOid. Note in particular that
96 * fn_strict flags are kept here.
102 * Type of input data and Oid of sort operator to use for it; only
103 * set/used when aggregate has DISTINCT flag. (These are not used
104 * directly by nodeAgg, but must be passed to the Tuplesort object.)
110 * fmgr lookup data for input type's equality operator --- only
111 * set/used when aggregate has DISTINCT flag.
116 * initial value from pg_aggregate entry
119 bool initValueIsNull;
122 * We need the len and byval info for the agg's input, result, and
123 * transition data types in order to know how to copy/delete values.
133 * These values are working state that is initialized at the start of
134 * an input tuple group and updated for each input tuple.
136 * For a simple (non DISTINCT) aggregate, we just feed the input values
137 * straight to the transition function. If it's DISTINCT, we pass the
138 * input values into a Tuplesort object; then at completion of the
139 * input tuple group, we scan the sorted values, eliminate duplicates,
140 * and run the transition function on the rest.
143 Tuplesortstate *sortstate; /* sort object, if a DISTINCT agg */
144 } AggStatePerAggData;
147 * AggStatePerGroupData - per-aggregate-per-group working state
149 * These values are working state that is initialized at the start of
150 * an input tuple group and updated for each input tuple.
152 * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
153 * structs (pointed to by aggstate->pergroup); we re-use the array for
154 * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the
155 * hash table contains an array of these structs for each tuple group.
157 * Logically, the sortstate field belongs in this struct, but we do not
158 * keep it here for space reasons: we don't support DISTINCT aggregates
159 * in AGG_HASHED mode, so there's no reason to use up a pointer field
160 * in every entry of the hashtable.
162 typedef struct AggStatePerGroupData
164 Datum transValue; /* current transition value */
165 bool transValueIsNull;
167 bool noTransValue; /* true if transValue not set yet */
170 * Note: noTransValue initially has the same value as
171 * transValueIsNull, and if true both are cleared to false at the same
172 * time. They are not the same though: if transfn later returns a
173 * NULL, we want to keep that NULL and not auto-replace it with a
174 * later input value. Only the first non-NULL input will be
177 } AggStatePerGroupData;
180 * To implement hashed aggregation, we need a hashtable that stores a
181 * representative tuple and an array of AggStatePerGroup structs for each
182 * distinct set of GROUP BY column values. We compute the hash key from
183 * the GROUP BY columns.
185 typedef struct AggHashEntryData
187 AggHashEntry next; /* next entry in same hash bucket */
188 uint32 hashkey; /* exact hash key of this entry */
189 HeapTuple firstTuple; /* copy of first tuple in this group */
190 /* per-aggregate transition status array - must be last! */
191 AggStatePerGroupData pergroup[1]; /* VARIABLE LENGTH ARRAY */
192 } AggHashEntryData; /* VARIABLE LENGTH STRUCT */
194 typedef struct AggHashTableData
196 int nbuckets; /* number of buckets in hash table */
197 AggHashEntry buckets[1]; /* VARIABLE LENGTH ARRAY */
198 } AggHashTableData; /* VARIABLE LENGTH STRUCT */
201 static void initialize_aggregates(AggState *aggstate,
202 AggStatePerAgg peragg,
203 AggStatePerGroup pergroup);
204 static void advance_transition_function(AggState *aggstate,
205 AggStatePerAgg peraggstate,
206 AggStatePerGroup pergroupstate,
207 Datum newVal, bool isNull);
208 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
209 static void process_sorted_aggregate(AggState *aggstate,
210 AggStatePerAgg peraggstate,
211 AggStatePerGroup pergroupstate);
212 static void finalize_aggregate(AggState *aggstate,
213 AggStatePerAgg peraggstate,
214 AggStatePerGroup pergroupstate,
215 Datum *resultVal, bool *resultIsNull);
216 static void build_hash_table(AggState *aggstate);
217 static AggHashEntry lookup_hash_entry(AggState *aggstate,
218 TupleTableSlot *slot);
219 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
220 static void agg_fill_hash_table(AggState *aggstate);
221 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
222 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
226 * Initialize all aggregates for a new group of input values.
228 * When called, CurrentMemoryContext should be the per-query context.
231 initialize_aggregates(AggState *aggstate,
232 AggStatePerAgg peragg,
233 AggStatePerGroup pergroup)
237 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
239 AggStatePerAgg peraggstate = &peragg[aggno];
240 AggStatePerGroup pergroupstate = &pergroup[aggno];
241 Aggref *aggref = peraggstate->aggref;
244 * Start a fresh sort operation for each DISTINCT aggregate.
246 if (aggref->aggdistinct)
249 * In case of rescan, maybe there could be an uncompleted sort
250 * operation? Clean it up if so.
252 if (peraggstate->sortstate)
253 tuplesort_end(peraggstate->sortstate);
255 peraggstate->sortstate =
256 tuplesort_begin_datum(peraggstate->inputType,
257 peraggstate->sortOperator,
262 * (Re)set transValue to the initial value.
264 * Note that when the initial value is pass-by-ref, we must copy it
265 * (into the aggcontext) since we will pfree the transValue later.
267 if (peraggstate->initValueIsNull)
268 pergroupstate->transValue = peraggstate->initValue;
271 MemoryContext oldContext;
273 oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
274 pergroupstate->transValue = datumCopy(peraggstate->initValue,
275 peraggstate->transtypeByVal,
276 peraggstate->transtypeLen);
277 MemoryContextSwitchTo(oldContext);
279 pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
282 * If the initial value for the transition state doesn't exist in the
283 * pg_aggregate table then we will let the first non-NULL value
284 * returned from the outer procNode become the initial value. (This is
285 * useful for aggregates like max() and min().) The noTransValue flag
286 * signals that we still need to do this.
288 pergroupstate->noTransValue = peraggstate->initValueIsNull;
293 * Given a new input value, advance the transition function of an aggregate.
295 * It doesn't matter which memory context this is called in.
298 advance_transition_function(AggState *aggstate,
299 AggStatePerAgg peraggstate,
300 AggStatePerGroup pergroupstate,
301 Datum newVal, bool isNull)
303 FunctionCallInfoData fcinfo;
304 MemoryContext oldContext;
306 if (peraggstate->transfn.fn_strict)
309 * For a strict transfn, nothing happens at a NULL input
310 * tuple; we just keep the prior transValue.
314 if (pergroupstate->noTransValue)
317 * transValue has not been initialized. This is the first
318 * non-NULL input value. We use it as the initial value for
319 * transValue. (We already checked that the agg's input type
320 * is binary-compatible with its transtype, so straight copy
323 * We must copy the datum into aggcontext if it is pass-by-ref.
324 * We do not need to pfree the old transValue, since it's NULL.
326 oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
327 pergroupstate->transValue = datumCopy(newVal,
328 peraggstate->transtypeByVal,
329 peraggstate->transtypeLen);
330 pergroupstate->transValueIsNull = false;
331 pergroupstate->noTransValue = false;
332 MemoryContextSwitchTo(oldContext);
335 if (pergroupstate->transValueIsNull)
338 * Don't call a strict function with NULL inputs. Note it is
339 * possible to get here despite the above tests, if the
340 * transfn is strict *and* returned a NULL on a prior cycle.
341 * If that happens we will propagate the NULL all the way to
348 /* We run the transition functions in per-input-tuple memory context */
349 oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
352 * OK to call the transition function
354 * This is heavily-used code, so manually zero just the necessary fields
355 * instead of using MemSet(). Compare FunctionCall2().
358 /* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
359 fcinfo.context = NULL;
360 fcinfo.resultinfo = NULL;
361 fcinfo.isnull = false;
363 fcinfo.flinfo = &peraggstate->transfn;
365 fcinfo.arg[0] = pergroupstate->transValue;
366 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
367 fcinfo.arg[1] = newVal;
368 fcinfo.argnull[1] = isNull;
370 newVal = FunctionCallInvoke(&fcinfo);
373 * If pass-by-ref datatype, must copy the new value into aggcontext and
374 * pfree the prior transValue. But if transfn returned a pointer to its
375 * first input, we don't need to do anything.
377 if (!peraggstate->transtypeByVal &&
378 DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
382 MemoryContextSwitchTo(aggstate->aggcontext);
383 newVal = datumCopy(newVal,
384 peraggstate->transtypeByVal,
385 peraggstate->transtypeLen);
387 if (!pergroupstate->transValueIsNull)
388 pfree(DatumGetPointer(pergroupstate->transValue));
391 pergroupstate->transValue = newVal;
392 pergroupstate->transValueIsNull = fcinfo.isnull;
394 MemoryContextSwitchTo(oldContext);
398 * Advance all the aggregates for one input tuple. The input tuple
399 * has been stored in tmpcontext->ecxt_scantuple, so that it is accessible
400 * to ExecEvalExpr. pergroup is the array of per-group structs to use
401 * (this might be in a hashtable entry).
403 * When called, CurrentMemoryContext should be the per-query context.
406 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
408 ExprContext *econtext = aggstate->tmpcontext;
411 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
413 AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
414 AggStatePerGroup pergroupstate = &pergroup[aggno];
415 AggrefExprState *aggrefstate = peraggstate->aggrefstate;
416 Aggref *aggref = peraggstate->aggref;
420 newVal = ExecEvalExprSwitchContext(aggrefstate->target, econtext,
423 if (aggref->aggdistinct)
425 /* in DISTINCT mode, we may ignore nulls */
428 tuplesort_putdatum(peraggstate->sortstate, newVal, isNull);
432 advance_transition_function(aggstate, peraggstate, pergroupstate,
439 * Run the transition function for a DISTINCT aggregate. This is called
440 * after we have completed entering all the input values into the sort
441 * object. We complete the sort, read out the values in sorted order,
442 * and run the transition function on each non-duplicate value.
444 * When called, CurrentMemoryContext should be the per-query context.
447 process_sorted_aggregate(AggState *aggstate,
448 AggStatePerAgg peraggstate,
449 AggStatePerGroup pergroupstate)
451 Datum oldVal = (Datum) 0;
452 bool haveOldVal = false;
453 MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
454 MemoryContext oldContext;
458 tuplesort_performsort(peraggstate->sortstate);
461 * Note: if input type is pass-by-ref, the datums returned by the sort
462 * are freshly palloc'd in the per-query context, so we must be
463 * careful to pfree them when they are no longer needed.
466 while (tuplesort_getdatum(peraggstate->sortstate, true,
470 * DISTINCT always suppresses nulls, per SQL spec, regardless of
471 * the transition function's strictness.
477 * Clear and select the working context for evaluation of
478 * the equality function and transition function.
480 MemoryContextReset(workcontext);
481 oldContext = MemoryContextSwitchTo(workcontext);
484 DatumGetBool(FunctionCall2(&peraggstate->equalfn,
487 /* equal to prior, so forget this one */
488 if (!peraggstate->inputtypeByVal)
489 pfree(DatumGetPointer(newVal));
493 advance_transition_function(aggstate, peraggstate, pergroupstate,
495 /* forget the old value, if any */
496 if (haveOldVal && !peraggstate->inputtypeByVal)
497 pfree(DatumGetPointer(oldVal));
498 /* and remember the new one for subsequent equality checks */
503 MemoryContextSwitchTo(oldContext);
506 if (haveOldVal && !peraggstate->inputtypeByVal)
507 pfree(DatumGetPointer(oldVal));
509 tuplesort_end(peraggstate->sortstate);
510 peraggstate->sortstate = NULL;
514 * Compute the final value of one aggregate.
516 * The finalfunction will be run, and the result delivered, in the
517 * output-tuple context; caller's CurrentMemoryContext does not matter.
520 finalize_aggregate(AggState *aggstate,
521 AggStatePerAgg peraggstate,
522 AggStatePerGroup pergroupstate,
523 Datum *resultVal, bool *resultIsNull)
525 MemoryContext oldContext;
527 oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
530 * Apply the agg's finalfn if one is provided, else return transValue.
532 if (OidIsValid(peraggstate->finalfn_oid))
534 FunctionCallInfoData fcinfo;
536 MemSet(&fcinfo, 0, sizeof(fcinfo));
537 fcinfo.flinfo = &peraggstate->finalfn;
539 fcinfo.arg[0] = pergroupstate->transValue;
540 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
541 if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull)
543 /* don't call a strict function with NULL inputs */
544 *resultVal = (Datum) 0;
545 *resultIsNull = true;
549 *resultVal = FunctionCallInvoke(&fcinfo);
550 *resultIsNull = fcinfo.isnull;
555 *resultVal = pergroupstate->transValue;
556 *resultIsNull = pergroupstate->transValueIsNull;
560 * If result is pass-by-ref, make sure it is in the right context.
562 if (!peraggstate->resulttypeByVal && !*resultIsNull &&
563 !MemoryContextContains(CurrentMemoryContext,
564 DatumGetPointer(*resultVal)))
565 *resultVal = datumCopy(*resultVal,
566 peraggstate->resulttypeByVal,
567 peraggstate->resulttypeLen);
569 MemoryContextSwitchTo(oldContext);
573 * Initialize the hash table to empty.
575 * The hash table always lives in the aggcontext memory context.
578 build_hash_table(AggState *aggstate)
580 Agg *node = (Agg *) aggstate->ss.ps.plan;
581 AggHashTable hashtable;
584 Assert(node->aggstrategy == AGG_HASHED);
585 Assert(node->numGroups > 0);
586 tabsize = sizeof(AggHashTableData) +
587 (node->numGroups - 1) * sizeof(AggHashEntry);
588 hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext,
590 MemSet(hashtable, 0, tabsize);
591 hashtable->nbuckets = node->numGroups;
592 aggstate->hashtable = hashtable;
596 * Find or create a hashtable entry for the tuple group containing the
599 * When called, CurrentMemoryContext should be the per-query context.
602 lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
604 Agg *node = (Agg *) aggstate->ss.ps.plan;
605 AggHashTable hashtable = aggstate->hashtable;
606 MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
607 HeapTuple tuple = slot->val;
608 TupleDesc tupdesc = slot->ttc_tupleDescriptor;
613 MemoryContext oldContext;
616 /* Need to run the hash function in short-lived context */
617 oldContext = MemoryContextSwitchTo(tmpmem);
619 for (i = 0; i < node->numCols; i++)
621 AttrNumber att = node->grpColIdx[i];
625 /* rotate hashkey left 1 bit at each step */
626 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
628 attr = heap_getattr(tuple, att, tupdesc, &isNull);
630 continue; /* treat nulls as having hash key 0 */
631 hashkey ^= ComputeHashFunc(attr,
632 (int) tupdesc->attrs[att - 1]->attlen,
633 tupdesc->attrs[att - 1]->attbyval);
635 bucketno = hashkey % (uint32) hashtable->nbuckets;
637 for (entry = hashtable->buckets[bucketno];
641 /* Quick check using hashkey */
642 if (entry->hashkey != hashkey)
644 if (execTuplesMatch(entry->firstTuple,
647 node->numCols, node->grpColIdx,
648 aggstate->eqfunctions,
651 MemoryContextSwitchTo(oldContext);
656 /* Not there, so build a new one */
657 MemoryContextSwitchTo(aggstate->aggcontext);
658 entrysize = sizeof(AggHashEntryData) +
659 (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
660 entry = (AggHashEntry) palloc0(entrysize);
662 entry->hashkey = hashkey;
663 entry->firstTuple = heap_copytuple(tuple);
665 entry->next = hashtable->buckets[bucketno];
666 hashtable->buckets[bucketno] = entry;
668 MemoryContextSwitchTo(oldContext);
670 /* initialize aggregates for new tuple group */
671 initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
679 * ExecAgg receives tuples from its outer subplan and aggregates over
680 * the appropriate attribute for each aggregate function use (Aggref
681 * node) appearing in the targetlist or qual of the node. The number
682 * of tuples to aggregate over depends on whether grouped or plain
683 * aggregation is selected. In grouped aggregation, we produce a result
684 * row for each group; in plain aggregation there's a single result row
685 * for the whole query. In either case, the value of each aggregate is
686 * stored in the expression context to be used when ExecProject evaluates
690 ExecAgg(AggState *node)
695 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
697 if (!node->table_filled)
698 agg_fill_hash_table(node);
699 return agg_retrieve_hash_table(node);
703 return agg_retrieve_direct(node);
708 * ExecAgg for non-hashed case
710 static TupleTableSlot *
711 agg_retrieve_direct(AggState *aggstate)
713 Agg *node = (Agg *) aggstate->ss.ps.plan;
714 PlanState *outerPlan;
715 ExprContext *econtext;
716 ExprContext *tmpcontext;
717 ProjectionInfo *projInfo;
720 AggStatePerAgg peragg;
721 AggStatePerGroup pergroup;
722 TupleTableSlot *outerslot;
723 TupleTableSlot *firstSlot;
724 TupleTableSlot *resultSlot;
728 * get state info from node
730 outerPlan = outerPlanState(aggstate);
731 /* econtext is the per-output-tuple expression context */
732 econtext = aggstate->ss.ps.ps_ExprContext;
733 aggvalues = econtext->ecxt_aggvalues;
734 aggnulls = econtext->ecxt_aggnulls;
735 /* tmpcontext is the per-input-tuple expression context */
736 tmpcontext = aggstate->tmpcontext;
737 projInfo = aggstate->ss.ps.ps_ProjInfo;
738 peragg = aggstate->peragg;
739 pergroup = aggstate->pergroup;
740 firstSlot = aggstate->ss.ss_ScanTupleSlot;
743 * We loop retrieving groups until we find one matching
744 * aggstate->ss.ps.qual
748 if (aggstate->agg_done)
752 * If we don't already have the first tuple of the new group,
753 * fetch it from the outer plan.
755 if (aggstate->grp_firstTuple == NULL)
757 outerslot = ExecProcNode(outerPlan);
758 if (!TupIsNull(outerslot))
761 * Make a copy of the first input tuple; we will use this
762 * for comparisons (in group mode) and for projection.
764 aggstate->grp_firstTuple = heap_copytuple(outerslot->val);
768 /* outer plan produced no tuples at all */
769 aggstate->agg_done = true;
770 /* If we are grouping, we should produce no tuples too */
771 if (node->aggstrategy != AGG_PLAIN)
777 * Clear the per-output-tuple context for each group
779 ResetExprContext(econtext);
782 * Initialize working state for a new input tuple group
784 initialize_aggregates(aggstate, peragg, pergroup);
786 if (aggstate->grp_firstTuple != NULL)
789 * Store the copied first input tuple in the tuple table slot
790 * reserved for it. The tuple will be deleted when it is
791 * cleared from the slot.
793 ExecStoreTuple(aggstate->grp_firstTuple,
797 aggstate->grp_firstTuple = NULL; /* don't keep two pointers */
799 /* set up for first advance_aggregates call */
800 tmpcontext->ecxt_scantuple = firstSlot;
803 * Process each outer-plan tuple, and then fetch the next one,
804 * until we exhaust the outer plan or cross a group boundary.
808 advance_aggregates(aggstate, pergroup);
810 /* Reset per-input-tuple context after each tuple */
811 ResetExprContext(tmpcontext);
813 outerslot = ExecProcNode(outerPlan);
814 if (TupIsNull(outerslot))
816 /* no more outer-plan tuples available */
817 aggstate->agg_done = true;
820 /* set up for next advance_aggregates call */
821 tmpcontext->ecxt_scantuple = outerslot;
824 * If we are grouping, check whether we've crossed a group
827 if (node->aggstrategy == AGG_SORTED)
829 if (!execTuplesMatch(firstSlot->val,
831 firstSlot->ttc_tupleDescriptor,
832 node->numCols, node->grpColIdx,
833 aggstate->eqfunctions,
834 tmpcontext->ecxt_per_tuple_memory))
837 * Save the first input tuple of the next group.
839 aggstate->grp_firstTuple = heap_copytuple(outerslot->val);
847 * Done scanning input tuple group. Finalize each aggregate
848 * calculation, and stash results in the per-output-tuple context.
850 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
852 AggStatePerAgg peraggstate = &peragg[aggno];
853 AggStatePerGroup pergroupstate = &pergroup[aggno];
855 if (peraggstate->aggref->aggdistinct)
856 process_sorted_aggregate(aggstate, peraggstate, pergroupstate);
858 finalize_aggregate(aggstate, peraggstate, pergroupstate,
859 &aggvalues[aggno], &aggnulls[aggno]);
863 * If we have no first tuple (ie, the outerPlan didn't return
864 * anything), create a dummy all-nulls input tuple for use by
865 * ExecProject. 99.44% of the time this is a waste of cycles,
866 * because ordinarily the projected output tuple's targetlist
867 * cannot contain any direct (non-aggregated) references to
868 * input columns, so the dummy tuple will not be referenced.
869 * However there are special cases where this isn't so --- in
870 * particular an UPDATE involving an aggregate will have a
871 * targetlist reference to ctid. We need to return a null for
872 * ctid in that situation, not coredump.
874 * The values returned for the aggregates will be the initial
875 * values of the transition functions.
877 if (TupIsNull(firstSlot))
881 /* Should only happen in non-grouped mode */
882 Assert(node->aggstrategy == AGG_PLAIN);
883 Assert(aggstate->agg_done);
885 tupType = firstSlot->ttc_tupleDescriptor;
886 /* watch out for zero-column input tuples, though... */
887 if (tupType && tupType->natts > 0)
889 HeapTuple nullsTuple;
893 dvalues = (Datum *) palloc0(sizeof(Datum) * tupType->natts);
894 dnulls = (char *) palloc(sizeof(char) * tupType->natts);
895 MemSet(dnulls, 'n', sizeof(char) * tupType->natts);
896 nullsTuple = heap_formtuple(tupType, dvalues, dnulls);
897 ExecStoreTuple(nullsTuple,
907 * Form a projection tuple using the aggregate results and the
908 * representative input tuple. Store it in the result tuple slot.
909 * Note we do not support aggregates returning sets ...
911 econtext->ecxt_scantuple = firstSlot;
912 resultSlot = ExecProject(projInfo, NULL);
915 * If the completed tuple does not match the qualifications, it is
916 * ignored and we loop back to try to process another group.
917 * Otherwise, return the tuple.
920 while (!ExecQual(aggstate->ss.ps.qual, econtext, false));
926 * ExecAgg for hashed case: phase 1, read input and build hash table
929 agg_fill_hash_table(AggState *aggstate)
931 PlanState *outerPlan;
932 ExprContext *tmpcontext;
934 TupleTableSlot *outerslot;
937 * get state info from node
939 outerPlan = outerPlanState(aggstate);
940 /* tmpcontext is the per-input-tuple expression context */
941 tmpcontext = aggstate->tmpcontext;
944 * Process each outer-plan tuple, and then fetch the next one,
945 * until we exhaust the outer plan.
949 outerslot = ExecProcNode(outerPlan);
950 if (TupIsNull(outerslot))
952 /* set up for advance_aggregates call */
953 tmpcontext->ecxt_scantuple = outerslot;
955 /* Find or build hashtable entry for this tuple's group */
956 entry = lookup_hash_entry(aggstate, outerslot);
958 /* Advance the aggregates */
959 advance_aggregates(aggstate, entry->pergroup);
961 /* Reset per-input-tuple context after each tuple */
962 ResetExprContext(tmpcontext);
965 aggstate->table_filled = true;
966 /* Initialize to walk the hash table */
967 aggstate->next_hash_entry = NULL;
968 aggstate->next_hash_bucket = 0;
972 * ExecAgg for hashed case: phase 2, retrieving groups from hash table
974 static TupleTableSlot *
975 agg_retrieve_hash_table(AggState *aggstate)
977 ExprContext *econtext;
978 ProjectionInfo *projInfo;
981 AggStatePerAgg peragg;
982 AggStatePerGroup pergroup;
983 AggHashTable hashtable;
985 TupleTableSlot *firstSlot;
986 TupleTableSlot *resultSlot;
990 * get state info from node
992 /* econtext is the per-output-tuple expression context */
993 econtext = aggstate->ss.ps.ps_ExprContext;
994 aggvalues = econtext->ecxt_aggvalues;
995 aggnulls = econtext->ecxt_aggnulls;
996 projInfo = aggstate->ss.ps.ps_ProjInfo;
997 peragg = aggstate->peragg;
998 hashtable = aggstate->hashtable;
999 firstSlot = aggstate->ss.ss_ScanTupleSlot;
1002 * We loop retrieving groups until we find one satisfying
1003 * aggstate->ss.ps.qual
1007 if (aggstate->agg_done)
1011 * Find the next entry in the hash table
1013 entry = aggstate->next_hash_entry;
1014 while (entry == NULL)
1016 if (aggstate->next_hash_bucket >= hashtable->nbuckets)
1018 /* No more entries in hashtable, so done */
1019 aggstate->agg_done = TRUE;
1022 entry = hashtable->buckets[aggstate->next_hash_bucket++];
1024 aggstate->next_hash_entry = entry->next;
1027 * Clear the per-output-tuple context for each group
1029 ResetExprContext(econtext);
1032 * Store the copied first input tuple in the tuple table slot
1033 * reserved for it, so that it can be used in ExecProject.
1035 ExecStoreTuple(entry->firstTuple,
1040 pergroup = entry->pergroup;
1043 * Finalize each aggregate calculation, and stash results in the
1044 * per-output-tuple context.
1046 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
1048 AggStatePerAgg peraggstate = &peragg[aggno];
1049 AggStatePerGroup pergroupstate = &pergroup[aggno];
1051 Assert(!peraggstate->aggref->aggdistinct);
1052 finalize_aggregate(aggstate, peraggstate, pergroupstate,
1053 &aggvalues[aggno], &aggnulls[aggno]);
1057 * Form a projection tuple using the aggregate results and the
1058 * representative input tuple. Store it in the result tuple slot.
1059 * Note we do not support aggregates returning sets ...
1061 econtext->ecxt_scantuple = firstSlot;
1062 resultSlot = ExecProject(projInfo, NULL);
1065 * If the completed tuple does not match the qualifications, it is
1066 * ignored and we loop back to try to process another group.
1067 * Otherwise, return the tuple.
1070 while (!ExecQual(aggstate->ss.ps.qual, econtext, false));
1075 /* -----------------
1078 * Creates the run-time information for the agg node produced by the
1079 * planner and initializes its outer subtree
1083 ExecInitAgg(Agg *node, EState *estate)
1086 AggStatePerAgg peragg;
1088 ExprContext *econtext;
1094 * create state structure
1096 aggstate = makeNode(AggState);
1097 aggstate->ss.ps.plan = (Plan *) node;
1098 aggstate->ss.ps.state = estate;
1100 aggstate->aggs = NIL;
1101 aggstate->numaggs = 0;
1102 aggstate->eqfunctions = NULL;
1103 aggstate->peragg = NULL;
1104 aggstate->agg_done = false;
1105 aggstate->pergroup = NULL;
1106 aggstate->grp_firstTuple = NULL;
1107 aggstate->hashtable = NULL;
1110 * Create expression contexts. We need two, one for per-input-tuple
1111 * processing and one for per-output-tuple processing. We cheat a little
1112 * by using ExecAssignExprContext() to build both.
1114 ExecAssignExprContext(estate, &aggstate->ss.ps);
1115 aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
1116 ExecAssignExprContext(estate, &aggstate->ss.ps);
1119 * We also need a long-lived memory context for holding hashtable
1120 * data structures and transition values. NOTE: the details of what
1121 * is stored in aggcontext and what is stored in the regular per-query
1122 * memory context are driven by a simple decision: we want to reset the
1123 * aggcontext in ExecReScanAgg to recover no-longer-wanted space.
1125 aggstate->aggcontext =
1126 AllocSetContextCreate(CurrentMemoryContext,
1128 ALLOCSET_DEFAULT_MINSIZE,
1129 ALLOCSET_DEFAULT_INITSIZE,
1130 ALLOCSET_DEFAULT_MAXSIZE);
1132 #define AGG_NSLOTS 2
1135 * tuple table initialization
1137 ExecInitScanTupleSlot(estate, &aggstate->ss);
1138 ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
1141 * initialize child expressions
1143 * Note: ExecInitExpr finds Aggrefs for us, and also checks that no aggs
1144 * contain other agg calls in their arguments. This would make no sense
1145 * under SQL semantics anyway (and it's forbidden by the spec). Because
1146 * that is true, we don't need to worry about evaluating the aggs in any
1149 aggstate->ss.ps.targetlist = (List *)
1150 ExecInitExpr((Expr *) node->plan.targetlist,
1151 (PlanState *) aggstate);
1152 aggstate->ss.ps.qual = (List *)
1153 ExecInitExpr((Expr *) node->plan.qual,
1154 (PlanState *) aggstate);
1157 * initialize child nodes
1159 outerPlan = outerPlan(node);
1160 outerPlanState(aggstate) = ExecInitNode(outerPlan, estate);
1163 * initialize source tuple type.
1165 ExecAssignScanTypeFromOuterPlan(&aggstate->ss);
1168 * Initialize result tuple type and projection info.
1170 ExecAssignResultTypeFromTL(&aggstate->ss.ps);
1171 ExecAssignProjectionInfo(&aggstate->ss.ps);
1174 * get the count of aggregates in targetlist and quals
1176 numaggs = aggstate->numaggs;
1177 Assert(numaggs == length(aggstate->aggs));
1181 * This is not an error condition: we might be using the Agg node just
1182 * to do hash-based grouping. Even in the regular case,
1183 * constant-expression simplification could optimize away all of the
1184 * Aggrefs in the targetlist and qual. So keep going, but force local
1185 * copy of numaggs positive so that palloc()s below don't choke.
1191 * Set up aggregate-result storage in the output expr context, and also
1192 * allocate my private per-agg working storage
1194 econtext = aggstate->ss.ps.ps_ExprContext;
1195 econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
1196 econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
1198 peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
1199 aggstate->peragg = peragg;
1201 if (node->aggstrategy == AGG_HASHED)
1203 build_hash_table(aggstate);
1204 aggstate->table_filled = false;
1208 AggStatePerGroup pergroup;
1210 pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) * numaggs);
1211 aggstate->pergroup = pergroup;
1215 * If we are grouping, precompute fmgr lookup data for inner loop
1217 if (node->numCols > 0)
1219 aggstate->eqfunctions =
1220 execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
1226 * Perform lookups of aggregate function info, and initialize the
1227 * unchanging fields of the per-agg data
1230 foreach(alist, aggstate->aggs)
1232 AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(alist);
1233 Aggref *aggref = (Aggref *) aggrefstate->xprstate.expr;
1234 AggStatePerAgg peraggstate = &peragg[++aggno];
1236 Form_pg_aggregate aggform;
1237 AclResult aclresult;
1242 /* Mark Aggref state node with assigned index in the result array */
1243 aggrefstate->aggno = aggno;
1245 /* Fill in the peraggstate data */
1246 peraggstate->aggrefstate = aggrefstate;
1247 peraggstate->aggref = aggref;
1249 aggTuple = SearchSysCache(AGGFNOID,
1250 ObjectIdGetDatum(aggref->aggfnoid),
1252 if (!HeapTupleIsValid(aggTuple))
1253 elog(ERROR, "ExecAgg: cache lookup failed for aggregate %u",
1255 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
1257 /* Check permission to call aggregate function */
1258 aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
1260 if (aclresult != ACLCHECK_OK)
1261 aclcheck_error(aclresult, get_func_name(aggref->aggfnoid));
1263 get_typlenbyval(aggref->aggtype,
1264 &peraggstate->resulttypeLen,
1265 &peraggstate->resulttypeByVal);
1266 get_typlenbyval(aggform->aggtranstype,
1267 &peraggstate->transtypeLen,
1268 &peraggstate->transtypeByVal);
1271 * initval is potentially null, so don't try to access it as a
1272 * struct field. Must do it the hard way with SysCacheGetAttr.
1274 textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
1275 Anum_pg_aggregate_agginitval,
1276 &peraggstate->initValueIsNull);
1278 if (peraggstate->initValueIsNull)
1279 peraggstate->initValue = (Datum) 0;
1281 peraggstate->initValue = GetAggInitVal(textInitVal,
1282 aggform->aggtranstype);
1284 peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
1285 peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
1287 fmgr_info(transfn_oid, &peraggstate->transfn);
1288 if (OidIsValid(finalfn_oid))
1289 fmgr_info(finalfn_oid, &peraggstate->finalfn);
1292 * If the transfn is strict and the initval is NULL, make sure
1293 * input type and transtype are the same (or at least binary-
1294 * compatible), so that it's OK to use the first input value as
1295 * the initial transValue. This should have been checked at agg
1296 * definition time, but just in case...
1298 if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
1301 * Note: use the type from the input expression here, not from
1302 * pg_proc.proargtypes, because the latter might be 0.
1303 * (Consider COUNT(*).)
1305 Oid inputType = exprType((Node *) aggref->target);
1307 if (!IsBinaryCoercible(inputType, aggform->aggtranstype))
1308 elog(ERROR, "Aggregate %u needs to have compatible input type and transition type",
1312 if (aggref->aggdistinct)
1315 * Note: use the type from the input expression here, not from
1316 * pg_proc.proargtypes, because the latter might be a pseudotype.
1317 * (Consider COUNT(*).)
1319 Oid inputType = exprType((Node *) aggref->target);
1322 /* We don't implement DISTINCT aggs in the HASHED case */
1323 Assert(node->aggstrategy != AGG_HASHED);
1325 peraggstate->inputType = inputType;
1326 get_typlenbyval(inputType,
1327 &peraggstate->inputtypeLen,
1328 &peraggstate->inputtypeByVal);
1330 eq_function = equality_oper_funcid(inputType);
1331 fmgr_info(eq_function, &(peraggstate->equalfn));
1332 peraggstate->sortOperator = ordering_oper_opid(inputType);
1333 peraggstate->sortstate = NULL;
1336 ReleaseSysCache(aggTuple);
1343 GetAggInitVal(Datum textInitVal, Oid transtype)
1351 strInitVal = DatumGetCString(DirectFunctionCall1(textout, textInitVal));
1353 tup = SearchSysCache(TYPEOID,
1354 ObjectIdGetDatum(transtype),
1356 if (!HeapTupleIsValid(tup))
1357 elog(ERROR, "GetAggInitVal: cache lookup failed on aggregate transition function return type %u", transtype);
1359 typinput = ((Form_pg_type) GETSTRUCT(tup))->typinput;
1360 typelem = ((Form_pg_type) GETSTRUCT(tup))->typelem;
1361 ReleaseSysCache(tup);
1363 initVal = OidFunctionCall3(typinput,
1364 CStringGetDatum(strInitVal),
1365 ObjectIdGetDatum(typelem),
1373 ExecCountSlotsAgg(Agg *node)
1375 return ExecCountSlotsNode(outerPlan(node)) +
1376 ExecCountSlotsNode(innerPlan(node)) +
1381 ExecEndAgg(AggState *node)
1383 PlanState *outerPlan;
1386 /* Make sure we have closed any open tuplesorts */
1387 for (aggno = 0; aggno < node->numaggs; aggno++)
1389 AggStatePerAgg peraggstate = &node->peragg[aggno];
1391 if (peraggstate->sortstate)
1392 tuplesort_end(peraggstate->sortstate);
1395 ExecFreeProjectionInfo(&node->ss.ps);
1398 * Free both the expr contexts.
1400 ExecFreeExprContext(&node->ss.ps);
1401 node->ss.ps.ps_ExprContext = node->tmpcontext;
1402 ExecFreeExprContext(&node->ss.ps);
1404 MemoryContextDelete(node->aggcontext);
1406 outerPlan = outerPlanState(node);
1407 ExecEndNode(outerPlan);
1409 /* clean up tuple table */
1410 ExecClearTuple(node->ss.ss_ScanTupleSlot);
1411 if (node->grp_firstTuple != NULL)
1413 heap_freetuple(node->grp_firstTuple);
1414 node->grp_firstTuple = NULL;
1419 ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
1421 ExprContext *econtext = node->ss.ps.ps_ExprContext;
1424 /* Make sure we have closed any open tuplesorts */
1425 for (aggno = 0; aggno < node->numaggs; aggno++)
1427 AggStatePerAgg peraggstate = &node->peragg[aggno];
1429 if (peraggstate->sortstate)
1430 tuplesort_end(peraggstate->sortstate);
1431 peraggstate->sortstate = NULL;
1434 node->agg_done = false;
1435 if (node->grp_firstTuple != NULL)
1437 heap_freetuple(node->grp_firstTuple);
1438 node->grp_firstTuple = NULL;
1440 MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
1441 MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
1443 MemoryContextReset(node->aggcontext);
1445 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1447 build_hash_table(node);
1448 node->table_filled = false;
1452 * if chgParam of subnode is not null then plan will be re-scanned by
1453 * first ExecProcNode.
1455 if (((PlanState *) node)->lefttree->chgParam == NIL)
1456 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
1460 * aggregate_dummy - dummy execution routine for aggregate functions
1462 * This function is listed as the implementation (prosrc field) of pg_proc
1463 * entries for aggregate functions. Its only purpose is to throw an error
1464 * if someone mistakenly executes such a function in the normal way.
1466 * Perhaps someday we could assign real meaning to the prosrc field of
1470 aggregate_dummy(PG_FUNCTION_ARGS)
1472 elog(ERROR, "Aggregate function %u called as normal function",
1473 fcinfo->flinfo->fn_oid);
1474 return (Datum) 0; /* keep compiler quiet */