/*-------------------------------------------------------------------------
 *
 * nodeWindowAgg.c
 *    routines to handle WindowAgg nodes.
 *
 * A WindowAgg node evaluates "window functions" across suitable partitions
 * of the input tuple set.  Any one WindowAgg works for just a single window
 * specification, though it can evaluate multiple window functions sharing
 * identical window specifications.  The input tuples are required to be
 * delivered in sorted order, with the PARTITION BY columns (if any) as
 * major sort keys and the ORDER BY columns (if any) as minor sort keys.
 * (The planner generates a stack of WindowAggs with intervening Sort nodes
 * as needed, if a query involves more than one window specification.)
 *
 * Since window functions can require access to any or all of the rows in
 * the current partition, we accumulate rows of the partition into a
 * tuplestore.  The window functions are called using the WindowObject API
 * so that they can access those rows as needed.
 *
 * We also support using plain aggregate functions as window functions.
 * For these, the regular Agg-node environment is emulated for each partition.
 * As required by the SQL spec, the output represents the value of the
 * aggregate function over all rows in the current row's window frame.
 *
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/executor/nodeWindowAgg.c,v 1.5 2009/06/11 14:48:57 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
36 #include "catalog/pg_aggregate.h"
37 #include "catalog/pg_proc.h"
38 #include "catalog/pg_type.h"
39 #include "executor/executor.h"
40 #include "executor/nodeWindowAgg.h"
41 #include "miscadmin.h"
42 #include "nodes/nodeFuncs.h"
43 #include "optimizer/clauses.h"
44 #include "parser/parse_agg.h"
45 #include "parser/parse_coerce.h"
46 #include "utils/acl.h"
47 #include "utils/builtins.h"
48 #include "utils/datum.h"
49 #include "utils/lsyscache.h"
50 #include "utils/memutils.h"
51 #include "utils/syscache.h"
52 #include "windowapi.h"
55 * All the window function APIs are called with this object, which is passed
56 * to window functions as fcinfo->context.
58 typedef struct WindowObjectData
61 WindowAggState *winstate; /* parent WindowAggState */
62 List *argstates; /* ExprState trees for fn's arguments */
63 void *localmem; /* WinGetPartitionLocalMemory's chunk */
64 int markptr; /* tuplestore mark pointer for this fn */
65 int readptr; /* tuplestore read pointer for this fn */
66 int64 markpos; /* row that markptr is positioned on */
67 int64 seekpos; /* row that readptr is positioned on */
71 * We have one WindowStatePerFunc struct for each window function and
72 * window aggregate handled by this node.
74 typedef struct WindowStatePerFuncData
76 /* Links to WindowFunc expr and state nodes this working state is for */
77 WindowFuncExprState *wfuncstate;
80 int numArguments; /* number of arguments */
82 FmgrInfo flinfo; /* fmgr lookup data for window function */
85 * We need the len and byval info for the result of each function in order
86 * to know how to copy/delete values.
91 bool plain_agg; /* is it just a plain aggregate function? */
92 int aggno; /* if so, index of its PerAggData */
94 WindowObject winobj; /* object used in window function API */
95 } WindowStatePerFuncData;
98 * For plain aggregate window functions, we also have one of these.
100 typedef struct WindowStatePerAggData
102 /* Oids of transfer functions */
104 Oid finalfn_oid; /* may be InvalidOid */
107 * fmgr lookup data for transfer functions --- only valid when
108 * corresponding oid is not InvalidOid. Note in particular that fn_strict
109 * flags are kept here.
115 * initial value from pg_aggregate entry
118 bool initValueIsNull;
121 * cached value for current frame boundaries
124 bool resultValueIsNull;
127 * We need the len and byval info for the agg's input, result, and
128 * transition data types in order to know how to copy/delete values.
137 int wfuncno; /* index of associated PerFuncData */
139 /* Current transition value */
140 Datum transValue; /* current transition value */
141 bool transValueIsNull;
143 bool noTransValue; /* true if transValue not set yet */
144 } WindowStatePerAggData;
146 static void initialize_windowaggregate(WindowAggState *winstate,
147 WindowStatePerFunc perfuncstate,
148 WindowStatePerAgg peraggstate);
149 static void advance_windowaggregate(WindowAggState *winstate,
150 WindowStatePerFunc perfuncstate,
151 WindowStatePerAgg peraggstate);
152 static void finalize_windowaggregate(WindowAggState *winstate,
153 WindowStatePerFunc perfuncstate,
154 WindowStatePerAgg peraggstate,
155 Datum *result, bool *isnull);
157 static void eval_windowaggregates(WindowAggState *winstate);
158 static void eval_windowfunction(WindowAggState *winstate,
159 WindowStatePerFunc perfuncstate,
160 Datum *result, bool *isnull);
162 static void begin_partition(WindowAggState *winstate);
163 static void spool_tuples(WindowAggState *winstate, int64 pos);
164 static void release_partition(WindowAggState *winstate);
166 static bool row_is_in_frame(WindowAggState *winstate, int64 pos,
167 TupleTableSlot *slot);
168 static void update_frametailpos(WindowObject winobj, TupleTableSlot *slot);
170 static WindowStatePerAggData *initialize_peragg(WindowAggState *winstate,
172 WindowStatePerAgg peraggstate);
173 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
175 static bool are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
176 TupleTableSlot *slot2);
177 static bool window_gettupleslot(WindowObject winobj, int64 pos,
178 TupleTableSlot *slot);
182 * initialize_windowaggregate
183 * parallel to initialize_aggregate in nodeAgg.c
186 initialize_windowaggregate(WindowAggState *winstate,
187 WindowStatePerFunc perfuncstate,
188 WindowStatePerAgg peraggstate)
190 MemoryContext oldContext;
192 if (peraggstate->initValueIsNull)
193 peraggstate->transValue = peraggstate->initValue;
196 oldContext = MemoryContextSwitchTo(winstate->wincontext);
197 peraggstate->transValue = datumCopy(peraggstate->initValue,
198 peraggstate->transtypeByVal,
199 peraggstate->transtypeLen);
200 MemoryContextSwitchTo(oldContext);
202 peraggstate->transValueIsNull = peraggstate->initValueIsNull;
203 peraggstate->noTransValue = peraggstate->initValueIsNull;
204 peraggstate->resultValueIsNull = true;
208 * advance_windowaggregate
209 * parallel to advance_aggregate in nodeAgg.c
212 advance_windowaggregate(WindowAggState *winstate,
213 WindowStatePerFunc perfuncstate,
214 WindowStatePerAgg peraggstate)
216 WindowFuncExprState *wfuncstate = perfuncstate->wfuncstate;
217 int numArguments = perfuncstate->numArguments;
218 FunctionCallInfoData fcinfodata;
219 FunctionCallInfo fcinfo = &fcinfodata;
223 MemoryContext oldContext;
224 ExprContext *econtext = winstate->tmpcontext;
226 oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
228 /* We start from 1, since the 0th arg will be the transition value */
230 foreach(arg, wfuncstate->args)
232 ExprState *argstate = (ExprState *) lfirst(arg);
234 fcinfo->arg[i] = ExecEvalExpr(argstate, econtext,
235 &fcinfo->argnull[i], NULL);
239 if (peraggstate->transfn.fn_strict)
242 * For a strict transfn, nothing happens when there's a NULL input; we
243 * just keep the prior transValue.
245 for (i = 1; i <= numArguments; i++)
247 if (fcinfo->argnull[i])
249 MemoryContextSwitchTo(oldContext);
253 if (peraggstate->noTransValue)
256 * transValue has not been initialized. This is the first non-NULL
257 * input value. We use it as the initial value for transValue. (We
258 * already checked that the agg's input type is binary-compatible
259 * with its transtype, so straight copy here is OK.)
261 * We must copy the datum into wincontext if it is pass-by-ref. We
262 * do not need to pfree the old transValue, since it's NULL.
264 MemoryContextSwitchTo(winstate->wincontext);
265 peraggstate->transValue = datumCopy(fcinfo->arg[1],
266 peraggstate->transtypeByVal,
267 peraggstate->transtypeLen);
268 peraggstate->transValueIsNull = false;
269 peraggstate->noTransValue = false;
270 MemoryContextSwitchTo(oldContext);
273 if (peraggstate->transValueIsNull)
276 * Don't call a strict function with NULL inputs. Note it is
277 * possible to get here despite the above tests, if the transfn is
278 * strict *and* returned a NULL on a prior cycle. If that happens
279 * we will propagate the NULL all the way to the end.
281 MemoryContextSwitchTo(oldContext);
287 * OK to call the transition function
289 InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn),
291 (void *) winstate, NULL);
292 fcinfo->arg[0] = peraggstate->transValue;
293 fcinfo->argnull[0] = peraggstate->transValueIsNull;
294 newVal = FunctionCallInvoke(fcinfo);
297 * If pass-by-ref datatype, must copy the new value into wincontext and
298 * pfree the prior transValue. But if transfn returned a pointer to its
299 * first input, we don't need to do anything.
301 if (!peraggstate->transtypeByVal &&
302 DatumGetPointer(newVal) != DatumGetPointer(peraggstate->transValue))
306 MemoryContextSwitchTo(winstate->wincontext);
307 newVal = datumCopy(newVal,
308 peraggstate->transtypeByVal,
309 peraggstate->transtypeLen);
311 if (!peraggstate->transValueIsNull)
312 pfree(DatumGetPointer(peraggstate->transValue));
315 MemoryContextSwitchTo(oldContext);
316 peraggstate->transValue = newVal;
317 peraggstate->transValueIsNull = fcinfo->isnull;
321 * finalize_windowaggregate
322 * parallel to finalize_aggregate in nodeAgg.c
325 finalize_windowaggregate(WindowAggState *winstate,
326 WindowStatePerFunc perfuncstate,
327 WindowStatePerAgg peraggstate,
328 Datum *result, bool *isnull)
330 MemoryContext oldContext;
332 oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
335 * Apply the agg's finalfn if one is provided, else return transValue.
337 if (OidIsValid(peraggstate->finalfn_oid))
339 FunctionCallInfoData fcinfo;
341 InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), 1,
342 (void *) winstate, NULL);
343 fcinfo.arg[0] = peraggstate->transValue;
344 fcinfo.argnull[0] = peraggstate->transValueIsNull;
345 if (fcinfo.flinfo->fn_strict && peraggstate->transValueIsNull)
347 /* don't call a strict function with NULL inputs */
353 *result = FunctionCallInvoke(&fcinfo);
354 *isnull = fcinfo.isnull;
359 *result = peraggstate->transValue;
360 *isnull = peraggstate->transValueIsNull;
364 * If result is pass-by-ref, make sure it is in the right context.
366 if (!peraggstate->resulttypeByVal && !*isnull &&
367 !MemoryContextContains(CurrentMemoryContext,
368 DatumGetPointer(*result)))
369 *result = datumCopy(*result,
370 peraggstate->resulttypeByVal,
371 peraggstate->resulttypeLen);
372 MemoryContextSwitchTo(oldContext);
376 * eval_windowaggregates
377 * evaluate plain aggregates being used as window functions
379 * Much of this is duplicated from nodeAgg.c. But NOTE that we expect to be
380 * able to call aggregate final functions repeatedly after aggregating more
381 * data onto the same transition value. This is not a behavior required by
385 eval_windowaggregates(WindowAggState *winstate)
387 WindowStatePerAgg peraggstate;
391 MemoryContext oldContext;
392 ExprContext *econtext;
393 TupleTableSlot *agg_row_slot;
395 numaggs = winstate->numaggs;
397 return; /* nothing to do */
399 /* final output execution is in ps_ExprContext */
400 econtext = winstate->ss.ps.ps_ExprContext;
403 * Currently, we support only a subset of the SQL-standard window framing
404 * rules. In all the supported cases, the window frame always consists of
405 * a contiguous group of rows extending forward from the start of the
406 * partition, and rows only enter the frame, never exit it, as the current
407 * row advances forward. This makes it possible to use an incremental
408 * strategy for evaluating aggregates: we run the transition function for
409 * each row added to the frame, and run the final function whenever we
410 * need the current aggregate value. This is considerably more efficient
411 * than the naive approach of re-running the entire aggregate calculation
412 * for each current row. It does assume that the final function doesn't
413 * damage the running transition value. (Some C-coded aggregates do that
414 * for efficiency's sake --- but they are supposed to do so only when
415 * their fcinfo->context is an AggState, not a WindowAggState.)
417 * In many common cases, multiple rows share the same frame and hence the
418 * same aggregate value. (In particular, if there's no ORDER BY in a RANGE
419 * window, then all rows are peers and so they all have window frame equal
420 * to the whole partition.) We optimize such cases by calculating the
421 * aggregate value once when we reach the first row of a peer group, and
422 * then returning the saved value for all subsequent rows.
424 * 'aggregatedupto' keeps track of the first row that has not yet been
425 * accumulated into the aggregate transition values. Whenever we start a
426 * new peer group, we accumulate forward to the end of the peer group.
428 * TODO: In the future, we should implement the full SQL-standard set of
429 * framing rules. We could implement the other cases by recalculating the
430 * aggregates whenever a row exits the frame. That would be pretty slow,
431 * though. For aggregates like SUM and COUNT we could implement a
432 * "negative transition function" that would be called for each row as it
433 * exits the frame. We'd have to think about avoiding recalculation of
434 * volatile arguments of aggregate functions, too.
438 * If we've already aggregated up through current row, reuse the saved
439 * result values. NOTE: this test works for the currently supported
440 * framing rules, but will need fixing when more are added.
442 if (winstate->aggregatedupto > winstate->currentpos)
444 for (i = 0; i < numaggs; i++)
446 peraggstate = &winstate->peragg[i];
447 wfuncno = peraggstate->wfuncno;
448 econtext->ecxt_aggvalues[wfuncno] = peraggstate->resultValue;
449 econtext->ecxt_aggnulls[wfuncno] = peraggstate->resultValueIsNull;
454 /* Initialize aggregates on first call for partition */
455 if (winstate->currentpos == 0)
457 for (i = 0; i < numaggs; i++)
459 peraggstate = &winstate->peragg[i];
460 wfuncno = peraggstate->wfuncno;
461 initialize_windowaggregate(winstate,
462 &winstate->perfunc[wfuncno],
468 * Advance until we reach a row not in frame (or end of partition).
470 * Note the loop invariant: agg_row_slot is either empty or holds the row
471 * at position aggregatedupto. The agg_ptr read pointer must always point
472 * to the next row to read into agg_row_slot.
474 agg_row_slot = winstate->agg_row_slot;
477 /* Fetch next row if we didn't already */
478 if (TupIsNull(agg_row_slot))
480 spool_tuples(winstate, winstate->aggregatedupto);
481 tuplestore_select_read_pointer(winstate->buffer,
483 if (!tuplestore_gettupleslot(winstate->buffer, true, true,
485 break; /* must be end of partition */
488 /* Exit loop (for now) if not in frame */
489 if (!row_is_in_frame(winstate, winstate->aggregatedupto, agg_row_slot))
492 /* Set tuple context for evaluation of aggregate arguments */
493 winstate->tmpcontext->ecxt_outertuple = agg_row_slot;
495 /* Accumulate row into the aggregates */
496 for (i = 0; i < numaggs; i++)
498 peraggstate = &winstate->peragg[i];
499 wfuncno = peraggstate->wfuncno;
500 advance_windowaggregate(winstate,
501 &winstate->perfunc[wfuncno],
505 /* Reset per-input-tuple context after each tuple */
506 ResetExprContext(winstate->tmpcontext);
508 /* And advance the aggregated-row state */
509 winstate->aggregatedupto++;
510 ExecClearTuple(agg_row_slot);
514 * finalize aggregates and fill result/isnull fields.
516 for (i = 0; i < numaggs; i++)
521 peraggstate = &winstate->peragg[i];
522 wfuncno = peraggstate->wfuncno;
523 result = &econtext->ecxt_aggvalues[wfuncno];
524 isnull = &econtext->ecxt_aggnulls[wfuncno];
525 finalize_windowaggregate(winstate,
526 &winstate->perfunc[wfuncno],
531 * save the result in case next row shares the same frame.
533 * XXX in some framing modes, eg ROWS/END_CURRENT_ROW, we can know in
534 * advance that the next row can't possibly share the same frame. Is
535 * it worth detecting that and skipping this code?
537 if (!peraggstate->resulttypeByVal)
540 * clear old resultValue in order not to leak memory. (Note: the
541 * new result can't possibly be the same datum as old resultValue,
542 * because we never passed it to the trans function.)
544 if (!peraggstate->resultValueIsNull)
545 pfree(DatumGetPointer(peraggstate->resultValue));
548 * If pass-by-ref, copy it into our global context.
552 oldContext = MemoryContextSwitchTo(winstate->wincontext);
553 peraggstate->resultValue =
555 peraggstate->resulttypeByVal,
556 peraggstate->resulttypeLen);
557 MemoryContextSwitchTo(oldContext);
562 peraggstate->resultValue = *result;
564 peraggstate->resultValueIsNull = *isnull;
569 * eval_windowfunction
571 * Arguments of window functions are not evaluated here, because a window
572 * function can need random access to arbitrary rows in the partition.
573 * The window function uses the special WinGetFuncArgInPartition and
574 * WinGetFuncArgInFrame functions to evaluate the arguments for the rows
578 eval_windowfunction(WindowAggState *winstate, WindowStatePerFunc perfuncstate,
579 Datum *result, bool *isnull)
581 FunctionCallInfoData fcinfo;
582 MemoryContext oldContext;
584 oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
587 * We don't pass any normal arguments to a window function, but we do pass
588 * it the number of arguments, in order to permit window function
589 * implementations to support varying numbers of arguments. The real info
590 * goes through the WindowObject, which is passed via fcinfo->context.
592 InitFunctionCallInfoData(fcinfo, &(perfuncstate->flinfo),
593 perfuncstate->numArguments,
594 (void *) perfuncstate->winobj, NULL);
595 /* Just in case, make all the regular argument slots be null */
596 memset(fcinfo.argnull, true, perfuncstate->numArguments);
598 *result = FunctionCallInvoke(&fcinfo);
599 *isnull = fcinfo.isnull;
602 * Make sure pass-by-ref data is allocated in the appropriate context. (We
603 * need this in case the function returns a pointer into some short-lived
604 * tuple, as is entirely possible.)
606 if (!perfuncstate->resulttypeByVal && !fcinfo.isnull &&
607 !MemoryContextContains(CurrentMemoryContext,
608 DatumGetPointer(*result)))
609 *result = datumCopy(*result,
610 perfuncstate->resulttypeByVal,
611 perfuncstate->resulttypeLen);
613 MemoryContextSwitchTo(oldContext);
618 * Start buffering rows of the next partition.
621 begin_partition(WindowAggState *winstate)
623 PlanState *outerPlan = outerPlanState(winstate);
624 int numfuncs = winstate->numfuncs;
627 winstate->partition_spooled = false;
628 winstate->frametail_valid = false;
629 winstate->spooled_rows = 0;
630 winstate->currentpos = 0;
631 winstate->frametailpos = -1;
632 winstate->aggregatedupto = 0;
633 ExecClearTuple(winstate->agg_row_slot);
636 * If this is the very first partition, we need to fetch the first input
637 * row to store in first_part_slot.
639 if (TupIsNull(winstate->first_part_slot))
641 TupleTableSlot *outerslot = ExecProcNode(outerPlan);
643 if (!TupIsNull(outerslot))
644 ExecCopySlot(winstate->first_part_slot, outerslot);
647 /* outer plan is empty, so we have nothing to do */
648 winstate->partition_spooled = true;
649 winstate->more_partitions = false;
654 /* Create new tuplestore for this partition */
655 winstate->buffer = tuplestore_begin_heap(false, false, work_mem);
658 * Set up read pointers for the tuplestore. The current and agg pointers
659 * don't need BACKWARD capability, but the per-window-function read
662 winstate->current_ptr = 0; /* read pointer 0 is pre-allocated */
664 /* reset default REWIND capability bit for current ptr */
665 tuplestore_set_eflags(winstate->buffer, 0);
667 /* create a read pointer for aggregates, if needed */
668 if (winstate->numaggs > 0)
669 winstate->agg_ptr = tuplestore_alloc_read_pointer(winstate->buffer, 0);
671 /* create mark and read pointers for each real window function */
672 for (i = 0; i < numfuncs; i++)
674 WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
676 if (!perfuncstate->plain_agg)
678 WindowObject winobj = perfuncstate->winobj;
680 winobj->markptr = tuplestore_alloc_read_pointer(winstate->buffer,
682 winobj->readptr = tuplestore_alloc_read_pointer(winstate->buffer,
684 winobj->markpos = -1;
685 winobj->seekpos = -1;
690 * Store the first tuple into the tuplestore (it's always available now;
691 * we either read it above, or saved it at the end of previous partition)
693 tuplestore_puttupleslot(winstate->buffer, winstate->first_part_slot);
694 winstate->spooled_rows++;
698 * Read tuples from the outer node, up to position 'pos', and store them
699 * into the tuplestore. If pos is -1, reads the whole partition.
702 spool_tuples(WindowAggState *winstate, int64 pos)
704 WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan;
705 PlanState *outerPlan;
706 TupleTableSlot *outerslot;
707 MemoryContext oldcontext;
709 if (!winstate->buffer)
710 return; /* just a safety check */
711 if (winstate->partition_spooled)
712 return; /* whole partition done already */
715 * If the tuplestore has spilled to disk, alternate reading and writing
716 * becomes quite expensive due to frequent buffer flushes. It's cheaper
717 * to force the entire partition to get spooled in one go.
719 * XXX this is a horrid kluge --- it'd be better to fix the performance
720 * problem inside tuplestore. FIXME
722 if (!tuplestore_in_memory(winstate->buffer))
725 outerPlan = outerPlanState(winstate);
727 /* Must be in query context to call outerplan or touch tuplestore */
728 oldcontext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory);
730 while (winstate->spooled_rows <= pos || pos == -1)
732 outerslot = ExecProcNode(outerPlan);
733 if (TupIsNull(outerslot))
735 /* reached the end of the last partition */
736 winstate->partition_spooled = true;
737 winstate->more_partitions = false;
741 if (node->partNumCols > 0)
743 /* Check if this tuple still belongs to the current partition */
744 if (!execTuplesMatch(winstate->first_part_slot,
746 node->partNumCols, node->partColIdx,
747 winstate->partEqfunctions,
748 winstate->tmpcontext->ecxt_per_tuple_memory))
751 * end of partition; copy the tuple for the next cycle.
753 ExecCopySlot(winstate->first_part_slot, outerslot);
754 winstate->partition_spooled = true;
755 winstate->more_partitions = true;
760 /* Still in partition, so save it into the tuplestore */
761 tuplestore_puttupleslot(winstate->buffer, outerslot);
762 winstate->spooled_rows++;
765 MemoryContextSwitchTo(oldcontext);
770 * clear information kept within a partition, including
771 * tuplestore and aggregate results.
774 release_partition(WindowAggState *winstate)
778 for (i = 0; i < winstate->numfuncs; i++)
780 WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
782 /* Release any partition-local state of this window function */
783 if (perfuncstate->winobj)
784 perfuncstate->winobj->localmem = NULL;
788 * Release all partition-local memory (in particular, any partition-local
789 * state that we might have trashed our pointers to in the above loop, and
790 * any aggregate temp data). We don't rely on retail pfree because some
791 * aggregates might have allocated data we don't have direct pointers to.
793 MemoryContextResetAndDeleteChildren(winstate->wincontext);
795 if (winstate->buffer)
796 tuplestore_end(winstate->buffer);
797 winstate->buffer = NULL;
798 winstate->partition_spooled = false;
803 * Determine whether a row is in the current row's window frame according
804 * to our window framing rule
806 * The caller must have already determined that the row is in the partition
807 * and fetched it into a slot. This function just encapsulates the framing
811 row_is_in_frame(WindowAggState *winstate, int64 pos, TupleTableSlot *slot)
813 WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan;
814 int frameOptions = node->frameOptions;
816 Assert(pos >= 0); /* else caller error */
818 /* We only support frame start mode UNBOUNDED PRECEDING for now */
819 Assert(frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING);
821 /* In UNBOUNDED FOLLOWING mode, all partition rows are in frame */
822 if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
825 /* Else frame tail mode must be CURRENT ROW */
826 Assert(frameOptions & FRAMEOPTION_END_CURRENT_ROW);
828 /* if row is current row or a predecessor, it must be in frame */
829 if (pos <= winstate->currentpos)
832 /* In ROWS mode, *only* such rows are in frame */
833 if (frameOptions & FRAMEOPTION_ROWS)
836 /* Else must be RANGE mode */
837 Assert(frameOptions & FRAMEOPTION_RANGE);
839 /* In frame iff it's a peer of current row */
840 return are_peers(winstate, slot, winstate->ss.ss_ScanTupleSlot);
844 * update_frametailpos
845 * make frametailpos valid for the current row
847 * Uses the winobj's read pointer for any required fetches; the winobj's
848 * mark must not be past the currently known frame tail. Also uses the
849 * specified slot for any required fetches.
852 update_frametailpos(WindowObject winobj, TupleTableSlot *slot)
854 WindowAggState *winstate = winobj->winstate;
855 WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan;
856 int frameOptions = node->frameOptions;
859 if (winstate->frametail_valid)
860 return; /* already known for current row */
862 /* We only support frame start mode UNBOUNDED PRECEDING for now */
863 Assert(frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING);
865 /* In UNBOUNDED FOLLOWING mode, all partition rows are in frame */
866 if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
868 spool_tuples(winstate, -1);
869 winstate->frametailpos = winstate->spooled_rows - 1;
870 winstate->frametail_valid = true;
874 /* Else frame tail mode must be CURRENT ROW */
875 Assert(frameOptions & FRAMEOPTION_END_CURRENT_ROW);
877 /* In ROWS mode, exactly the rows up to current are in frame */
878 if (frameOptions & FRAMEOPTION_ROWS)
880 winstate->frametailpos = winstate->currentpos;
881 winstate->frametail_valid = true;
885 /* Else must be RANGE mode */
886 Assert(frameOptions & FRAMEOPTION_RANGE);
888 /* If no ORDER BY, all rows are peers with each other */
889 if (node->ordNumCols == 0)
891 spool_tuples(winstate, -1);
892 winstate->frametailpos = winstate->spooled_rows - 1;
893 winstate->frametail_valid = true;
898 * Else we have to search for the first non-peer of the current row. We
899 * assume the current value of frametailpos is a lower bound on the
900 * possible frame tail location, ie, frame tail never goes backward, and
901 * that currentpos is also a lower bound, ie, current row is always in
904 ftnext = Max(winstate->frametailpos, winstate->currentpos) + 1;
907 if (!window_gettupleslot(winobj, ftnext, slot))
908 break; /* end of partition */
909 if (!are_peers(winstate, slot, winstate->ss.ss_ScanTupleSlot))
910 break; /* not peer of current row */
913 winstate->frametailpos = ftnext - 1;
914 winstate->frametail_valid = true;
921 * ExecWindowAgg receives tuples from its outer subplan and
922 * stores them into a tuplestore, then processes window functions.
923 * This node doesn't reduce nor qualify any row so the number of
924 * returned rows is exactly the same as its outer subplan's result
925 * (ignoring the case of SRFs in the targetlist, that is).
929 ExecWindowAgg(WindowAggState *winstate)
931 TupleTableSlot *result;
933 ExprContext *econtext;
937 if (winstate->all_done)
941 * Check to see if we're still projecting out tuples from a previous
942 * output tuple (because there is a function-returning-set in the
943 * projection expressions). If so, try to project another one.
945 if (winstate->ss.ps.ps_TupFromTlist)
947 TupleTableSlot *result;
950 result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone);
951 if (isDone == ExprMultipleResult)
953 /* Done with that source tuple... */
954 winstate->ss.ps.ps_TupFromTlist = false;
958 if (winstate->buffer == NULL)
960 /* Initialize for first partition and set current row = 0 */
961 begin_partition(winstate);
962 /* If there are no input rows, we'll detect that and exit below */
966 /* Advance current row within partition */
967 winstate->currentpos++;
968 /* This might mean that the frame tail moves, too */
969 winstate->frametail_valid = false;
973 * Spool all tuples up to and including the current row, if we haven't
976 spool_tuples(winstate, winstate->currentpos);
978 /* Move to the next partition if we reached the end of this partition */
979 if (winstate->partition_spooled &&
980 winstate->currentpos >= winstate->spooled_rows)
982 release_partition(winstate);
984 if (winstate->more_partitions)
986 begin_partition(winstate);
987 Assert(winstate->spooled_rows > 0);
991 winstate->all_done = true;
996 /* final output execution is in ps_ExprContext */
997 econtext = winstate->ss.ps.ps_ExprContext;
999 /* Clear the per-output-tuple context for current row */
1000 ResetExprContext(econtext);
1003 * Read the current row from the tuplestore, and save in ScanTupleSlot.
1004 * (We can't rely on the outerplan's output slot because we may have to
1005 * read beyond the current row. Also, we have to actually copy the row
1006 * out of the tuplestore, since window function evaluation might cause the
1007 * tuplestore to dump its state to disk.)
1009 * Current row must be in the tuplestore, since we spooled it above.
1011 tuplestore_select_read_pointer(winstate->buffer, winstate->current_ptr);
1012 if (!tuplestore_gettupleslot(winstate->buffer, true, true,
1013 winstate->ss.ss_ScanTupleSlot))
1014 elog(ERROR, "unexpected end of tuplestore");
1017 * Evaluate true window functions
1019 numfuncs = winstate->numfuncs;
1020 for (i = 0; i < numfuncs; i++)
1022 WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
1024 if (perfuncstate->plain_agg)
1026 eval_windowfunction(winstate, perfuncstate,
1027 &(econtext->ecxt_aggvalues[perfuncstate->wfuncstate->wfuncno]),
1028 &(econtext->ecxt_aggnulls[perfuncstate->wfuncstate->wfuncno]));
1032 * Evaluate aggregates
1034 if (winstate->numaggs > 0)
1035 eval_windowaggregates(winstate);
1038 * Truncate any no-longer-needed rows from the tuplestore.
1040 tuplestore_trim(winstate->buffer);
1043 * Form and return a projection tuple using the windowfunc results and the
1044 * current row. Setting ecxt_outertuple arranges that any Vars will be
1045 * evaluated with respect to that row.
1047 econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
1048 result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone);
1050 if (isDone == ExprEndResult)
1052 /* SRF in tlist returned no rows, so advance to next input tuple */
1056 winstate->ss.ps.ps_TupFromTlist =
1057 (isDone == ExprMultipleResult);
1061 /* -----------------
1064 * Creates the run-time information for the WindowAgg node produced by the
1065 * planner and initializes its outer subtree
1069 ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
/*
 * Builds the WindowAggState for one WindowAgg plan node: two expression
 * contexts, a private memory context, the working tuple slots, the child
 * plan state, the tuple-comparison data, and the per-function and
 * per-aggregate state arrays.
 */
1071 WindowAggState *winstate;
1073 ExprContext *econtext;
1074 ExprContext *tmpcontext;
1075 WindowStatePerFunc perfunc;
1076 WindowStatePerAgg peragg;
1083 /* check for unsupported flags */
1084 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
1087 * create state structure
1089 winstate = makeNode(WindowAggState);
1090 winstate->ss.ps.plan = (Plan *) node;
1091 winstate->ss.ps.state = estate;
1094 * Create expression contexts. We need two, one for per-input-tuple
1095 * processing and one for per-output-tuple processing. We cheat a little
1096 * by using ExecAssignExprContext() to build both.
1098 ExecAssignExprContext(estate, &winstate->ss.ps);
/*
 * Save the context created by the first call as tmpcontext before calling
 * ExecAssignExprContext() a second time; ExecEndWindowAgg swaps tmpcontext
 * back into ps_ExprContext in order to free it.
 */
1099 tmpcontext = winstate->ss.ps.ps_ExprContext;
1100 winstate->tmpcontext = tmpcontext;
1101 ExecAssignExprContext(estate, &winstate->ss.ps);
1103 /* Create long-lived context for storage of aggregate transvalues etc */
1104 winstate->wincontext =
1105 AllocSetContextCreate(CurrentMemoryContext,
1107 ALLOCSET_DEFAULT_MINSIZE,
1108 ALLOCSET_DEFAULT_INITSIZE,
1109 ALLOCSET_DEFAULT_MAXSIZE);
/* Six slots are initialized just below: scan, result, and four extra slots */
1111 #define WINDOWAGG_NSLOTS 6
1114 * tuple table initialization
1116 ExecInitScanTupleSlot(estate, &winstate->ss);
1117 ExecInitResultTupleSlot(estate, &winstate->ss.ps);
1118 winstate->first_part_slot = ExecInitExtraTupleSlot(estate);
1119 winstate->agg_row_slot = ExecInitExtraTupleSlot(estate);
1120 winstate->temp_slot_1 = ExecInitExtraTupleSlot(estate);
1121 winstate->temp_slot_2 = ExecInitExtraTupleSlot(estate);
1123 winstate->ss.ps.targetlist = (List *)
1124 ExecInitExpr((Expr *) node->plan.targetlist,
1125 (PlanState *) winstate);
1128 * WindowAgg nodes never have quals, since they can only occur at the
1129 * logical top level of a query (ie, after any WHERE or HAVING filters)
1131 Assert(node->plan.qual == NIL);
1132 winstate->ss.ps.qual = NIL;
1135 * initialize child nodes
1137 outerPlan = outerPlan(node);
1138 outerPlanState(winstate) = ExecInitNode(outerPlan, estate, eflags);
1141 * initialize source tuple type (which is also the tuple type that we'll
1142 * store in the tuplestore and use in all our working slots).
1144 ExecAssignScanTypeFromOuterPlan(&winstate->ss);
/* Give each of the four extra slots the scan slot's tuple descriptor */
1146 ExecSetSlotDescriptor(winstate->first_part_slot,
1147 winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
1148 ExecSetSlotDescriptor(winstate->agg_row_slot,
1149 winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
1150 ExecSetSlotDescriptor(winstate->temp_slot_1,
1151 winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
1152 ExecSetSlotDescriptor(winstate->temp_slot_2,
1153 winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
1156 * Initialize result tuple type and projection info.
1158 ExecAssignResultTypeFromTL(&winstate->ss.ps);
1159 ExecAssignProjectionInfo(&winstate->ss.ps, NULL);
1161 winstate->ss.ps.ps_TupFromTlist = false;
1163 /* Set up data for comparing tuples */
/* Equality functions are prepared only for the column sets that exist */
1164 if (node->partNumCols > 0)
1165 winstate->partEqfunctions = execTuplesMatchPrepare(node->partNumCols,
1166 node->partOperators);
1167 if (node->ordNumCols > 0)
1168 winstate->ordEqfunctions = execTuplesMatchPrepare(node->ordNumCols,
1169 node->ordOperators);
1172 * WindowAgg nodes use aggvalues and aggnulls as well as Agg nodes.
/*
 * NOTE(review): numfuncs/numaggs are read from winstate without being set
 * anywhere in this function; presumably they were counted while the
 * targetlist was initialized by ExecInitExpr() above -- confirm.  They size
 * the arrays below and are overwritten with the de-duplicated counts at the
 * end of this function.
 */
1174 numfuncs = winstate->numfuncs;
1175 numaggs = winstate->numaggs;
1176 econtext = winstate->ss.ps.ps_ExprContext;
1177 econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numfuncs);
1178 econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numfuncs);
1181 * allocate per-wfunc/per-agg state information.
1183 perfunc = (WindowStatePerFunc) palloc0(sizeof(WindowStatePerFuncData) * numfuncs);
1184 peragg = (WindowStatePerAgg) palloc0(sizeof(WindowStatePerAggData) * numaggs);
1185 winstate->perfunc = perfunc;
1186 winstate->peragg = peragg;
/* Assign a perfunc index to every WindowFunc expression state in the list */
1190 foreach(l, winstate->funcs)
1192 WindowFuncExprState *wfuncstate = (WindowFuncExprState *) lfirst(l);
1193 WindowFunc *wfunc = (WindowFunc *) wfuncstate->xprstate.expr;
1194 WindowStatePerFunc perfuncstate;
1195 AclResult aclresult;
1198 if (wfunc->winref != node->winref) /* planner screwed up? */
1199 elog(ERROR, "WindowFunc with winref %u assigned to WindowAgg with winref %u",
1200 wfunc->winref, node->winref);
1202 /* Look for a previous duplicate window function */
/*
 * Only entries 0..wfuncno have been assigned so far.  An entry counts as a
 * duplicate when the expressions are equal() and contain no volatile
 * functions.
 */
1203 for (i = 0; i <= wfuncno; i++)
1205 if (equal(wfunc, perfunc[i].wfunc) &&
1206 !contain_volatile_functions((Node *) wfunc))
1211 /* Found a match to an existing entry, so just mark it */
1212 wfuncstate->wfuncno = i;
1216 /* Nope, so assign a new PerFunc record */
1217 perfuncstate = &perfunc[++wfuncno];
1219 /* Mark WindowFunc state node with assigned index in the result array */
1220 wfuncstate->wfuncno = wfuncno;
1222 /* Check permission to call window function */
1223 aclresult = pg_proc_aclcheck(wfunc->winfnoid, GetUserId(),
1225 if (aclresult != ACLCHECK_OK)
1226 aclcheck_error(aclresult, ACL_KIND_PROC,
1227 get_func_name(wfunc->winfnoid));
1229 /* Fill in the perfuncstate data */
1230 perfuncstate->wfuncstate = wfuncstate;
1231 perfuncstate->wfunc = wfunc;
1232 perfuncstate->numArguments = list_length(wfuncstate->args);
/* Function lookup info is set up in the per-query memory context */
1234 fmgr_info_cxt(wfunc->winfnoid, &perfuncstate->flinfo,
1235 tmpcontext->ecxt_per_query_memory);
1236 perfuncstate->flinfo.fn_expr = (Node *) wfunc;
1237 get_typlenbyval(wfunc->wintype,
1238 &perfuncstate->resulttypeLen,
1239 &perfuncstate->resulttypeByVal);
1242 * If it's really just a plain aggregate function, we'll emulate the
1243 * Agg environment for it.
1245 perfuncstate->plain_agg = wfunc->winagg;
1248 WindowStatePerAgg peraggstate;
/*
 * Take the next peragg entry and record the cross-links: perfuncstate->aggno
 * points at the peragg entry, peraggstate->wfuncno points back at the
 * perfunc entry.
 */
1250 perfuncstate->aggno = ++aggno;
1251 peraggstate = &winstate->peragg[aggno];
1252 initialize_peragg(winstate, wfunc, peraggstate);
1253 peraggstate->wfuncno = wfuncno;
/*
 * Build the WindowObject stored in perfuncstate->winobj; it carries a
 * pointer to this node's state and the function's argument expression
 * states.
 */
1257 WindowObject winobj = makeNode(WindowObjectData);
1259 winobj->winstate = winstate;
1260 winobj->argstates = wfuncstate->args;
1261 winobj->localmem = NULL;
1262 perfuncstate->winobj = winobj;
1266 /* Update numfuncs, numaggs to match number of unique functions found */
/* wfuncno and aggno hold the last index handed out, hence the + 1 */
1267 winstate->numfuncs = wfuncno + 1;
1268 winstate->numaggs = aggno + 1;
1270 winstate->partition_spooled = false;
1271 winstate->more_partitions = false;
1276 /* -----------------
1277 * ExecCountSlotsWindowAgg
1281 ExecCountSlotsWindowAgg(WindowAgg *node)
/* The total starts from the slot counts reported for both child plans. */
1283 return ExecCountSlotsNode(outerPlan(node)) +
1284 ExecCountSlotsNode(innerPlan(node)) +
1288 /* -----------------
1293 ExecEndWindowAgg(WindowAggState *node)
/*
 * Shuts the node down: releases the current partition, frees the
 * per-function and per-aggregate arrays, clears the working slots, frees
 * both expression contexts, deletes the private memory context, and finally
 * ends the child plan.
 */
1295 PlanState *outerPlan;
1297 release_partition(node);
/* These arrays were palloc'd by ExecInitWindowAgg */
1299 pfree(node->perfunc);
1300 pfree(node->peragg);
1302 ExecClearTuple(node->ss.ss_ScanTupleSlot);
1303 ExecClearTuple(node->first_part_slot);
1304 ExecClearTuple(node->agg_row_slot);
1305 ExecClearTuple(node->temp_slot_1);
1306 ExecClearTuple(node->temp_slot_2);
1309 * Free both the expr contexts.
1311 ExecFreeExprContext(&node->ss.ps);
/*
 * Point ps_ExprContext at the saved tmpcontext so that the same call can
 * release the second context as well.
 */
1312 node->ss.ps.ps_ExprContext = node->tmpcontext;
1313 ExecFreeExprContext(&node->ss.ps);
1315 MemoryContextDelete(node->wincontext);
1317 outerPlan = outerPlanState(node);
1318 ExecEndNode(outerPlan);
1321 /* -----------------
1322 * ExecReScanWindowAgg
1326 ExecReScanWindowAgg(WindowAggState *node, ExprContext *exprCtxt)
/*
 * Resets the node so that the next fetch starts over: clears the all_done
 * and ps_TupFromTlist flags, releases the current partition, clears all
 * working slots, zeroes the stored window function results, and rescans the
 * child plan when required.
 */
1328 ExprContext *econtext = node->ss.ps.ps_ExprContext;
1330 node->all_done = false;
1332 node->ss.ps.ps_TupFromTlist = false;
1334 /* release tuplestore et al */
1335 release_partition(node);
1337 /* release all temp tuples, but especially first_part_slot */
1338 ExecClearTuple(node->ss.ss_ScanTupleSlot);
1339 ExecClearTuple(node->first_part_slot);
1340 ExecClearTuple(node->agg_row_slot);
1341 ExecClearTuple(node->temp_slot_1);
1342 ExecClearTuple(node->temp_slot_2);
1344 /* Forget current wfunc values */
/* Both arrays hold one entry per window function (node->numfuncs) */
1345 MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numfuncs);
1346 MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numfuncs);
1349 * if chgParam of subnode is not null then plan will be re-scanned by
1350 * first ExecProcNode.
/* So the explicit rescan is needed only when the child has no changed params */
1352 if (((PlanState *) node)->lefttree->chgParam == NULL)
1353 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
1359 * Almost same as in nodeAgg.c, except we don't support DISTINCT currently.
1361 static WindowStatePerAggData *
1362 initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
1363 WindowStatePerAgg peraggstate)
/*
 * Fills in *peraggstate for a plain aggregate used as a window function:
 * looks up the aggregate's pg_aggregate row, checks that the aggregate's
 * owner may call the transition and final functions, resolves a polymorphic
 * transition type, sets up the function lookup info, caches type
 * length/by-value data, and fetches the initial transition value.
 */
1365 Oid inputTypes[FUNC_MAX_ARGS];
1368 Form_pg_aggregate aggform;
1370 AclResult aclresult;
1379 numArguments = list_length(wfunc->args);
/* Collect the actual type of every argument expression */
1382 foreach(lc, wfunc->args)
1384 inputTypes[i++] = exprType((Node *) lfirst(lc));
/* Fetch the aggregate's catalog entry; it is released at the end */
1387 aggTuple = SearchSysCache(AGGFNOID,
1388 ObjectIdGetDatum(wfunc->winfnoid),
1390 if (!HeapTupleIsValid(aggTuple))
1391 elog(ERROR, "cache lookup failed for aggregate %u",
1393 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
1396 * ExecInitWindowAgg already checked permission to call aggregate function
1397 * ... but we still need to check the component functions
1400 peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
1401 peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
1403 /* Check that aggregate owner has permission to call component fns */
1405 HeapTuple procTuple;
/* The owner is taken from the aggregate's own pg_proc row */
1408 procTuple = SearchSysCache(PROCOID,
1409 ObjectIdGetDatum(wfunc->winfnoid),
1411 if (!HeapTupleIsValid(procTuple))
1412 elog(ERROR, "cache lookup failed for function %u",
1414 aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
1415 ReleaseSysCache(procTuple);
1417 aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
1419 if (aclresult != ACLCHECK_OK)
1420 aclcheck_error(aclresult, ACL_KIND_PROC,
1421 get_func_name(transfn_oid));
/* The final function is checked only when the aggregate has one */
1422 if (OidIsValid(finalfn_oid))
1424 aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
1426 if (aclresult != ACLCHECK_OK)
1427 aclcheck_error(aclresult, ACL_KIND_PROC,
1428 get_func_name(finalfn_oid));
1432 /* resolve actual type of transition state, if polymorphic */
1433 aggtranstype = aggform->aggtranstype;
1434 if (IsPolymorphicType(aggtranstype))
1436 /* have to fetch the agg's declared input types... */
1437 Oid *declaredArgTypes;
1440 get_func_signature(wfunc->winfnoid,
1441 &declaredArgTypes, &agg_nargs);
1442 Assert(agg_nargs == numArguments);
1443 aggtranstype = enforce_generic_type_consistency(inputTypes,
1448 pfree(declaredArgTypes);
1451 /* build expression trees using actual argument & result types */
1452 build_aggregate_fnexprs(inputTypes,
/* Look up the functions and attach the expression trees to their info */
1461 fmgr_info(transfn_oid, &peraggstate->transfn);
1462 peraggstate->transfn.fn_expr = (Node *) transfnexpr;
1464 if (OidIsValid(finalfn_oid))
1466 fmgr_info(finalfn_oid, &peraggstate->finalfn);
1467 peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
/* Cache length and by-value properties of the result and transition types */
1470 get_typlenbyval(wfunc->wintype,
1471 &peraggstate->resulttypeLen,
1472 &peraggstate->resulttypeByVal);
1473 get_typlenbyval(aggtranstype,
1474 &peraggstate->transtypeLen,
1475 &peraggstate->transtypeByVal);
1478 * initval is potentially null, so don't try to access it as a struct
1479 * field. Must do it the hard way with SysCacheGetAttr.
1481 textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
1482 Anum_pg_aggregate_agginitval,
1483 &peraggstate->initValueIsNull);
1485 if (peraggstate->initValueIsNull)
1486 peraggstate->initValue = (Datum) 0;
1488 peraggstate->initValue = GetAggInitVal(textInitVal,
1492 * If the transfn is strict and the initval is NULL, make sure input type
1493 * and transtype are the same (or at least binary-compatible), so that
1494 * it's OK to use the first input value as the initial transValue. This
1495 * should have been checked at agg definition time, but just in case...
1497 if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
1499 if (numArguments < 1 ||
1500 !IsBinaryCoercible(inputTypes[0], aggtranstype))
1502 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
1503 errmsg("aggregate %u needs to have compatible input type and transition type",
/* Done with the pg_aggregate row */
1507 ReleaseSysCache(aggTuple);
1513 GetAggInitVal(Datum textInitVal, Oid transtype)
/*
 * Converts the text form of an aggregate's initial value into a value of
 * type 'transtype' by running it through that type's input function.
 */
/* Find the type's input function and the type parameter it expects */
1520 getTypeInputInfo(transtype, &typinput, &typioparam);
/* The input function is called with a C string, not a text datum */
1521 strInitVal = TextDatumGetCString(textInitVal);
1522 initVal = OidInputFunctionCall(typinput, strInitVal,
1530 * compare two rows to see if they are equal according to the ORDER BY clause
1532 * NB: this does not consider the window frame mode.
1535 are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
1536 TupleTableSlot *slot2)
/*
 * The ORDER BY column count and column indexes come from the WindowAgg plan
 * node; the equality functions are the ordEqfunctions stored in winstate.
 */
1538 WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan;
1540 /* If no ORDER BY, all rows are peers with each other */
1541 if (node->ordNumCols == 0)
/* The comparison is given tmpcontext's per-tuple memory context */
1544 return execTuplesMatch(slot1, slot2,
1545 node->ordNumCols, node->ordColIdx,
1546 winstate->ordEqfunctions,
1547 winstate->tmpcontext->ecxt_per_tuple_memory);
1551 * window_gettupleslot
1552 * Fetch the pos'th tuple of the current partition into the slot,
1553 * using the winobj's read pointer
1555 * Returns true if successful, false if no such row
1558 window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
/*
 * Moves the winobj's read pointer within winstate->buffer to row 'pos',
 * tracking the pointer's position in winobj->seekpos, and loads rows into
 * 'slot' along the way.
 */
1560 WindowAggState *winstate = winobj->winstate;
1561 MemoryContext oldcontext;
1563 /* Don't allow passing -1 to spool_tuples here */
1567 /* If necessary, fetch the tuple into the spool */
1568 spool_tuples(winstate, pos);
/* Check whether pos is still beyond the rows spooled so far */
1570 if (pos >= winstate->spooled_rows)
/* Rows before the mark position may not be fetched */
1573 if (pos < winobj->markpos)
1574 elog(ERROR, "cannot fetch row before WindowObject's mark position");
/* The tuplestore calls below run in the per-query memory context */
1576 oldcontext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory);
1578 tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
1581 * There's no API to refetch the tuple at the current position. We have to
1582 * move one tuple forward, and then one backward. (We don't do it the
1583 * other way because we might try to fetch the row before our mark, which
1586 if (winobj->seekpos == pos)
1588 tuplestore_advance(winstate->buffer, true);
/* Read pointer is past the target position */
1592 while (winobj->seekpos > pos)
1594 if (!tuplestore_gettupleslot(winstate->buffer, false, true, slot))
1595 elog(ERROR, "unexpected end of tuplestore");
/* Read pointer is before the target position */
1599 while (winobj->seekpos < pos)
1601 if (!tuplestore_gettupleslot(winstate->buffer, true, true, slot))
1602 elog(ERROR, "unexpected end of tuplestore");
1606 MemoryContextSwitchTo(oldcontext);
1612 /***********************************************************************
1613 * API exposed to window functions
1614 ***********************************************************************/
1618 * WinGetPartitionLocalMemory
1619 * Get working memory that lives till end of partition processing
1621 * On first call within a given partition, this allocates and zeroes the
1622 * requested amount of space. Subsequent calls just return the same chunk.
1624 * Memory obtained this way is normally used to hold state that should be
1625 * automatically reset for each new partition. If a window function wants
1626 * to hold state across the whole query, fcinfo->fn_extra can be used in the
1627 * usual way for that.
1630 WinGetPartitionLocalMemory(WindowObject winobj, Size sz)
1632 Assert(WindowObjectIsValid(winobj));
/*
 * Allocate lazily from the node's wincontext.  Once localmem is set it is
 * returned as-is, so 'sz' matters only on the allocating call.
 */
1633 if (winobj->localmem == NULL)
1634 winobj->localmem = MemoryContextAllocZero(winobj->winstate->wincontext,
1636 return winobj->localmem;
1640 * WinGetCurrentPosition
1641 * Return the current row's position (counting from 0) within the current
1645 WinGetCurrentPosition(WindowObject winobj)
1647 Assert(WindowObjectIsValid(winobj));
/* Plain field read of the owning node's currentpos */
1648 return winobj->winstate->currentpos;
1652 * WinGetPartitionRowCount
1653 * Return total number of rows contained in the current partition.
1655 * Note: this is a relatively expensive operation because it forces the
1656 * whole partition to be "spooled" into the tuplestore at once. Once
1657 * executed, however, additional calls within the same partition are cheap.
1660 WinGetPartitionRowCount(WindowObject winobj)
1662 Assert(WindowObjectIsValid(winobj));
/*
 * spool_tuples() is called with -1 as its position argument, after which
 * spooled_rows is returned as the partition's row count.
 */
1663 spool_tuples(winobj->winstate, -1);
1664 return winobj->winstate->spooled_rows;
1668 * WinSetMarkPosition
1669 * Set the "mark" position for the window object, which is the oldest row
1670 * number (counting from 0) it is allowed to fetch during all subsequent
1671 * operations within the current partition.
1673 * Window functions do not have to call this, but are encouraged to move the
1674 * mark forward when possible to keep the tuplestore size down and prevent
1675 * having to spill rows to disk.
1678 WinSetMarkPosition(WindowObject winobj, int64 markpos)
1680 WindowAggState *winstate;
1682 Assert(WindowObjectIsValid(winobj));
1683 winstate = winobj->winstate;
/* The mark may only stay where it is or move forward */
1685 if (markpos < winobj->markpos)
1686 elog(ERROR, "cannot move WindowObject's mark position backward");
/* Work on the object's mark pointer while it is behind the requested mark */
1687 tuplestore_select_read_pointer(winstate->buffer, winobj->markptr);
1688 while (markpos > winobj->markpos)
1690 tuplestore_advance(winstate->buffer, true);
/* Then work on the object's read pointer while it is behind the new mark */
1693 tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
1694 while (markpos > winobj->seekpos)
1696 tuplestore_advance(winstate->buffer, true);
1703 * Compare two rows (specified by absolute position in window) to see
1704 * if they are equal according to the ORDER BY clause.
1706 * NB: this does not consider the window frame mode.
1709 WinRowsArePeers(WindowObject winobj, int64 pos1, int64 pos2)
1711 WindowAggState *winstate;
1713 TupleTableSlot *slot1;
1714 TupleTableSlot *slot2;
1717 Assert(WindowObjectIsValid(winobj));
1718 winstate = winobj->winstate;
1719 node = (WindowAgg *) winstate->ss.ps.plan;
1721 /* If no ORDER BY, all rows are peers; don't bother to fetch them */
1722 if (node->ordNumCols == 0)
/* Use the node's two scratch slots to hold the rows being compared */
1725 slot1 = winstate->temp_slot_1;
1726 slot2 = winstate->temp_slot_2;
/* A position that cannot be fetched is reported as an error */
1728 if (!window_gettupleslot(winobj, pos1, slot1))
1729 elog(ERROR, "specified position is out of window: " INT64_FORMAT,
1731 if (!window_gettupleslot(winobj, pos2, slot2))
1732 elog(ERROR, "specified position is out of window: " INT64_FORMAT,
1735 res = are_peers(winstate, slot1, slot2);
/* Clear both scratch slots once the comparison result is in 'res' */
1737 ExecClearTuple(slot1);
1738 ExecClearTuple(slot2);
1744 * WinGetFuncArgInPartition
1745 * Evaluate a window function's argument expression on a specified
1746 * row of the partition. The row is identified in lseek(2) style,
1747 * i.e. relative to the current, first, or last row.
1749 * argno: argument number to evaluate (counted from 0)
1750 * relpos: signed rowcount offset from the seek position
1751 * seektype: WINDOW_SEEK_CURRENT, WINDOW_SEEK_HEAD, or WINDOW_SEEK_TAIL
1752 * set_mark: If the row is found and set_mark is true, the mark is moved to
1753 * the row as a side-effect.
1754 * isnull: output argument, receives isnull status of result
1755 * isout: output argument, set to indicate whether target row position
1756 * is out of partition (can pass NULL if caller doesn't care about this)
1758 * Specifying a nonexistent row is not an error, it just causes a null result
1759 * (plus setting *isout true, if isout isn't NULL).
1762 WinGetFuncArgInPartition(WindowObject winobj, int argno,
1763 int relpos, int seektype, bool set_mark,
1764 bool *isnull, bool *isout)
1766 WindowAggState *winstate;
1767 ExprContext *econtext;
1768 TupleTableSlot *slot;
1772 Assert(WindowObjectIsValid(winobj));
1773 winstate = winobj->winstate;
1774 econtext = winstate->ss.ps.ps_ExprContext;
/* The target row is fetched into the node's first scratch slot */
1775 slot = winstate->temp_slot_1;
/* Translate (seektype, relpos) into an absolute row number, abs_pos */
1779 case WINDOW_SEEK_CURRENT:
1780 abs_pos = winstate->currentpos + relpos;
1782 case WINDOW_SEEK_HEAD:
1785 case WINDOW_SEEK_TAIL:
/* spooled_rows - 1 is used as the last row, so spool with -1 first */
1786 spool_tuples(winstate, -1);
1787 abs_pos = winstate->spooled_rows - 1 + relpos;
1790 elog(ERROR, "unrecognized window seek type: %d", seektype);
1791 abs_pos = 0; /* keep compiler quiet */
1795 gottuple = window_gettupleslot(winobj, abs_pos, slot);
1809 WinSetMarkPosition(winobj, abs_pos);
/* Evaluate the argument expression with the fetched row as outer tuple */
1810 econtext->ecxt_outertuple = slot;
1811 return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno),
1812 econtext, isnull, NULL);
1817 * WinGetFuncArgInFrame
1818 * Evaluate a window function's argument expression on a specified
1819 * row of the window frame. The row is identified in lseek(2) style,
1820 * i.e. relative to the current, first, or last row.
1822 * argno: argument number to evaluate (counted from 0)
1823 * relpos: signed rowcount offset from the seek position
1824 * seektype: WINDOW_SEEK_CURRENT, WINDOW_SEEK_HEAD, or WINDOW_SEEK_TAIL
1825 * set_mark: If the row is found and set_mark is true, the mark is moved to
1826 * the row as a side-effect.
1827 * isnull: output argument, receives isnull status of result
1828 * isout: output argument, set to indicate whether target row position
1829 * is out of frame (can pass NULL if caller doesn't care about this)
1831 * Specifying a nonexistent row is not an error, it just causes a null result
1832 * (plus setting *isout true, if isout isn't NULL).
1835 WinGetFuncArgInFrame(WindowObject winobj, int argno,
1836 int relpos, int seektype, bool set_mark,
1837 bool *isnull, bool *isout)
1839 WindowAggState *winstate;
1840 ExprContext *econtext;
1841 TupleTableSlot *slot;
1845 Assert(WindowObjectIsValid(winobj));
1846 winstate = winobj->winstate;
1847 econtext = winstate->ss.ps.ps_ExprContext;
/* The target row is fetched into the node's first scratch slot */
1848 slot = winstate->temp_slot_1;
/* Translate (seektype, relpos) into an absolute row number, abs_pos */
1852 case WINDOW_SEEK_CURRENT:
1853 abs_pos = winstate->currentpos + relpos;
1855 case WINDOW_SEEK_HEAD:
1858 case WINDOW_SEEK_TAIL:
/* frametailpos is read right after update_frametailpos() is called */
1859 update_frametailpos(winobj, slot);
1860 abs_pos = winstate->frametailpos + relpos;
1863 elog(ERROR, "unrecognized window seek type: %d", seektype);
1864 abs_pos = 0; /* keep compiler quiet */
/* Fetch the row, then test it against the frame as well */
1868 gottuple = window_gettupleslot(winobj, abs_pos, slot);
1870 gottuple = row_is_in_frame(winstate, abs_pos, slot);
1884 WinSetMarkPosition(winobj, abs_pos);
/* Evaluate the argument expression with the fetched row as outer tuple */
1885 econtext->ecxt_outertuple = slot;
1886 return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno),
1887 econtext, isnull, NULL);
1892 * WinGetFuncArgCurrent
1893 * Evaluate a window function's argument expression on the current row.
1895 * argno: argument number to evaluate (counted from 0)
1896 * isnull: output argument, receives isnull status of result
1898 * Note: this isn't quite equivalent to WinGetFuncArgInPartition or
1899 * WinGetFuncArgInFrame targeting the current row, because it will succeed
1900 * even if the WindowObject's mark has been set beyond the current row.
1901 * This should generally be used for "ordinary" arguments of a window
1902 * function, such as the offset argument of lead() or lag().
1905 WinGetFuncArgCurrent(WindowObject winobj, int argno, bool *isnull)
1907 WindowAggState *winstate;
1908 ExprContext *econtext;
1910 Assert(WindowObjectIsValid(winobj));
1911 winstate = winobj->winstate;
1913 econtext = winstate->ss.ps.ps_ExprContext;
/*
 * The current row is taken directly from the node's scan slot; no call to
 * window_gettupleslot() is made here.
 */
1915 econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
1916 return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno),
1917 econtext, isnull, NULL);