1 /*-------------------------------------------------------------------------
4 * executor utility routines for grouping, hashing, and aggregation
6 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.6 2003/08/04 02:39:58 momjian Exp $
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
18 #include "access/heapam.h"
19 #include "executor/executor.h"
20 #include "parser/parse_oper.h"
21 #include "utils/memutils.h"
22 #include "utils/lsyscache.h"
23 #include "utils/syscache.h"
26 /*****************************************************************************
27 * Utility routines for grouping tuples together
28 *****************************************************************************/
32 * Return true if two tuples match in all the indicated fields.
34 * This actually implements SQL's notion of "not distinct". Two nulls
35 * match, a null and a not-null don't match.
37 * tuple1, tuple2: the tuples to compare
38 * tupdesc: tuple descriptor applying to both tuples
39 * numCols: the number of attributes to be examined
40 * matchColIdx: array of attribute column numbers
41 * eqFunctions: array of fmgr lookup info for the equality functions to use
42 * evalContext: short-term memory context for executing the functions
44 * NB: evalContext is reset each time!
47 execTuplesMatch(HeapTuple tuple1,
51 AttrNumber *matchColIdx,
52 FmgrInfo *eqfunctions,
53 MemoryContext evalContext)
55 MemoryContext oldContext;
59 /* Reset and switch into the temp context. */
60 MemoryContextReset(evalContext);
61 oldContext = MemoryContextSwitchTo(evalContext);
64 * We cannot report a match without checking all the fields, but we
65 * can report a non-match as soon as we find unequal fields. So,
66 * start comparing at the last field (least significant sort key).
67 * That's the most likely to be different if we are dealing with
72 for (i = numCols; --i >= 0;)
74 AttrNumber att = matchColIdx[i];
80 attr1 = heap_getattr(tuple1,
85 attr2 = heap_getattr(tuple2,
90 if (isNull1 != isNull2)
92 result = false; /* one null and one not; they aren't equal */
97 continue; /* both are null, treat as equal */
99 /* Apply the type-specific equality function */
101 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
104 result = false; /* they aren't equal */
109 MemoryContextSwitchTo(oldContext);
116 * Return true if two tuples are definitely unequal in the indicated
119 * Nulls are neither equal nor unequal to anything else. A true result
120 * is obtained only if there are non-null fields that compare not-equal.
122 * Parameters are identical to execTuplesMatch.
125 execTuplesUnequal(HeapTuple tuple1,
129 AttrNumber *matchColIdx,
130 FmgrInfo *eqfunctions,
131 MemoryContext evalContext)
133 MemoryContext oldContext;
137 /* Reset and switch into the temp context. */
138 MemoryContextReset(evalContext);
139 oldContext = MemoryContextSwitchTo(evalContext);
142 * We cannot report a match without checking all the fields, but we
143 * can report a non-match as soon as we find unequal fields. So,
144 * start comparing at the last field (least significant sort key).
145 * That's the most likely to be different if we are dealing with
150 for (i = numCols; --i >= 0;)
152 AttrNumber att = matchColIdx[i];
158 attr1 = heap_getattr(tuple1,
164 continue; /* can't prove anything here */
166 attr2 = heap_getattr(tuple2,
172 continue; /* can't prove anything here */
174 /* Apply the type-specific equality function */
176 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
179 result = true; /* they are unequal */
184 MemoryContextSwitchTo(oldContext);
191 * execTuplesMatchPrepare
192 * Look up the equality functions needed for execTuplesMatch or
195 * The result is a palloc'd array.
198 execTuplesMatchPrepare(TupleDesc tupdesc,
200 AttrNumber *matchColIdx)
202 FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
205 for (i = 0; i < numCols; i++)
207 AttrNumber att = matchColIdx[i];
208 Oid typid = tupdesc->attrs[att - 1]->atttypid;
211 eq_function = equality_oper_funcid(typid);
212 fmgr_info(eq_function, &eqfunctions[i]);
219 * execTuplesHashPrepare
220 * Look up the equality and hashing functions needed for a TupleHashTable.
222 * This is similar to execTuplesMatchPrepare, but we also need to find the
223 * hash functions associated with the equality operators. *eqfunctions and
224 * *hashfunctions receive the palloc'd result arrays.
227 execTuplesHashPrepare(TupleDesc tupdesc,
229 AttrNumber *matchColIdx,
230 FmgrInfo **eqfunctions,
231 FmgrInfo **hashfunctions)
235 *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
236 *hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
238 for (i = 0; i < numCols; i++)
240 AttrNumber att = matchColIdx[i];
241 Oid typid = tupdesc->attrs[att - 1]->atttypid;
247 optup = equality_oper(typid, false);
248 eq_opr = oprid(optup);
249 eq_function = oprfuncid(optup);
250 ReleaseSysCache(optup);
251 hash_function = get_op_hash_function(eq_opr);
252 if (!OidIsValid(hash_function)) /* should not happen */
253 elog(ERROR, "could not find hash function for hash operator %u",
255 fmgr_info(eq_function, &(*eqfunctions)[i]);
256 fmgr_info(hash_function, &(*hashfunctions)[i]);
261 /*****************************************************************************
262 * Utility routines for all-in-memory hash tables
264 * These routines build hash tables for grouping tuples together (eg, for
265 * hash aggregation). There is one entry for each not-distinct set of tuples
267 *****************************************************************************/
270 * Construct an empty TupleHashTable
272 * numCols, keyColIdx: identify the tuple fields to use as lookup key
273 * eqfunctions: equality comparison functions to use
274 * hashfunctions: datatype-specific hashing functions to use
275 * nbuckets: number of buckets to make
276 * entrysize: size of each entry (at least sizeof(TupleHashEntryData))
277 * tablecxt: memory context in which to store table and table entries
278 * tempcxt: short-lived context for evaluation hash and comparison functions
280 * The function arrays may be made with execTuplesHashPrepare().
282 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
283 * storage that will live as long as the hashtable does.
286 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
287 FmgrInfo *eqfunctions,
288 FmgrInfo *hashfunctions,
289 int nbuckets, Size entrysize,
290 MemoryContext tablecxt, MemoryContext tempcxt)
292 TupleHashTable hashtable;
295 Assert(nbuckets > 0);
296 Assert(entrysize >= sizeof(TupleHashEntryData));
298 tabsize = sizeof(TupleHashTableData) +
299 (nbuckets - 1) * sizeof(TupleHashEntry);
300 hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
302 hashtable->numCols = numCols;
303 hashtable->keyColIdx = keyColIdx;
304 hashtable->eqfunctions = eqfunctions;
305 hashtable->hashfunctions = hashfunctions;
306 hashtable->tablecxt = tablecxt;
307 hashtable->tempcxt = tempcxt;
308 hashtable->entrysize = entrysize;
309 hashtable->nbuckets = nbuckets;
315 * Find or create a hashtable entry for the tuple group containing the
318 * If isnew is NULL, we do not create new entries; we return NULL if no
321 * If isnew isn't NULL, then a new entry is created if no existing entry
322 * matches. On return, *isnew is true if the entry is newly created,
323 * false if it existed already. Any extra space in a new entry has been
327 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
330 int numCols = hashtable->numCols;
331 AttrNumber *keyColIdx = hashtable->keyColIdx;
332 HeapTuple tuple = slot->val;
333 TupleDesc tupdesc = slot->ttc_tupleDescriptor;
337 TupleHashEntry entry;
338 MemoryContext oldContext;
340 /* Need to run the hash function in short-lived context */
341 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
343 for (i = 0; i < numCols; i++)
345 AttrNumber att = keyColIdx[i];
349 /* rotate hashkey left 1 bit at each step */
350 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
352 attr = heap_getattr(tuple, att, tupdesc, &isNull);
354 if (!isNull) /* treat nulls as having hash key 0 */
358 hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
363 bucketno = hashkey % (uint32) hashtable->nbuckets;
365 for (entry = hashtable->buckets[bucketno];
369 /* Quick check using hashkey */
370 if (entry->hashkey != hashkey)
372 if (execTuplesMatch(entry->firstTuple,
376 hashtable->eqfunctions,
381 MemoryContextSwitchTo(oldContext);
386 /* Not there, so build a new one if requested */
389 MemoryContextSwitchTo(hashtable->tablecxt);
391 entry = (TupleHashEntry) palloc0(hashtable->entrysize);
393 entry->hashkey = hashkey;
394 entry->firstTuple = heap_copytuple(tuple);
396 entry->next = hashtable->buckets[bucketno];
397 hashtable->buckets[bucketno] = entry;
402 MemoryContextSwitchTo(oldContext);
408 * Walk through all the entries of a hash table, in no special order.
409 * Returns NULL when no more entries remain.
411 * Iterator state must be initialized with ResetTupleHashIterator() macro.
414 ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator * state)
416 TupleHashEntry entry;
418 entry = state->next_entry;
419 while (entry == NULL)
421 if (state->next_bucket >= hashtable->nbuckets)
423 /* No more entries in hashtable, so done */
426 entry = hashtable->buckets[state->next_bucket++];
428 state->next_entry = entry->next;