OSDN Git Service

Update copyrights to 2003.
[pg-rex/syncrep.git] / src / backend / executor / execGrouping.c
1 /*-------------------------------------------------------------------------
2  *
3  * execGrouping.c
4  *        executor utility routines for grouping, hashing, and aggregation
5  *
6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.6 2003/08/04 02:39:58 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "access/heapam.h"
19 #include "executor/executor.h"
20 #include "parser/parse_oper.h"
21 #include "utils/memutils.h"
22 #include "utils/lsyscache.h"
23 #include "utils/syscache.h"
24
25
26 /*****************************************************************************
27  *              Utility routines for grouping tuples together
28  *****************************************************************************/
29
30 /*
31  * execTuplesMatch
32  *              Return true if two tuples match in all the indicated fields.
33  *
34  * This actually implements SQL's notion of "not distinct".  Two nulls
35  * match, a null and a not-null don't match.
36  *
37  * tuple1, tuple2: the tuples to compare
38  * tupdesc: tuple descriptor applying to both tuples
39  * numCols: the number of attributes to be examined
40  * matchColIdx: array of attribute column numbers
41  * eqFunctions: array of fmgr lookup info for the equality functions to use
42  * evalContext: short-term memory context for executing the functions
43  *
44  * NB: evalContext is reset each time!
45  */
46 bool
47 execTuplesMatch(HeapTuple tuple1,
48                                 HeapTuple tuple2,
49                                 TupleDesc tupdesc,
50                                 int numCols,
51                                 AttrNumber *matchColIdx,
52                                 FmgrInfo *eqfunctions,
53                                 MemoryContext evalContext)
54 {
55         MemoryContext oldContext;
56         bool            result;
57         int                     i;
58
59         /* Reset and switch into the temp context. */
60         MemoryContextReset(evalContext);
61         oldContext = MemoryContextSwitchTo(evalContext);
62
63         /*
64          * We cannot report a match without checking all the fields, but we
65          * can report a non-match as soon as we find unequal fields.  So,
66          * start comparing at the last field (least significant sort key).
67          * That's the most likely to be different if we are dealing with
68          * sorted input.
69          */
70         result = true;
71
72         for (i = numCols; --i >= 0;)
73         {
74                 AttrNumber      att = matchColIdx[i];
75                 Datum           attr1,
76                                         attr2;
77                 bool            isNull1,
78                                         isNull2;
79
80                 attr1 = heap_getattr(tuple1,
81                                                          att,
82                                                          tupdesc,
83                                                          &isNull1);
84
85                 attr2 = heap_getattr(tuple2,
86                                                          att,
87                                                          tupdesc,
88                                                          &isNull2);
89
90                 if (isNull1 != isNull2)
91                 {
92                         result = false;         /* one null and one not; they aren't equal */
93                         break;
94                 }
95
96                 if (isNull1)
97                         continue;                       /* both are null, treat as equal */
98
99                 /* Apply the type-specific equality function */
100
101                 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
102                                                                                 attr1, attr2)))
103                 {
104                         result = false;         /* they aren't equal */
105                         break;
106                 }
107         }
108
109         MemoryContextSwitchTo(oldContext);
110
111         return result;
112 }
113
114 /*
115  * execTuplesUnequal
116  *              Return true if two tuples are definitely unequal in the indicated
117  *              fields.
118  *
119  * Nulls are neither equal nor unequal to anything else.  A true result
120  * is obtained only if there are non-null fields that compare not-equal.
121  *
122  * Parameters are identical to execTuplesMatch.
123  */
124 bool
125 execTuplesUnequal(HeapTuple tuple1,
126                                   HeapTuple tuple2,
127                                   TupleDesc tupdesc,
128                                   int numCols,
129                                   AttrNumber *matchColIdx,
130                                   FmgrInfo *eqfunctions,
131                                   MemoryContext evalContext)
132 {
133         MemoryContext oldContext;
134         bool            result;
135         int                     i;
136
137         /* Reset and switch into the temp context. */
138         MemoryContextReset(evalContext);
139         oldContext = MemoryContextSwitchTo(evalContext);
140
141         /*
142          * We cannot report a match without checking all the fields, but we
143          * can report a non-match as soon as we find unequal fields.  So,
144          * start comparing at the last field (least significant sort key).
145          * That's the most likely to be different if we are dealing with
146          * sorted input.
147          */
148         result = false;
149
150         for (i = numCols; --i >= 0;)
151         {
152                 AttrNumber      att = matchColIdx[i];
153                 Datum           attr1,
154                                         attr2;
155                 bool            isNull1,
156                                         isNull2;
157
158                 attr1 = heap_getattr(tuple1,
159                                                          att,
160                                                          tupdesc,
161                                                          &isNull1);
162
163                 if (isNull1)
164                         continue;                       /* can't prove anything here */
165
166                 attr2 = heap_getattr(tuple2,
167                                                          att,
168                                                          tupdesc,
169                                                          &isNull2);
170
171                 if (isNull2)
172                         continue;                       /* can't prove anything here */
173
174                 /* Apply the type-specific equality function */
175
176                 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
177                                                                                 attr1, attr2)))
178                 {
179                         result = true;          /* they are unequal */
180                         break;
181                 }
182         }
183
184         MemoryContextSwitchTo(oldContext);
185
186         return result;
187 }
188
189
190 /*
191  * execTuplesMatchPrepare
192  *              Look up the equality functions needed for execTuplesMatch or
193  *              execTuplesUnequal.
194  *
195  * The result is a palloc'd array.
196  */
197 FmgrInfo *
198 execTuplesMatchPrepare(TupleDesc tupdesc,
199                                            int numCols,
200                                            AttrNumber *matchColIdx)
201 {
202         FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
203         int                     i;
204
205         for (i = 0; i < numCols; i++)
206         {
207                 AttrNumber      att = matchColIdx[i];
208                 Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
209                 Oid                     eq_function;
210
211                 eq_function = equality_oper_funcid(typid);
212                 fmgr_info(eq_function, &eqfunctions[i]);
213         }
214
215         return eqfunctions;
216 }
217
218 /*
219  * execTuplesHashPrepare
220  *              Look up the equality and hashing functions needed for a TupleHashTable.
221  *
222  * This is similar to execTuplesMatchPrepare, but we also need to find the
223  * hash functions associated with the equality operators.  *eqfunctions and
224  * *hashfunctions receive the palloc'd result arrays.
225  */
226 void
227 execTuplesHashPrepare(TupleDesc tupdesc,
228                                           int numCols,
229                                           AttrNumber *matchColIdx,
230                                           FmgrInfo **eqfunctions,
231                                           FmgrInfo **hashfunctions)
232 {
233         int                     i;
234
235         *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
236         *hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
237
238         for (i = 0; i < numCols; i++)
239         {
240                 AttrNumber      att = matchColIdx[i];
241                 Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
242                 Operator        optup;
243                 Oid                     eq_opr;
244                 Oid                     eq_function;
245                 Oid                     hash_function;
246
247                 optup = equality_oper(typid, false);
248                 eq_opr = oprid(optup);
249                 eq_function = oprfuncid(optup);
250                 ReleaseSysCache(optup);
251                 hash_function = get_op_hash_function(eq_opr);
252                 if (!OidIsValid(hash_function)) /* should not happen */
253                         elog(ERROR, "could not find hash function for hash operator %u",
254                                  eq_opr);
255                 fmgr_info(eq_function, &(*eqfunctions)[i]);
256                 fmgr_info(hash_function, &(*hashfunctions)[i]);
257         }
258 }
259
260
261 /*****************************************************************************
262  *              Utility routines for all-in-memory hash tables
263  *
264  * These routines build hash tables for grouping tuples together (eg, for
265  * hash aggregation).  There is one entry for each not-distinct set of tuples
266  * presented.
267  *****************************************************************************/
268
269 /*
270  * Construct an empty TupleHashTable
271  *
272  *      numCols, keyColIdx: identify the tuple fields to use as lookup key
273  *      eqfunctions: equality comparison functions to use
274  *      hashfunctions: datatype-specific hashing functions to use
275  *      nbuckets: number of buckets to make
276  *      entrysize: size of each entry (at least sizeof(TupleHashEntryData))
277  *      tablecxt: memory context in which to store table and table entries
278  *      tempcxt: short-lived context for evaluation hash and comparison functions
279  *
280  * The function arrays may be made with execTuplesHashPrepare().
281  *
282  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
283  * storage that will live as long as the hashtable does.
284  */
285 TupleHashTable
286 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
287                                         FmgrInfo *eqfunctions,
288                                         FmgrInfo *hashfunctions,
289                                         int nbuckets, Size entrysize,
290                                         MemoryContext tablecxt, MemoryContext tempcxt)
291 {
292         TupleHashTable hashtable;
293         Size            tabsize;
294
295         Assert(nbuckets > 0);
296         Assert(entrysize >= sizeof(TupleHashEntryData));
297
298         tabsize = sizeof(TupleHashTableData) +
299                 (nbuckets - 1) * sizeof(TupleHashEntry);
300         hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
301
302         hashtable->numCols = numCols;
303         hashtable->keyColIdx = keyColIdx;
304         hashtable->eqfunctions = eqfunctions;
305         hashtable->hashfunctions = hashfunctions;
306         hashtable->tablecxt = tablecxt;
307         hashtable->tempcxt = tempcxt;
308         hashtable->entrysize = entrysize;
309         hashtable->nbuckets = nbuckets;
310
311         return hashtable;
312 }
313
314 /*
315  * Find or create a hashtable entry for the tuple group containing the
316  * given tuple.
317  *
318  * If isnew is NULL, we do not create new entries; we return NULL if no
319  * match is found.
320  *
321  * If isnew isn't NULL, then a new entry is created if no existing entry
322  * matches.  On return, *isnew is true if the entry is newly created,
323  * false if it existed already.  Any extra space in a new entry has been
324  * zeroed.
325  */
326 TupleHashEntry
327 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
328                                          bool *isnew)
329 {
330         int                     numCols = hashtable->numCols;
331         AttrNumber *keyColIdx = hashtable->keyColIdx;
332         HeapTuple       tuple = slot->val;
333         TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
334         uint32          hashkey = 0;
335         int                     i;
336         int                     bucketno;
337         TupleHashEntry entry;
338         MemoryContext oldContext;
339
340         /* Need to run the hash function in short-lived context */
341         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
342
343         for (i = 0; i < numCols; i++)
344         {
345                 AttrNumber      att = keyColIdx[i];
346                 Datum           attr;
347                 bool            isNull;
348
349                 /* rotate hashkey left 1 bit at each step */
350                 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
351
352                 attr = heap_getattr(tuple, att, tupdesc, &isNull);
353
354                 if (!isNull)                    /* treat nulls as having hash key 0 */
355                 {
356                         uint32          hkey;
357
358                         hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
359                                                                                                 attr));
360                         hashkey ^= hkey;
361                 }
362         }
363         bucketno = hashkey % (uint32) hashtable->nbuckets;
364
365         for (entry = hashtable->buckets[bucketno];
366                  entry != NULL;
367                  entry = entry->next)
368         {
369                 /* Quick check using hashkey */
370                 if (entry->hashkey != hashkey)
371                         continue;
372                 if (execTuplesMatch(entry->firstTuple,
373                                                         tuple,
374                                                         tupdesc,
375                                                         numCols, keyColIdx,
376                                                         hashtable->eqfunctions,
377                                                         hashtable->tempcxt))
378                 {
379                         if (isnew)
380                                 *isnew = false;
381                         MemoryContextSwitchTo(oldContext);
382                         return entry;
383                 }
384         }
385
386         /* Not there, so build a new one if requested */
387         if (isnew)
388         {
389                 MemoryContextSwitchTo(hashtable->tablecxt);
390
391                 entry = (TupleHashEntry) palloc0(hashtable->entrysize);
392
393                 entry->hashkey = hashkey;
394                 entry->firstTuple = heap_copytuple(tuple);
395
396                 entry->next = hashtable->buckets[bucketno];
397                 hashtable->buckets[bucketno] = entry;
398
399                 *isnew = true;
400         }
401
402         MemoryContextSwitchTo(oldContext);
403
404         return entry;
405 }
406
407 /*
408  * Walk through all the entries of a hash table, in no special order.
409  * Returns NULL when no more entries remain.
410  *
411  * Iterator state must be initialized with ResetTupleHashIterator() macro.
412  */
413 TupleHashEntry
414 ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator * state)
415 {
416         TupleHashEntry entry;
417
418         entry = state->next_entry;
419         while (entry == NULL)
420         {
421                 if (state->next_bucket >= hashtable->nbuckets)
422                 {
423                         /* No more entries in hashtable, so done */
424                         return NULL;
425                 }
426                 entry = hashtable->buckets[state->next_bucket++];
427         }
428         state->next_entry = entry->next;
429
430         return entry;
431 }