src/backend/executor/execGrouping.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * execGrouping.c
   4  *        executor utility routines for grouping, hashing, and aggregation
   5  *
   6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.6 2003/08/04 02:39:58 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include "access/hash.h"
  18 #include "access/heapam.h"
  19 #include "executor/executor.h"
  20 #include "parser/parse_oper.h"
  21 #include "utils/memutils.h"
  22 #include "utils/lsyscache.h"
  23 #include "utils/syscache.h"
  24
  25
  26 /*****************************************************************************
  27  *              Utility routines for grouping tuples together
  28  *****************************************************************************/
  29
  30 /*
  31  * execTuplesMatch
  32  *              Return true if two tuples match in all the indicated fields.
  33  *
  34  * This actually implements SQL's notion of "not distinct".  Two nulls
  35  * match, a null and a not-null don't match.
  36  *
  37  * tuple1, tuple2: the tuples to compare
  38  * tupdesc: tuple descriptor applying to both tuples
  39  * numCols: the number of attributes to be examined
  40  * matchColIdx: array of attribute column numbers
  41  * eqFunctions: array of fmgr lookup info for the equality functions to use
  42  * evalContext: short-term memory context for executing the functions
  43  *
  44  * NB: evalContext is reset each time!
  45  */
  46 bool
  47 execTuplesMatch(HeapTuple tuple1,
  48                                 HeapTuple tuple2,
  49                                 TupleDesc tupdesc,
  50                                 int numCols,
  51                                 AttrNumber *matchColIdx,
  52                                 FmgrInfo *eqfunctions,
  53                                 MemoryContext evalContext)
  54 {
  55         MemoryContext oldContext;
  56         bool            result;
  57         int                     i;
  58
  59         /* Reset and switch into the temp context. */
  60         MemoryContextReset(evalContext);
  61         oldContext = MemoryContextSwitchTo(evalContext);
  62
  63         /*
  64          * We cannot report a match without checking all the fields, but we
  65          * can report a non-match as soon as we find unequal fields.  So,
  66          * start comparing at the last field (least significant sort key).
  67          * That's the most likely to be different if we are dealing with
  68          * sorted input.
  69          */
  70         result = true;
  71
  72         for (i = numCols; --i >= 0;)
  73         {
  74                 AttrNumber      att = matchColIdx[i];
  75                 Datum           attr1,
  76                                         attr2;
  77                 bool            isNull1,
  78                                         isNull2;
  79
  80                 attr1 = heap_getattr(tuple1,
  81                                                          att,
  82                                                          tupdesc,
  83                                                          &isNull1);
  84
  85                 attr2 = heap_getattr(tuple2,
  86                                                          att,
  87                                                          tupdesc,
  88                                                          &isNull2);
  89
  90                 if (isNull1 != isNull2)
  91                 {
  92                         result = false;         /* one null and one not; they aren't equal */
  93                         break;
  94                 }
  95
  96                 if (isNull1)
  97                         continue;                       /* both are null, treat as equal */
  98
  99                 /* Apply the type-specific equality function */
 100
 101                 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
 102                                                                                 attr1, attr2)))
 103                 {
 104                         result = false;         /* they aren't equal */
 105                         break;
 106                 }
 107         }
 108
 109         MemoryContextSwitchTo(oldContext);
 110
 111         return result;
 112 }
 113
 114 /*
 115  * execTuplesUnequal
 116  *              Return true if two tuples are definitely unequal in the indicated
 117  *              fields.
 118  *
 119  * Nulls are neither equal nor unequal to anything else.  A true result
 120  * is obtained only if there are non-null fields that compare not-equal.
 121  *
 122  * Parameters are identical to execTuplesMatch.
 123  */
 124 bool
 125 execTuplesUnequal(HeapTuple tuple1,
 126                                   HeapTuple tuple2,
 127                                   TupleDesc tupdesc,
 128                                   int numCols,
 129                                   AttrNumber *matchColIdx,
 130                                   FmgrInfo *eqfunctions,
 131                                   MemoryContext evalContext)
 132 {
 133         MemoryContext oldContext;
 134         bool            result;
 135         int                     i;
 136
 137         /* Reset and switch into the temp context. */
 138         MemoryContextReset(evalContext);
 139         oldContext = MemoryContextSwitchTo(evalContext);
 140
 141         /*
 142          * We cannot report a match without checking all the fields, but we
 143          * can report a non-match as soon as we find unequal fields.  So,
 144          * start comparing at the last field (least significant sort key).
 145          * That's the most likely to be different if we are dealing with
 146          * sorted input.
 147          */
 148         result = false;
 149
 150         for (i = numCols; --i >= 0;)
 151         {
 152                 AttrNumber      att = matchColIdx[i];
 153                 Datum           attr1,
 154                                         attr2;
 155                 bool            isNull1,
 156                                         isNull2;
 157
 158                 attr1 = heap_getattr(tuple1,
 159                                                          att,
 160                                                          tupdesc,
 161                                                          &isNull1);
 162
 163                 if (isNull1)
 164                         continue;                       /* can't prove anything here */
 165
 166                 attr2 = heap_getattr(tuple2,
 167                                                          att,
 168                                                          tupdesc,
 169                                                          &isNull2);
 170
 171                 if (isNull2)
 172                         continue;                       /* can't prove anything here */
 173
 174                 /* Apply the type-specific equality function */
 175
 176                 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
 177                                                                                 attr1, attr2)))
 178                 {
 179                         result = true;          /* they are unequal */
 180                         break;
 181                 }
 182         }
 183
 184         MemoryContextSwitchTo(oldContext);
 185
 186         return result;
 187 }
 188
 189
 190 /*
 191  * execTuplesMatchPrepare
 192  *              Look up the equality functions needed for execTuplesMatch or
 193  *              execTuplesUnequal.
 194  *
 195  * The result is a palloc'd array.
 196  */
 197 FmgrInfo *
 198 execTuplesMatchPrepare(TupleDesc tupdesc,
 199                                            int numCols,
 200                                            AttrNumber *matchColIdx)
 201 {
 202         FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 203         int                     i;
 204
 205         for (i = 0; i < numCols; i++)
 206         {
 207                 AttrNumber      att = matchColIdx[i];
 208                 Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
 209                 Oid                     eq_function;
 210
 211                 eq_function = equality_oper_funcid(typid);
 212                 fmgr_info(eq_function, &eqfunctions[i]);
 213         }
 214
 215         return eqfunctions;
 216 }
 217
 218 /*
 219  * execTuplesHashPrepare
 220  *              Look up the equality and hashing functions needed for a TupleHashTable.
 221  *
 222  * This is similar to execTuplesMatchPrepare, but we also need to find the
 223  * hash functions associated with the equality operators.  *eqfunctions and
 224  * *hashfunctions receive the palloc'd result arrays.
 225  */
 226 void
 227 execTuplesHashPrepare(TupleDesc tupdesc,
 228                                           int numCols,
 229                                           AttrNumber *matchColIdx,
 230                                           FmgrInfo **eqfunctions,
 231                                           FmgrInfo **hashfunctions)
 232 {
 233         int                     i;
 234
 235         *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 236         *hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 237
 238         for (i = 0; i < numCols; i++)
 239         {
 240                 AttrNumber      att = matchColIdx[i];
 241                 Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
 242                 Operator        optup;
 243                 Oid                     eq_opr;
 244                 Oid                     eq_function;
 245                 Oid                     hash_function;
 246
 247                 optup = equality_oper(typid, false);
 248                 eq_opr = oprid(optup);
 249                 eq_function = oprfuncid(optup);
 250                 ReleaseSysCache(optup);
 251                 hash_function = get_op_hash_function(eq_opr);
 252                 if (!OidIsValid(hash_function)) /* should not happen */
 253                         elog(ERROR, "could not find hash function for hash operator %u",
 254                                  eq_opr);
 255                 fmgr_info(eq_function, &(*eqfunctions)[i]);
 256                 fmgr_info(hash_function, &(*hashfunctions)[i]);
 257         }
 258 }
 259
 260
 261 /*****************************************************************************
 262  *              Utility routines for all-in-memory hash tables
 263  *
 264  * These routines build hash tables for grouping tuples together (eg, for
 265  * hash aggregation).  There is one entry for each not-distinct set of tuples
 266  * presented.
 267  *****************************************************************************/
 268
 269 /*
 270  * Construct an empty TupleHashTable
 271  *
 272  *      numCols, keyColIdx: identify the tuple fields to use as lookup key
 273  *      eqfunctions: equality comparison functions to use
 274  *      hashfunctions: datatype-specific hashing functions to use
 275  *      nbuckets: number of buckets to make
 276  *      entrysize: size of each entry (at least sizeof(TupleHashEntryData))
 277  *      tablecxt: memory context in which to store table and table entries
 278  *      tempcxt: short-lived context for evaluation hash and comparison functions
 279  *
 280  * The function arrays may be made with execTuplesHashPrepare().
 281  *
 282  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
 283  * storage that will live as long as the hashtable does.
 284  */
 285 TupleHashTable
 286 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 287                                         FmgrInfo *eqfunctions,
 288                                         FmgrInfo *hashfunctions,
 289                                         int nbuckets, Size entrysize,
 290                                         MemoryContext tablecxt, MemoryContext tempcxt)
 291 {
 292         TupleHashTable hashtable;
 293         Size            tabsize;
 294
 295         Assert(nbuckets > 0);
 296         Assert(entrysize >= sizeof(TupleHashEntryData));
 297
 298         tabsize = sizeof(TupleHashTableData) +
 299                 (nbuckets - 1) * sizeof(TupleHashEntry);
 300         hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
 301
 302         hashtable->numCols = numCols;
 303         hashtable->keyColIdx = keyColIdx;
 304         hashtable->eqfunctions = eqfunctions;
 305         hashtable->hashfunctions = hashfunctions;
 306         hashtable->tablecxt = tablecxt;
 307         hashtable->tempcxt = tempcxt;
 308         hashtable->entrysize = entrysize;
 309         hashtable->nbuckets = nbuckets;
 310
 311         return hashtable;
 312 }
 313
 314 /*
 315  * Find or create a hashtable entry for the tuple group containing the
 316  * given tuple.
 317  *
 318  * If isnew is NULL, we do not create new entries; we return NULL if no
 319  * match is found.
 320  *
 321  * If isnew isn't NULL, then a new entry is created if no existing entry
 322  * matches.  On return, *isnew is true if the entry is newly created,
 323  * false if it existed already.  Any extra space in a new entry has been
 324  * zeroed.
 325  */
 326 TupleHashEntry
 327 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 328                                          bool *isnew)
 329 {
 330         int                     numCols = hashtable->numCols;
 331         AttrNumber *keyColIdx = hashtable->keyColIdx;
 332         HeapTuple       tuple = slot->val;
 333         TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
 334         uint32          hashkey = 0;
 335         int                     i;
 336         int                     bucketno;
 337         TupleHashEntry entry;
 338         MemoryContext oldContext;
 339
 340         /* Need to run the hash function in short-lived context */
 341         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 342
 343         for (i = 0; i < numCols; i++)
 344         {
 345                 AttrNumber      att = keyColIdx[i];
 346                 Datum           attr;
 347                 bool            isNull;
 348
 349                 /* rotate hashkey left 1 bit at each step */
 350                 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
 351
 352                 attr = heap_getattr(tuple, att, tupdesc, &isNull);
 353
 354                 if (!isNull)                    /* treat nulls as having hash key 0 */
 355                 {
 356                         uint32          hkey;
 357
 358                         hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
 359                                                                                                 attr));
 360                         hashkey ^= hkey;
 361                 }
 362         }
 363         bucketno = hashkey % (uint32) hashtable->nbuckets;
 364
 365         for (entry = hashtable->buckets[bucketno];
 366                  entry != NULL;
 367                  entry = entry->next)
 368         {
 369                 /* Quick check using hashkey */
 370                 if (entry->hashkey != hashkey)
 371                         continue;
 372                 if (execTuplesMatch(entry->firstTuple,
 373                                                         tuple,
 374                                                         tupdesc,
 375                                                         numCols, keyColIdx,
 376                                                         hashtable->eqfunctions,
 377                                                         hashtable->tempcxt))
 378                 {
 379                         if (isnew)
 380                                 *isnew = false;
 381                         MemoryContextSwitchTo(oldContext);
 382                         return entry;
 383                 }
 384         }
 385
 386         /* Not there, so build a new one if requested */
 387         if (isnew)
 388         {
 389                 MemoryContextSwitchTo(hashtable->tablecxt);
 390
 391                 entry = (TupleHashEntry) palloc0(hashtable->entrysize);
 392
 393                 entry->hashkey = hashkey;
 394                 entry->firstTuple = heap_copytuple(tuple);
 395
 396                 entry->next = hashtable->buckets[bucketno];
 397                 hashtable->buckets[bucketno] = entry;
 398
 399                 *isnew = true;
 400         }
 401
 402         MemoryContextSwitchTo(oldContext);
 403
 404         return entry;
 405 }
 406
 407 /*
 408  * Walk through all the entries of a hash table, in no special order.
 409  * Returns NULL when no more entries remain.
 410  *
 411  * Iterator state must be initialized with ResetTupleHashIterator() macro.
 412  */
 413 TupleHashEntry
 414 ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator * state)
 415 {
 416         TupleHashEntry entry;
 417
 418         entry = state->next_entry;
 419         while (entry == NULL)
 420         {
 421                 if (state->next_bucket >= hashtable->nbuckets)
 422                 {
 423                         /* No more entries in hashtable, so done */
 424                         return NULL;
 425                 }
 426                 entry = hashtable->buckets[state->next_bucket++];
 427         }
 428         state->next_entry = entry->next;
 429
 430         return entry;
 431 }