Code review for improved-hashing patch. Fix some portability issues

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 9 Mar 2002 17:35:37 +0000 (17:35 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 9 Mar 2002 17:35:37 +0000 (17:35 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 Mar 2002 17:35:37 +0000 (17:35 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 Mar 2002 17:35:37 +0000 (17:35 +0000)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index ebad04c..c0a22b6 100644 (file)
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
   *
   * NOTES
   *       This file contains only the public interface routines.
@@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS)
         Datum      *datum = (Datum *) PG_GETARG_POINTER(1);
         char       *nulls = (char *) PG_GETARG_POINTER(2);
         ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
+#ifdef NOT_USED
+       Relation        heapRel = (Relation) PG_GETARG_POINTER(4);
+#endif
  
         InsertIndexResult res;
         HashItem        hitem;
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c

index 2e0f181..b1e3fbf 100644 (file)
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
   *
   * NOTES
   *       These functions are stored in pg_amproc.      For each operator class
@@ -58,7 +58,7 @@ hashfloat4(PG_FUNCTION_ARGS)
  {
         float4          key = PG_GETARG_FLOAT4(0);
  
-       return hash_any((char *) &key, sizeof(key));
+       return hash_any((unsigned char *) &key, sizeof(key));
  }
  
  Datum
@@ -66,7 +66,7 @@ hashfloat8(PG_FUNCTION_ARGS)
  {
         float8          key = PG_GETARG_FLOAT8(0);
  
-       return hash_any((char *) &key, sizeof(key));
+       return hash_any((unsigned char *) &key, sizeof(key));
  }
  
  Datum
@@ -74,7 +74,7 @@ hashoidvector(PG_FUNCTION_ARGS)
  {
         Oid                *key = (Oid *) PG_GETARG_POINTER(0);
  
-       return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid));
+       return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
  }
  
  /*
@@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS)
  {
         int16      *key = (int16 *) PG_GETARG_POINTER(0);
  
-       return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16));
+       return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
  }
  
  Datum
  hashname(PG_FUNCTION_ARGS)
  {
         char       *key = NameStr(*PG_GETARG_NAME(0));
+       int                     keylen = strlen(key);
  
-       Assert(strlen(key) <= NAMEDATALEN);
+       Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
  
-       return hash_any(key, strlen(key));
+       return hash_any((unsigned char *) key, keylen);
  }
  
  /*
@@ -110,7 +111,8 @@ hashvarlena(PG_FUNCTION_ARGS)
         struct varlena *key = PG_GETARG_VARLENA_P(0);
         Datum           result;
  
-       result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ);
+       result = hash_any((unsigned char *) VARDATA(key),
+                                         VARSIZE(key) - VARHDRSZ);
  
         /* Avoid leaking memory for toasted inputs */
         PG_FREE_IF_COPY(key, 0);
@@ -118,13 +120,15 @@ hashvarlena(PG_FUNCTION_ARGS)
         return result;
  }
  
-/* This hash function was written by Bob Jenkins
+/*
+ * This hash function was written by Bob Jenkins
   * (bob_jenkins@burtleburtle.net), and superficially adapted
   * for PostgreSQL by Neil Conway. For more information on this
- * hash function, see http://burtleburtle.net/bob/hash/doobs.html
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
   */
  
-/*
+/*----------
   * mix -- mix 3 32-bit values reversibly.
   * For every delta with one or two bits set, and the deltas of all three
   * high bits or all three low bits, whether the original value of a,b,c
@@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS)
   *   have at least 1/4 probability of changing.
   * - If mix() is run forward, every bit of c will change between 1/3 and
   *   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ *----------
   */
  #define mix(a,b,c) \
  { \
@@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS)
   * hash_any() -- hash a variable-length key into a 32-bit value
   *      k       : the key (the unaligned variable-length array of bytes)
   *      len     : the length of the key, counting by bytes
- * Returns a 32-bit value.  Every bit of the key affects every bit of
+ *
+ * Returns a uint32 value.  Every bit of the key affects every bit of
   * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
   * About 6*len+35 instructions. The best hash table sizes are powers
   * of 2.  There is no need to do mod a prime (mod is sooo slow!).
   * If you need less than 32 bits, use a bitmask.
   */
  Datum
-hash_any(register const char *k, register int keylen)
+hash_any(register const unsigned char *k, register int keylen)
  {
-   register Datum a,b,c,len;
-
-   /* Set up the internal state */
-   len = keylen;
-   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-   /* Another arbitrary value. If the hash function is called
-    * multiple times, this could be the previously generated
-    * hash value; however, the interface currently doesn't allow
-    * this. AFAIK this isn't a big deal.
-    */
-   c = 3923095;
-
-   /* handle most of the key */
-   while (len >= 12)
-   {
-      a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
-      b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
-      c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
-      mix(a,b,c);
-      k += 12; len -= 12;
-   }
-
-   /* handle the last 11 bytes */
-   c += keylen;
-   switch(len)              /* all the case statements fall through */
-   {
-   case 11: c+=((Datum)k[10]<<24);
-   case 10: c+=((Datum)k[9]<<16);
-   case 9 : c+=((Datum)k[8]<<8);
-      /* the first byte of c is reserved for the length */
-   case 8 : b+=((Datum)k[7]<<24);
-   case 7 : b+=((Datum)k[6]<<16);
-   case 6 : b+=((Datum)k[5]<<8);
-   case 5 : b+=k[4];
-   case 4 : a+=((Datum)k[3]<<24);
-   case 3 : a+=((Datum)k[2]<<16);
-   case 2 : a+=((Datum)k[1]<<8);
-   case 1 : a+=k[0];
-     /* case 0: nothing left to add */
-   }
-   mix(a,b,c);
-   /* report the result */
-   return c;
+       register uint32 a,b,c,len;
+
+       /* Set up the internal state */
+       len = keylen;
+       a = b = 0x9e3779b9;                     /* the golden ratio; an arbitrary value */
+       c = 3923095;                            /* initialize with an arbitrary value */
+
+       /* handle most of the key */
+       while (len >= 12)
+       {
+               a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
+               b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
+               c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
+               mix(a,b,c);
+               k += 12; len -= 12;
+       }
+
+       /* handle the remaining bytes (at most 11) */
+       c += keylen;
+       switch (len)                            /* all the case statements fall through */
+       {
+               case 11: c+=((uint32)k[10]<<24);
+               case 10: c+=((uint32)k[9]<<16);
+               case 9 : c+=((uint32)k[8]<<8);
+                       /* the first byte of c is reserved for the length */
+               case 8 : b+=((uint32)k[7]<<24);
+               case 7 : b+=((uint32)k[6]<<16);
+               case 6 : b+=((uint32)k[5]<<8);
+               case 5 : b+=k[4];
+               case 4 : a+=((uint32)k[3]<<24);
+               case 3 : a+=((uint32)k[2]<<16);
+               case 2 : a+=((uint32)k[1]<<8);
+               case 1 : a+=k[0];
+                       /* case 0: nothing left to add */
+       }
+       mix(a,b,c);
+       /* report the result */
+       return UInt32GetDatum(c);
  }
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index 5645eb3..4d6e302 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
- *     $Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
+ *     $Id: nodeHash.c,v 1.62 2002/03/09 17:35:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,6 +22,7 @@
  #include <sys/types.h>
  #include <math.h>
  
+#include "access/hash.h"
  #include "executor/execdebug.h"
  #include "executor/nodeHash.h"
  #include "executor/nodeHashjoin.h"
@@ -31,7 +32,7 @@
  #include "utils/lsyscache.h"
  
  
-static int     hashFunc(Datum key, int len, bool byVal);
+static uint32  hashFunc(Datum key, int len, bool byVal);
  
  /* ----------------------------------------------------------------
   *             ExecHash
@@ -553,7 +554,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
                 bucketno = hashFunc(keyval,
                                                         (int) hashtable->typLen,
                                                         hashtable->typByVal)
-                       % hashtable->totalbuckets;
+                       % (uint32) hashtable->totalbuckets;
         }
  
  #ifdef HJDEBUG
@@ -624,30 +625,29 @@ ExecScanHashBucket(HashJoinState *hjstate,
  /* ----------------------------------------------------------------
   *             hashFunc
   *
- *             the hash function, copied from Margo
+ *             the hash function for hash joins
   *
   *             XXX this probably ought to be replaced with datatype-specific
   *             hash functions, such as those already implemented for hash indexes.
   * ----------------------------------------------------------------
   */
-static int
+static uint32
  hashFunc(Datum key, int len, bool byVal)
  {
-       unsigned int h = 0;
+       unsigned char *k;
  
         if (byVal)
         {
                 /*
-                * If it's a by-value data type, use the 'len' least significant
-                * bytes of the Datum value.  This should do the right thing on
-                * either bigendian or littleendian hardware --- see the Datum
-                * access macros in c.h.
+                * If it's a by-value data type, just hash the whole Datum value.
+                * This assumes that datatypes narrower than Datum are consistently
+                * padded (either zero-extended or sign-extended, but not random
+                * bits) to fill Datum; see the XXXGetDatum macros in postgres.h.
+                * NOTE: it would not work to do hash_any(&key, len) since this
+                * would get the wrong bytes on a big-endian machine.
                  */
-               while (len-- > 0)
-               {
-                       h = (h * PRIME1) ^ (key & 0xFF);
-                       key >>= 8;
-               }
+               k = (unsigned char *) &key;
+               len = sizeof(Datum);
         }
         else
         {
@@ -662,8 +662,6 @@ hashFunc(Datum key, int len, bool byVal)
                  * freeing the detoasted copy; that happens for free when the
                  * per-tuple memory context is reset in ExecHashGetBucket.)
                  */
-               unsigned char *k;
-
                 if (len < 0)
                 {
                         struct varlena *vkey = PG_DETOAST_DATUM(key);
@@ -673,12 +671,9 @@ hashFunc(Datum key, int len, bool byVal)
                 }
                 else
                         k = (unsigned char *) DatumGetPointer(key);
-
-               while (len-- > 0)
-                       h = (h * PRIME1) ^ (*k++);
         }
  
-       return h % PRIME2;
+       return DatumGetUInt32(hash_any(k, len));
  }
  
  /* ----------------------------------------------------------------
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c

index 044c310..2c26119 100644 (file)
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.64 2001/11/21 05:57:33 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.65 2002/03/09 17:35:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1116,7 +1116,7 @@ timetz_hash(PG_FUNCTION_ARGS)
          * sizeof(TimeTzADT), so that any garbage pad bytes in the structure
          * won't be included in the hash!
          */
-       return hash_any((char *) key, sizeof(double) + sizeof(int4));
+       return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
  }
  
  Datum
diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c

index d9807e7..0abd3a7 100644 (file)
--- a/src/backend/utils/adt/mac.c
+++ b/src/backend/utils/adt/mac.c
@@ -1,7 +1,7 @@
  /*
   *     PostgreSQL type definitions for MAC addresses.
   *
- *     $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.21 2001/08/21 21:23:21 tgl Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.22 2002/03/09 17:35:35 tgl Exp $
   */
  
  #include "postgres.h"
@@ -230,7 +230,7 @@ hashmacaddr(PG_FUNCTION_ARGS)
  {
         macaddr    *key = PG_GETARG_MACADDR_P(0);
  
-       return hash_any((char *) key, sizeof(macaddr));
+       return hash_any((unsigned char *) key, sizeof(macaddr));
  }
  
  /*
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c

index ad36d56..5fbdc5b 100644 (file)
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.64 2002/03/06 06:10:18 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.65 2002/03/09 17:35:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1017,7 +1017,7 @@ interval_hash(PG_FUNCTION_ARGS)
          * sizeof(Interval), so that any garbage pad bytes in the structure
          * won't be included in the hash!
          */
-       return hash_any((char *) key, sizeof(double) + sizeof(int4));
+       return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
  }
  
  /* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c

index f25a06e..e384b3f 100644 (file)
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.87 2001/11/18 12:07:07 ishii Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.88 2002/03/09 17:35:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -791,7 +791,7 @@ hashbpchar(PG_FUNCTION_ARGS)
         keydata = VARDATA(key);
         keylen = bcTruelen(key);
  
-       result = hash_any(keydata, keylen);
+       result = hash_any((unsigned char *) keydata, keylen);
  
         /* Avoid leaking memory for toasted inputs */
         PG_FREE_IF_COPY(key, 0);
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c

index 7fb811e..4a9c9d8 100644 (file)
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.41 2002/03/02 21:39:33 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.42 2002/03/09 17:35:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -329,8 +329,7 @@ init_htab(HTAB *hashp, long nelem)
         }
  
  #if HASH_DEBUG
-       fprintf(stderr, "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
-                       "init_htab:",
+       fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n",
                         "TABLE POINTER   ", hashp,
                         "DIRECTORY SIZE  ", hctl->dsize,
                         "SEGMENT SIZE    ", hctl->ssize,
@@ -453,7 +452,7 @@ hash_stats(const char *where, HTAB *hashp)
         fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
                         where, hashp->hctl->accesses, hashp->hctl->collisions);
  
-       fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %d segmentcount %d\n",
+       fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
                         hashp->hctl->nentries, hashp->hctl->keysize,
                         hashp->hctl->max_bucket, hashp->hctl->nsegs);
         fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
@@ -470,7 +469,7 @@ static uint32
  call_hash(HTAB *hashp, void *k)
  {
         HASHHDR    *hctl = hashp->hctl;
-       long            hash_val,
+       uint32          hash_val,
                                 bucket;
  
         hash_val = hashp->hash(k, (int) hctl->keysize);
@@ -479,7 +478,7 @@ call_hash(HTAB *hashp, void *k)
         if (bucket > hctl->max_bucket)
                 bucket = bucket & hctl->low_mask;
  
-       return (uint32) bucket;
+       return bucket;
  }
  
  /*----------
@@ -647,7 +646,7 @@ hash_search(HTAB *hashp,
                         /* caller is expected to fill the data field on return */
  
                         /* Check if it is time to split the segment */
-                       if (++hctl->nentries / (hctl->max_bucket + 1) > hctl->ffactor)
+                       if (++hctl->nentries / (long) (hctl->max_bucket + 1) > hctl->ffactor)
                         {
                                 /*
                                  * NOTE: failure to expand table is not a fatal error, it
@@ -795,10 +794,10 @@ expand_table(HTAB *hashp)
         /*
          * If we crossed a power of 2, readjust masks.
          */
-       if (new_bucket > hctl->high_mask)
+       if ((uint32) new_bucket > hctl->high_mask)
         {
                 hctl->low_mask = hctl->high_mask;
-               hctl->high_mask = new_bucket | hctl->low_mask;
+               hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
         }
  
         /*
diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c

index 4c7dd4c..f0b8608 100644 (file)
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@@ -9,14 +9,16 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.15 2001/10/25 05:49:51 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.16 2002/03/09 17:35:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
  
+#include "access/hash.h"
  #include "utils/hsearch.h"
  
+
  /*
   * string_hash: hash function for keys that are null-terminated strings.
   *
@@ -27,91 +29,17 @@
   *
   * NOTE: this is the default hash function if none is specified.
   */
-long
+uint32
  string_hash(void *key, int keysize)
  {
-       unsigned char *k = (unsigned char *) key;
-       long            h = 0;
-
-       while (*k)
-               h = (h * PRIME1) ^ (*k++);
-
-       h %= PRIME2;
-
-       return h;
+       return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
  }
  
  /*
   * tag_hash: hash function for fixed-size tag values
- *
- * NB: we assume that the supplied key is aligned at least on an 'int'
- * boundary, if its size is >= sizeof(int).
   */
-long
+uint32
  tag_hash(void *key, int keysize)
  {
-       int                *k = (int *) key;
-       long            h = 0;
-
-       /*
-        * Use four byte chunks in a "jump table" to go a little faster.
-        *
-        * Currently the maximum keysize is 16 (mar 17 1992).  I have put in
-        * cases for up to 32.  Bigger than this will resort to a for loop
-        * (see the default case).
-        */
-       switch (keysize)
-       {
-               case 8 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case 7 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case 6 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case 5 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case 4 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case 3 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case 2 * sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       /* fall through */
-
-               case sizeof(int):
-                       h = (h * PRIME1) ^(*k++);
-                       break;
-
-               default:
-                       /* Do an int at a time */
-                       for (; keysize >= (int) sizeof(int); keysize -= sizeof(int))
-                               h = (h * PRIME1) ^ (*k++);
-
-                       /* Cope with any partial-int leftover bytes */
-                       if (keysize > 0)
-                       {
-                               unsigned char *keybyte = (unsigned char *) k;
-
-                               do
-                                       h = (h * PRIME1) ^ (*keybyte++);
-                               while (--keysize > 0);
-                       }
-                       break;
-       }
-
-       h %= PRIME2;
-
-       return h;
+       return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
  }
diff --git a/src/include/access/hash.h b/src/include/access/hash.h

index 42c0558..c809a13 100644 (file)
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $
+ * $Id: hash.h,v 1.45 2002/03/09 17:35:37 tgl Exp $
   *
   * NOTES
   *             modeled after Margo Seltzer's hash implementation for unix.
@@ -252,7 +252,8 @@ extern Datum hashbulkdelete(PG_FUNCTION_ARGS);
   * Datatype-specific hash functions in hashfunc.c.
   *
   * NOTE: some of these are also used by catcache operations, without
- * any direct connection to hash indexes.
+ * any direct connection to hash indexes.  Also, the common hash_any
+ * routine is used by dynahash tables and hash joins.
   */
  extern Datum hashchar(PG_FUNCTION_ARGS);
  extern Datum hashint2(PG_FUNCTION_ARGS);
@@ -265,7 +266,7 @@ extern Datum hashoidvector(PG_FUNCTION_ARGS);
  extern Datum hashint2vector(PG_FUNCTION_ARGS);
  extern Datum hashname(PG_FUNCTION_ARGS);
  extern Datum hashvarlena(PG_FUNCTION_ARGS);
-extern Datum hash_any(register const char *k, register int keylen);
+extern Datum hash_any(register const unsigned char *k, register int keylen);
  
  /* private routines */
  
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h

index 126bd71..11dcd90 100644 (file)
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: hsearch.h,v 1.25 2001/11/05 17:46:36 momjian Exp $
+ * $Id: hsearch.h,v 1.26 2002/03/09 17:35:37 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -35,9 +35,6 @@
  #define DEF_DIRSIZE                       256
  #define DEF_FFACTOR                       1    /* default fill factor */
  
-#define PRIME1                            37           /* for the hash function */
-#define PRIME2                            1048583
-
  
  /*
   * HASHELEMENT is the private part of a hashtable entry.  The caller's data
@@ -60,10 +57,10 @@ typedef struct HASHHDR
  {
         long            dsize;                  /* Directory Size */
         long            ssize;                  /* Segment Size --- must be power of 2 */
-       long            sshift;                 /* Segment shift */
-       long            max_bucket;             /* ID of Maximum bucket in use */
-       long            high_mask;              /* Mask to modulo into entire table */
-       long            low_mask;               /* Mask to modulo into lower half of table */
+       int                     sshift;                 /* Segment shift = log2(ssize) */
+       uint32          max_bucket;             /* ID of Maximum bucket in use */
+       uint32          high_mask;              /* Mask to modulo into entire table */
+       uint32          low_mask;               /* Mask to modulo into lower half of table */
         long            ffactor;                /* Fill factor */
         long            nentries;               /* Number of entries in hash table */
         long            nsegs;                  /* Number of allocated segments */
@@ -86,7 +83,7 @@ typedef struct HTAB
  {
         HASHHDR    *hctl;                       /* shared control information */
         HASHSEGMENT *dir;                       /* directory of segment starts */
-       long            (*hash) (void *key, int keysize);               /* Hash Function */
+       uint32          (*hash) (void *key, int keysize);               /* Hash Function */
         void       *(*alloc) (Size);    /* memory allocator */
         MemoryContext hcxt;                     /* memory context if default allocator
                                                                  * used */
@@ -101,7 +98,7 @@ typedef struct HASHCTL
         long            ssize;                  /* Segment Size */
         long            dsize;                  /* (initial) Directory Size */
         long            ffactor;                /* Fill factor */
-       long            (*hash) (void *key, int keysize);               /* Hash Function */
+       uint32          (*hash) (void *key, int keysize);               /* Hash Function */
         long            keysize;                /* hash key length in bytes */
         long            entrysize;              /* total user element size in bytes */
         long            max_dsize;              /* limit to dsize if directory size is
@@ -143,7 +140,7 @@ typedef enum
  typedef struct
  {
         HTAB       *hashp;
-       long            curBucket;              /* index of current bucket */
+       uint32          curBucket;              /* index of current bucket */
         HASHELEMENT *curEntry;          /* current entry in bucket */
  } HASH_SEQ_STATUS;
  
@@ -164,7 +161,7 @@ extern long hash_select_dirsize(long num_entries);
  /*
   * prototypes for functions in hashfn.c
   */
-extern long string_hash(void *key, int keysize);
-extern long tag_hash(void *key, int keysize);
+extern uint32 string_hash(void *key, int keysize);
+extern uint32 tag_hash(void *key, int keysize);
  
  #endif   /* HSEARCH_H */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 9 Mar 2002 17:35:37 +0000 (17:35 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 9 Mar 2002 17:35:37 +0000 (17:35 +0000)
src/backend/access/hash/hash.c		patch \| blob \| history
src/backend/access/hash/hashfunc.c		patch \| blob \| history
src/backend/executor/nodeHash.c		patch \| blob \| history
src/backend/utils/adt/date.c		patch \| blob \| history
src/backend/utils/adt/mac.c		patch \| blob \| history
src/backend/utils/adt/timestamp.c		patch \| blob \| history
src/backend/utils/adt/varchar.c		patch \| blob \| history
src/backend/utils/hash/dynahash.c		patch \| blob \| history
src/backend/utils/hash/hashfn.c		patch \| blob \| history
src/include/access/hash.h		patch \| blob \| history
src/include/utils/hsearch.h		patch \| blob \| history