1 /*-------------------------------------------------------------------------
4 * Support functions for hash access method.
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.57 2009/01/01 17:23:35 momjian Exp $
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
29 #include "access/hash.h"
32 /* Note: this is used for both "char" and boolean datatypes */
34 hashchar(PG_FUNCTION_ARGS)
36 return hash_uint32((int32) PG_GETARG_CHAR(0));
40 hashint2(PG_FUNCTION_ARGS)
42 return hash_uint32((int32) PG_GETARG_INT16(0));
46 hashint4(PG_FUNCTION_ARGS)
48 return hash_uint32(PG_GETARG_INT32(0));
52 hashint8(PG_FUNCTION_ARGS)
55 * The idea here is to produce a hash value compatible with the values
56 * produced by hashint4 and hashint2 for logically equal inputs; this is
57 * necessary to support cross-type hash joins across these input types.
58 * Since all three types are signed, we can xor the high half of the int8
59 * value if the sign is positive, or the complement of the high half when
60 * the sign is negative.
62 #ifndef INT64_IS_BUSTED
63 int64 val = PG_GETARG_INT64(0);
64 uint32 lohalf = (uint32) val;
65 uint32 hihalf = (uint32) (val >> 32);
67 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
69 return hash_uint32(lohalf);
71 /* here if we can't count on "x >> 32" to work sanely */
72 return hash_uint32((int32) PG_GETARG_INT64(0));
77 hashoid(PG_FUNCTION_ARGS)
79 return hash_uint32((uint32) PG_GETARG_OID(0));
83 hashenum(PG_FUNCTION_ARGS)
85 return hash_uint32((uint32) PG_GETARG_OID(0));
89 hashfloat4(PG_FUNCTION_ARGS)
91 float4 key = PG_GETARG_FLOAT4(0);
95 * On IEEE-float machines, minus zero and zero have different bit patterns
96 * but should compare as equal. We must ensure that they have the same
97 * hash value, which is most reliably done this way:
99 if (key == (float4) 0)
103 * To support cross-type hashing of float8 and float4, we want to return
104 * the same hash value hashfloat8 would produce for an equal float8 value.
105 * So, widen the value to float8 and hash that. (We must do this rather
106 * than have hashfloat8 try to narrow its value to float4; that could fail
111 return hash_any((unsigned char *) &key8, sizeof(key8));
115 hashfloat8(PG_FUNCTION_ARGS)
117 float8 key = PG_GETARG_FLOAT8(0);
120 * On IEEE-float machines, minus zero and zero have different bit patterns
121 * but should compare as equal. We must ensure that they have the same
122 * hash value, which is most reliably done this way:
124 if (key == (float8) 0)
127 return hash_any((unsigned char *) &key, sizeof(key));
131 hashoidvector(PG_FUNCTION_ARGS)
133 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
135 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
139 hashint2vector(PG_FUNCTION_ARGS)
141 int2vector *key = (int2vector *) PG_GETARG_POINTER(0);
143 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(int2));
147 hashname(PG_FUNCTION_ARGS)
149 char *key = NameStr(*PG_GETARG_NAME(0));
150 int keylen = strlen(key);
152 Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
154 return hash_any((unsigned char *) key, keylen);
158 hashtext(PG_FUNCTION_ARGS)
160 text *key = PG_GETARG_TEXT_PP(0);
164 * Note: this is currently identical in behavior to hashvarlena, but keep
165 * it as a separate function in case we someday want to do something
166 * different in non-C locales. (See also hashbpchar, if so.)
168 result = hash_any((unsigned char *) VARDATA_ANY(key),
169 VARSIZE_ANY_EXHDR(key));
171 /* Avoid leaking memory for toasted inputs */
172 PG_FREE_IF_COPY(key, 0);
178 * hashvarlena() can be used for any varlena datatype in which there are
179 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
182 hashvarlena(PG_FUNCTION_ARGS)
184 struct varlena *key = PG_GETARG_VARLENA_PP(0);
187 result = hash_any((unsigned char *) VARDATA_ANY(key),
188 VARSIZE_ANY_EXHDR(key));
190 /* Avoid leaking memory for toasted inputs */
191 PG_FREE_IF_COPY(key, 0);
197 * This hash function was written by Bob Jenkins
198 * (bob_jenkins@burtleburtle.net), and superficially adapted
199 * for PostgreSQL by Neil Conway. For more information on this
200 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
201 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
203 * In the current code, we have adopted an idea from Bob's 2006 update
204 * of his hash function, which is to fetch the data a word at a time when
205 * it is suitably aligned. This makes for a useful speedup, at the cost
206 * of having to maintain four code paths (aligned vs unaligned, and
207 * little-endian vs big-endian). Note that we have NOT adopted his newer
208 * mix() function, which is faster but may sacrifice some randomness.
211 /* Get a bit mask of the bits set in non-uint32 aligned addresses */
212 #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
215 * mix -- mix 3 32-bit values reversibly.
216 * For every delta with one or two bits set, and the deltas of all three
217 * high bits or all three low bits, whether the original value of a,b,c
218 * is almost all zero or is uniformly distributed,
219 * - If mix() is run forward or backward, at least 32 bits in a,b,c
220 * have at least 1/4 probability of changing.
221 * - If mix() is run forward, every bit of c will change between 1/3 and
222 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
227 a -= b; a -= c; a ^= ((c)>>13); \
228 b -= c; b -= a; b ^= ((a)<<8); \
229 c -= a; c -= b; c ^= ((b)>>13); \
230 a -= b; a -= c; a ^= ((c)>>12); \
231 b -= c; b -= a; b ^= ((a)<<16); \
232 c -= a; c -= b; c ^= ((b)>>5); \
233 a -= b; a -= c; a ^= ((c)>>3); \
234 b -= c; b -= a; b ^= ((a)<<10); \
235 c -= a; c -= b; c ^= ((b)>>15); \
239 * hash_any() -- hash a variable-length key into a 32-bit value
240 * k : the key (the unaligned variable-length array of bytes)
241 * len : the length of the key, counting by bytes
243 * Returns a uint32 value. Every bit of the key affects every bit of
244 * the return value. Every 1-bit and 2-bit delta achieves avalanche.
245 * About 6*len+35 instructions. The best hash table sizes are powers
246 * of 2. There is no need to do mod a prime (mod is sooo slow!).
247 * If you need less than 32 bits, use a bitmask.
249 * Note: we could easily change this function to return a 64-bit hash value
250 * by using the final values of both b and c. b is perhaps a little less
251 * well mixed than c, however.
254 hash_any(register const unsigned char *k, register int keylen)
261 /* Set up the internal state */
263 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
264 c = 3923095; /* initialize with an arbitrary value */
266 /* If the source pointer is word-aligned, we use word-wide fetches */
267 if (((long) k & UINT32_ALIGN_MASK) == 0)
269 /* Code path for aligned source data */
270 register const uint32 *ka = (const uint32 *) k;
272 /* handle most of the key */
283 /* handle the last 11 bytes */
284 k = (const unsigned char *) ka;
286 #ifdef WORDS_BIGENDIAN
290 c += ((uint32) k[10] << 8);
293 c += ((uint32) k[9] << 16);
296 c += ((uint32) k[8] << 24);
297 /* the lowest byte of c is reserved for the length */
304 b += ((uint32) k[6] << 8);
307 b += ((uint32) k[5] << 16);
310 b += ((uint32) k[4] << 24);
316 a += ((uint32) k[2] << 8);
319 a += ((uint32) k[1] << 16);
322 a += ((uint32) k[0] << 24);
323 /* case 0: nothing left to add */
325 #else /* !WORDS_BIGENDIAN */
329 c += ((uint32) k[10] << 24);
332 c += ((uint32) k[9] << 16);
335 c += ((uint32) k[8] << 8);
336 /* the lowest byte of c is reserved for the length */
343 b += ((uint32) k[6] << 16);
346 b += ((uint32) k[5] << 8);
355 a += ((uint32) k[2] << 16);
358 a += ((uint32) k[1] << 8);
362 /* case 0: nothing left to add */
364 #endif /* WORDS_BIGENDIAN */
368 /* Code path for non-aligned source data */
370 /* handle most of the key */
373 #ifdef WORDS_BIGENDIAN
374 a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
375 b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
376 c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
377 #else /* !WORDS_BIGENDIAN */
378 a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
379 b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
380 c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
381 #endif /* WORDS_BIGENDIAN */
387 /* handle the last 11 bytes */
389 #ifdef WORDS_BIGENDIAN
390 switch (len) /* all the case statements fall through */
393 c += ((uint32) k[10] << 8);
395 c += ((uint32) k[9] << 16);
397 c += ((uint32) k[8] << 24);
398 /* the lowest byte of c is reserved for the length */
402 b += ((uint32) k[6] << 8);
404 b += ((uint32) k[5] << 16);
406 b += ((uint32) k[4] << 24);
410 a += ((uint32) k[2] << 8);
412 a += ((uint32) k[1] << 16);
414 a += ((uint32) k[0] << 24);
415 /* case 0: nothing left to add */
417 #else /* !WORDS_BIGENDIAN */
418 switch (len) /* all the case statements fall through */
421 c += ((uint32) k[10] << 24);
423 c += ((uint32) k[9] << 16);
425 c += ((uint32) k[8] << 8);
426 /* the lowest byte of c is reserved for the length */
428 b += ((uint32) k[7] << 24);
430 b += ((uint32) k[6] << 16);
432 b += ((uint32) k[5] << 8);
436 a += ((uint32) k[3] << 24);
438 a += ((uint32) k[2] << 16);
440 a += ((uint32) k[1] << 8);
443 /* case 0: nothing left to add */
445 #endif /* WORDS_BIGENDIAN */
450 /* report the result */
451 return UInt32GetDatum(c);
455 * hash_uint32() -- hash a 32-bit value
457 * This has the same result as
458 * hash_any(&k, sizeof(uint32))
459 * but is faster and doesn't force the caller to store k into memory.
462 hash_uint32(uint32 k)
470 c = 3923095 + (uint32) sizeof(uint32);
474 /* report the result */
475 return UInt32GetDatum(c);