4 * Implementation of in-memory hash tables for Tcl and Tcl-based
7 * Copyright 1991 Regents of the University of California
8 * Permission to use, copy, modify, and distribute this
9 * software and its documentation for any purpose and without
10 * fee is hereby granted, provided that this copyright
11 * notice appears in all copies. The University of California
12 * makes no representations about the suitability of this
13 * software for any purpose. It is provided "as is" without
14 * express or implied warranty.
16 * $Id: tclHash.c,v 1.1.1.1 2001/04/29 20:34:51 karll Exp $
22 * Imported library procedures for which there are no header files:
28 * When there are this many entries per bucket, on average, rebuild
29 * the hash table to make it larger.
32 #define REBUILD_MULTIPLIER 3
36 * The following macro takes a preliminary integer hash value and
37 * produces an index into a hash tables bucket list. The idea is
38 * to make it so that preliminary values that are arbitrarily similar
39 * will end up in different buckets. The hash function was taken
40 * from a random-number generator.
43 #define RANDOM_INDEX(tablePtr, i) \
44 (((((long) (i))*1103515245) >> (tablePtr)->downShift) & (tablePtr)->mask)
47 * Procedure prototypes for static procedures in this file:
50 static Tcl_HashEntry * ArrayFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
52 static Tcl_HashEntry * ArrayCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
53 char *key, int *newPtr));
54 static Tcl_HashEntry * BogusFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
56 static Tcl_HashEntry * BogusCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
57 char *key, int *newPtr));
58 static unsigned int HashString _ANSI_ARGS_((char *string));
59 static void RebuildTable _ANSI_ARGS_((Tcl_HashTable *tablePtr));
60 static Tcl_HashEntry * StringFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
62 static Tcl_HashEntry * StringCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
63 char *key, int *newPtr));
64 static Tcl_HashEntry * OneWordFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
66 static Tcl_HashEntry * OneWordCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
67 char *key, int *newPtr));
70 *----------------------------------------------------------------------
72 * Tcl_InitHashTable --
74 * Given storage for a hash table, set up the fields to prepare
75 * the hash table for use.
81 * TablePtr is now ready to be passed to Tcl_FindHashEntry and
82 * Tcl_CreateHashEntry.
84 *----------------------------------------------------------------------
88 Tcl_InitHashTable(tablePtr, keyType)
89 register Tcl_HashTable *tablePtr; /* Pointer to table record, which
90 * is supplied by the caller. */
91 int keyType; /* Type of keys to use in table:
92 * TCL_STRING_KEYS, TCL_ONE_WORD_KEYS,
93 * or an integer >= 2. */
95 tablePtr->buckets = tablePtr->staticBuckets;
96 tablePtr->staticBuckets[0] = tablePtr->staticBuckets[1] = 0;
97 tablePtr->staticBuckets[2] = tablePtr->staticBuckets[3] = 0;
98 tablePtr->numBuckets = TCL_SMALL_HASH_TABLE;
99 tablePtr->numEntries = 0;
100 tablePtr->rebuildSize = TCL_SMALL_HASH_TABLE*REBUILD_MULTIPLIER;
101 tablePtr->downShift = 28;
103 tablePtr->keyType = keyType;
104 if (keyType == TCL_STRING_KEYS) {
105 tablePtr->findProc = StringFind;
106 tablePtr->createProc = StringCreate;
107 } else if (keyType == TCL_ONE_WORD_KEYS) {
108 tablePtr->findProc = OneWordFind;
109 tablePtr->createProc = OneWordCreate;
111 tablePtr->findProc = ArrayFind;
112 tablePtr->createProc = ArrayCreate;
117 *----------------------------------------------------------------------
119 * Tcl_DeleteHashEntry --
121 * Remove a single entry from a hash table.
127 * The entry given by entryPtr is deleted from its table and
128 * should never again be used by the caller. It is up to the
129 * caller to free the clientData field of the entry, if that
132 *----------------------------------------------------------------------
136 Tcl_DeleteHashEntry(entryPtr)
137 Tcl_HashEntry *entryPtr;
139 register Tcl_HashEntry *prevPtr;
141 if (*entryPtr->bucketPtr == entryPtr) {
142 *entryPtr->bucketPtr = entryPtr->nextPtr;
144 for (prevPtr = *entryPtr->bucketPtr; ; prevPtr = prevPtr->nextPtr) {
145 if (prevPtr == NULL) {
146 panic("malformed bucket chain in Tcl_DeleteHashEntry");
148 if (prevPtr->nextPtr == entryPtr) {
149 prevPtr->nextPtr = entryPtr->nextPtr;
154 entryPtr->tablePtr->numEntries--;
155 ckfree((char *) entryPtr);
159 *----------------------------------------------------------------------
161 * Tcl_DeleteHashTable --
163 * Free up everything associated with a hash table except for
164 * the record for the table itself.
170 * The hash table is no longer useable.
172 *----------------------------------------------------------------------
176 Tcl_DeleteHashTable(tablePtr)
177 register Tcl_HashTable *tablePtr; /* Table to delete. */
179 register Tcl_HashEntry *hPtr, *nextPtr;
183 * Free up all the entries in the table.
186 for (i = 0; i < tablePtr->numBuckets; i++) {
187 hPtr = tablePtr->buckets[i];
188 while (hPtr != NULL) {
189 nextPtr = hPtr->nextPtr;
190 ckfree((char *) hPtr);
196 * Free up the bucket array, if it was dynamically allocated.
199 if (tablePtr->buckets != tablePtr->staticBuckets) {
200 ckfree((char *) tablePtr->buckets);
204 * Arrange for panics if the table is used again without
208 tablePtr->findProc = BogusFind;
209 tablePtr->createProc = BogusCreate;
213 *----------------------------------------------------------------------
215 * Tcl_FirstHashEntry --
217 * Locate the first entry in a hash table and set up a record
218 * that can be used to step through all the remaining entries
222 * The return value is a pointer to the first entry in tablePtr,
223 * or NULL if tablePtr has no entries in it. The memory at
224 * *searchPtr is initialized so that subsequent calls to
225 * Tcl_NextHashEntry will return all of the entries in the table,
231 *----------------------------------------------------------------------
235 Tcl_FirstHashEntry(tablePtr, searchPtr)
236 Tcl_HashTable *tablePtr; /* Table to search. */
237 Tcl_HashSearch *searchPtr; /* Place to store information about
238 * progress through the table. */
240 searchPtr->tablePtr = tablePtr;
241 searchPtr->nextIndex = 0;
242 searchPtr->nextEntryPtr = NULL;
243 return Tcl_NextHashEntry(searchPtr);
247 *----------------------------------------------------------------------
249 * Tcl_NextHashEntry --
251 * Once a hash table enumeration has been initiated by calling
252 * Tcl_FirstHashEntry, this procedure may be called to return
253 * successive elements of the table.
256 * The return value is the next entry in the hash table being
257 * enumerated, or NULL if the end of the table is reached.
262 *----------------------------------------------------------------------
266 Tcl_NextHashEntry(searchPtr)
267 register Tcl_HashSearch *searchPtr; /* Place to store information about
268 * progress through the table. Must
269 * have been initialized by calling
270 * Tcl_FirstHashEntry. */
274 while (searchPtr->nextEntryPtr == NULL) {
275 if (searchPtr->nextIndex >= searchPtr->tablePtr->numBuckets) {
278 searchPtr->nextEntryPtr =
279 searchPtr->tablePtr->buckets[searchPtr->nextIndex];
280 searchPtr->nextIndex++;
282 hPtr = searchPtr->nextEntryPtr;
283 searchPtr->nextEntryPtr = hPtr->nextPtr;
288 *----------------------------------------------------------------------
292 * Return statistics describing the layout of the hash table
293 * in its hash buckets.
296 * The return value is a malloc-ed string containing information
297 * about tablePtr. It is the caller's responsibility to free
303 *----------------------------------------------------------------------
307 Tcl_HashStats(tablePtr)
308 Tcl_HashTable *tablePtr; /* Table for which to produce stats. */
310 #define NUM_COUNTERS 10
311 int count[NUM_COUNTERS], overflow, i, j;
313 register Tcl_HashEntry *hPtr;
317 * Compute a histogram of bucket usage.
320 for (i = 0; i < NUM_COUNTERS; i++) {
325 for (i = 0; i < tablePtr->numBuckets; i++) {
327 for (hPtr = tablePtr->buckets[i]; hPtr != NULL; hPtr = hPtr->nextPtr) {
330 if (j < NUM_COUNTERS) {
336 average += (tmp+1.0)*(tmp/tablePtr->numEntries)/2.0;
340 * Print out the histogram and a few other pieces of information.
343 result = (char *) ckalloc((unsigned) ((NUM_COUNTERS*60) + 300));
344 sprintf(result, "%d entries in table, %d buckets\n",
345 tablePtr->numEntries, tablePtr->numBuckets);
346 p = result + strlen(result);
347 for (i = 0; i < NUM_COUNTERS; i++) {
348 sprintf(p, "number of buckets with %d entries: %d\n",
352 sprintf(p, "number of buckets with more %d or more entries: %d\n",
353 NUM_COUNTERS, overflow);
355 sprintf(p, "average search distance for entry: %.1f", average);
360 *----------------------------------------------------------------------
364 * Compute a one-word summary of a text string, which can be
365 * used to generate a hash index.
368 * The return value is a one-word summary of the information in
374 *----------------------------------------------------------------------
379 register char *string; /* String from which to compute hash value. */
381 register unsigned int result;
385 * I tried a zillion different hash functions and asked many other
386 * people for advice. Many people had their own favorite functions,
387 * all different, but no-one had much idea why they were good ones.
388 * I chose the one below (multiply by 9 and add new character)
389 * because of the following reasons:
391 * 1. Multiplying by 10 is perfect for keys that are decimal strings,
392 * and multiplying by 9 is just about as good.
393 * 2. Times-9 is (shift-left-3) plus (old). This means that each
394 * character's bits hang around in the low-order bits of the
395 * hash value for ever, plus they spread fairly rapidly up to
396 * the high-order bits to fill out the hash value. This seems
397 * works well both for decimal and non-decimal strings.
407 result += (result<<3) + c;
413 *----------------------------------------------------------------------
417 * Given a hash table with string keys, and a string key, find
418 * the entry with a matching key.
421 * The return value is a token for the matching entry in the
422 * hash table, or NULL if there was no matching entry.
427 *----------------------------------------------------------------------
430 static Tcl_HashEntry *
431 StringFind(tablePtr, key)
432 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
433 char *key; /* Key to use to find matching entry. */
435 register Tcl_HashEntry *hPtr;
436 register char *p1, *p2;
439 index = HashString(key) & tablePtr->mask;
442 * Search all of the entries in the appropriate bucket.
445 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
446 hPtr = hPtr->nextPtr) {
447 for (p1 = key, p2 = hPtr->key.string; ; p1++, p2++) {
460 *----------------------------------------------------------------------
464 * Given a hash table with string keys, and a string key, find
465 * the entry with a matching key. If there is no matching entry,
466 * then create a new entry that does match.
469 * The return value is a pointer to the matching entry. If this
470 * is a newly-created entry, then *newPtr will be set to a non-zero
471 * value; otherwise *newPtr will be set to 0. If this is a new
472 * entry the value stored in the entry will initially be 0.
475 * A new entry may be added to the hash table.
477 *----------------------------------------------------------------------
480 static Tcl_HashEntry *
481 StringCreate(tablePtr, key, newPtr)
482 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
483 char *key; /* Key to use to find or create matching
485 int *newPtr; /* Store info here telling whether a new
486 * entry was created. */
488 register Tcl_HashEntry *hPtr;
489 register char *p1, *p2;
492 index = HashString(key) & tablePtr->mask;
495 * Search all of the entries in this bucket.
498 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
499 hPtr = hPtr->nextPtr) {
500 for (p1 = key, p2 = hPtr->key.string; ; p1++, p2++) {
512 * Entry not found. Add a new one to the bucket.
516 hPtr = (Tcl_HashEntry *) ckalloc((unsigned)
517 (sizeof(Tcl_HashEntry) + strlen(key) - (sizeof(hPtr->key) -1)));
518 hPtr->tablePtr = tablePtr;
519 hPtr->bucketPtr = &(tablePtr->buckets[index]);
520 hPtr->nextPtr = *hPtr->bucketPtr;
521 hPtr->clientData = 0;
522 strcpy(hPtr->key.string, key);
523 *hPtr->bucketPtr = hPtr;
524 tablePtr->numEntries++;
527 * If the table has exceeded a decent size, rebuild it with many
531 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
532 RebuildTable(tablePtr);
538 *----------------------------------------------------------------------
542 * Given a hash table with one-word keys, and a one-word key, find
543 * the entry with a matching key.
546 * The return value is a token for the matching entry in the
547 * hash table, or NULL if there was no matching entry.
552 *----------------------------------------------------------------------
555 static Tcl_HashEntry *
556 OneWordFind(tablePtr, key)
557 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
558 register char *key; /* Key to use to find matching entry. */
560 register Tcl_HashEntry *hPtr;
563 index = RANDOM_INDEX(tablePtr, key);
566 * Search all of the entries in the appropriate bucket.
569 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
570 hPtr = hPtr->nextPtr) {
571 if (hPtr->key.oneWordValue == key) {
579 *----------------------------------------------------------------------
583 * Given a hash table with one-word keys, and a one-word key, find
584 * the entry with a matching key. If there is no matching entry,
585 * then create a new entry that does match.
588 * The return value is a pointer to the matching entry. If this
589 * is a newly-created entry, then *newPtr will be set to a non-zero
590 * value; otherwise *newPtr will be set to 0. If this is a new
591 * entry the value stored in the entry will initially be 0.
594 * A new entry may be added to the hash table.
596 *----------------------------------------------------------------------
599 static Tcl_HashEntry *
600 OneWordCreate(tablePtr, key, newPtr)
601 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
602 register char *key; /* Key to use to find or create matching
604 int *newPtr; /* Store info here telling whether a new
605 * entry was created. */
607 register Tcl_HashEntry *hPtr;
610 index = RANDOM_INDEX(tablePtr, key);
613 * Search all of the entries in this bucket.
616 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
617 hPtr = hPtr->nextPtr) {
618 if (hPtr->key.oneWordValue == key) {
625 * Entry not found. Add a new one to the bucket.
629 hPtr = (Tcl_HashEntry *) ckalloc(sizeof(Tcl_HashEntry));
630 hPtr->tablePtr = tablePtr;
631 hPtr->bucketPtr = &(tablePtr->buckets[index]);
632 hPtr->nextPtr = *hPtr->bucketPtr;
633 hPtr->clientData = 0;
634 hPtr->key.oneWordValue = key;
635 *hPtr->bucketPtr = hPtr;
636 tablePtr->numEntries++;
639 * If the table has exceeded a decent size, rebuild it with many
643 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
644 RebuildTable(tablePtr);
650 *----------------------------------------------------------------------
654 * Given a hash table with array-of-int keys, and a key, find
655 * the entry with a matching key.
658 * The return value is a token for the matching entry in the
659 * hash table, or NULL if there was no matching entry.
664 *----------------------------------------------------------------------
667 static Tcl_HashEntry *
668 ArrayFind(tablePtr, key)
669 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
670 char *key; /* Key to use to find matching entry. */
672 register Tcl_HashEntry *hPtr;
673 int *arrayPtr = (int *) key;
674 register int *iPtr1, *iPtr2;
677 for (index = 0, count = tablePtr->keyType, iPtr1 = arrayPtr;
678 count > 0; count--, iPtr1++) {
681 index = RANDOM_INDEX(tablePtr, index);
684 * Search all of the entries in the appropriate bucket.
687 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
688 hPtr = hPtr->nextPtr) {
689 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words,
690 count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
694 if (*iPtr1 != *iPtr2) {
703 *----------------------------------------------------------------------
707 * Given a hash table with one-word keys, and a one-word key, find
708 * the entry with a matching key. If there is no matching entry,
709 * then create a new entry that does match.
712 * The return value is a pointer to the matching entry. If this
713 * is a newly-created entry, then *newPtr will be set to a non-zero
714 * value; otherwise *newPtr will be set to 0. If this is a new
715 * entry the value stored in the entry will initially be 0.
718 * A new entry may be added to the hash table.
720 *----------------------------------------------------------------------
723 static Tcl_HashEntry *
724 ArrayCreate(tablePtr, key, newPtr)
725 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
726 register char *key; /* Key to use to find or create matching
728 int *newPtr; /* Store info here telling whether a new
729 * entry was created. */
731 register Tcl_HashEntry *hPtr;
732 int *arrayPtr = (int *) key;
733 register int *iPtr1, *iPtr2;
736 for (index = 0, count = tablePtr->keyType, iPtr1 = arrayPtr;
737 count > 0; count--, iPtr1++) {
740 index = RANDOM_INDEX(tablePtr, index);
743 * Search all of the entries in the appropriate bucket.
746 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
747 hPtr = hPtr->nextPtr) {
748 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words,
749 count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
754 if (*iPtr1 != *iPtr2) {
761 * Entry not found. Add a new one to the bucket.
765 hPtr = (Tcl_HashEntry *) ckalloc((unsigned) (sizeof(Tcl_HashEntry)
766 + (tablePtr->keyType*sizeof(int)) - 4));
767 hPtr->tablePtr = tablePtr;
768 hPtr->bucketPtr = &(tablePtr->buckets[index]);
769 hPtr->nextPtr = *hPtr->bucketPtr;
770 hPtr->clientData = 0;
771 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words, count = tablePtr->keyType;
772 count > 0; count--, iPtr1++, iPtr2++) {
775 *hPtr->bucketPtr = hPtr;
776 tablePtr->numEntries++;
779 * If the table has exceeded a decent size, rebuild it with many
783 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
784 RebuildTable(tablePtr);
790 *----------------------------------------------------------------------
794 * This procedure is invoked when an Tcl_FindHashEntry is called
795 * on a table that has been deleted.
798 * If panic returns (which it shouldn't) this procedure returns
804 *----------------------------------------------------------------------
808 static Tcl_HashEntry *
809 BogusFind(tablePtr, key)
810 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
811 char *key; /* Key to use to find matching entry. */
813 panic("called Tcl_FindHashEntry on deleted table");
818 *----------------------------------------------------------------------
822 * This procedure is invoked when an Tcl_CreateHashEntry is called
823 * on a table that has been deleted.
826 * If panic returns (which it shouldn't) this procedure returns
832 *----------------------------------------------------------------------
836 static Tcl_HashEntry *
837 BogusCreate(tablePtr, key, newPtr)
838 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
839 char *key; /* Key to use to find or create matching
841 int *newPtr; /* Store info here telling whether a new
842 * entry was created. */
844 panic("called Tcl_CreateHashEntry on deleted table");
849 *----------------------------------------------------------------------
853 * This procedure is invoked when the ratio of entries to hash
854 * buckets becomes too large. It creates a new table with a
855 * larger bucket array and moves all of the entries into the
862 * Memory gets reallocated and entries get re-hashed to new
865 *----------------------------------------------------------------------
869 RebuildTable(tablePtr)
870 register Tcl_HashTable *tablePtr; /* Table to enlarge. */
872 int oldSize, count, index;
873 Tcl_HashEntry **oldBuckets;
874 register Tcl_HashEntry **oldChainPtr, **newChainPtr;
875 register Tcl_HashEntry *hPtr;
877 oldSize = tablePtr->numBuckets;
878 oldBuckets = tablePtr->buckets;
881 * Allocate and initialize the new bucket array, and set up
882 * hashing constants for new array size.
885 tablePtr->numBuckets *= 4;
886 tablePtr->buckets = (Tcl_HashEntry **) ckalloc((unsigned)
887 (tablePtr->numBuckets * sizeof(Tcl_HashEntry *)));
888 for (count = tablePtr->numBuckets, newChainPtr = tablePtr->buckets;
889 count > 0; count--, newChainPtr++) {
892 tablePtr->rebuildSize *= 4;
893 tablePtr->downShift -= 2;
894 tablePtr->mask = (tablePtr->mask << 2) + 3;
897 * Rehash all of the existing entries into the new bucket array.
900 for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
901 for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
902 *oldChainPtr = hPtr->nextPtr;
903 if (tablePtr->keyType == TCL_STRING_KEYS) {
904 index = HashString(hPtr->key.string) & tablePtr->mask;
905 } else if (tablePtr->keyType == TCL_ONE_WORD_KEYS) {
906 index = RANDOM_INDEX(tablePtr, hPtr->key.oneWordValue);
911 for (index = 0, count = tablePtr->keyType,
912 iPtr = hPtr->key.words; count > 0; count--, iPtr++) {
915 index = RANDOM_INDEX(tablePtr, index);
917 hPtr->bucketPtr = &(tablePtr->buckets[index]);
918 hPtr->nextPtr = *hPtr->bucketPtr;
919 *hPtr->bucketPtr = hPtr;
924 * Free up the old bucket array, if it was dynamically allocated.
927 if (oldBuckets != tablePtr->staticBuckets) {
928 ckfree((char *) oldBuckets);