1 /* Yash: yet another shell */
2 /* hashtable.c: hashtable library */
3 /* (C) 2007-2012 magicant */
5 /* This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "hashtable.h"
28 /* A hashtable is a mapping from keys to values.
29 * Keys and values are all of type (void *).
30 * NULL is allowed as a value, but not as a key.
31 * The capacity of a hashtable is always no less than one. */
33 /* The hashtable_T structure is defined as follows:
34 * struct hashtable_T {
37 * hashfunc_T *hashfunc;
42 * struct hash_entry *entries;
44 * `capacity' is the size of array `entries'.
45 * `count' is the number of entries contained in the hashtable.
46 * `hashfunc' is a pointer to the hash function.
47 * `keycmp' is a pointer to the function that compares keys.
48 * `emptyindex' is the index of the first empty entry.
49 * `tailindex' is the index of the first tail entry.
50 * `indices' is a pointer to the bucket array.
51 * `entries' is a pointer to the array of entries.
53 * The collision resolution strategy used in this implementation is a kind of
54 * separate chaining, but it differs from normal chaining in that entries are
55 * stored in a single array (`entries'). An advantage over normal chaining,
56 * which stores entries in linked lists, is spatial locality: entries can be
57 * quickly referenced because they are collected in one array. Another advantage
58 * is that we don't have to call `malloc' or `free' each time an entry is added
62 //#define DEBUG_HASH 1
63 #if DEBUG_HASH /* For debugging */
64 # define DEBUG_PRINT_STATISTICS(ht) (print_statistics(ht))
66 static void print_statistics(const hashtable_T *ht);
68 # define DEBUG_PRINT_STATISTICS(ht) ((void) 0)
73 #define NOTHING ((size_t) -1)
81 /* An entry is occupied iff `.kv.key' is non-NULL.
82 * When an entry is unoccupied, the values of the other members of the entry are
86 /* Initializes a hashtable with the specified capacity.
87 * `hashfunc' is a hash function to hash keys.
88 * `keycmp' is a function that compares two keys. */
89 hashtable_T *ht_initwithcapacity(
90 hashtable_T *ht, hashfunc_T *hashfunc, keycmp_T *keycmp,
96 ht->capacity = capacity;
98 ht->hashfunc = hashfunc;
100 ht->emptyindex = NOTHING;
102 ht->indices = xmallocn(capacity, sizeof *ht->indices);
103 ht->entries = xmallocn(capacity, sizeof *ht->entries);
105 for (size_t i = 0; i < capacity; i++) {
106 ht->indices[i] = NOTHING;
107 ht->entries[i].kv.key = NULL;
113 /* Changes the capacity of the specified hashtable.
114 * If the specified new capacity is smaller than the number of the entries in
115 * the hashtable, the capacity is not changed.
116 * Note that the capacity cannot be zero. If `newcapacity' is zero, it is
117 * assumed to be one. */
118 /* Capacity should be an odd integer, especially a prime number. */
119 hashtable_T *ht_setcapacity(hashtable_T *ht, size_t newcapacity)
121 if (newcapacity == 0)
123 if (newcapacity < ht->count)
124 newcapacity = ht->count;
126 size_t oldcapacity = ht->capacity;
127 size_t *oldindices = ht->indices;
128 size_t *newindices = xmallocn(newcapacity, sizeof *ht->indices);
129 struct hash_entry *oldentries = ht->entries;
130 struct hash_entry *newentries = xmallocn(newcapacity, sizeof *ht->entries);
133 for (size_t i = 0; i < newcapacity; i++) {
134 newindices[i] = NOTHING;
135 newentries[i].kv.key = NULL;
138 /* move the data from oldentries to newentries */
139 for (size_t i = 0; i < oldcapacity; i++) {
140 void *key = oldentries[i].kv.key;
142 hashval_T hash = oldentries[i].hash;
143 size_t newindex = (size_t) hash % newcapacity;
144 newentries[tail] = (struct hash_entry) {
145 .next = newindices[newindex],
147 .kv = oldentries[i].kv,
149 newindices[newindex] = tail;
156 ht->capacity = newcapacity;
157 ht->emptyindex = NOTHING;
158 ht->tailindex = tail;
159 ht->indices = newindices;
160 ht->entries = newentries;
164 /* Increases the capacity as large as necessary
165 * so that the capacity is no less than the specified. */
166 hashtable_T *ht_ensurecapacity(hashtable_T *ht, size_t capacity)
168 if (capacity <= ht->capacity)
171 size_t cap15 = ht->capacity + (ht->capacity >> 1);
172 if (capacity < cap15)
174 if (capacity < ht->capacity + 6)
175 capacity = ht->capacity + 6;
176 return ht_setcapacity(ht, capacity);
179 /* Removes all the entries of a hashtable.
180 * If `freer' is non-NULL, it is called for each entry removed (in an
181 * unspecified order).
182 * The capacity of the hashtable is not changed. */
183 hashtable_T *ht_clear(hashtable_T *ht, void freer(kvpair_T kv))
185 size_t *indices = ht->indices;
186 struct hash_entry *entries = ht->entries;
191 for (size_t i = 0, cap = ht->capacity; i < cap; i++) {
192 indices[i] = NOTHING;
193 if (entries[i].kv.key != NULL) {
195 freer(entries[i].kv);
196 entries[i].kv.key = NULL;
201 ht->emptyindex = NOTHING;
206 /* Returns the entry whose key is equal to the specified `key',
207 * or { NULL, NULL } if `key' is NULL or there is no such entry. */
208 kvpair_T ht_get(const hashtable_T *ht, const void *key)
211 hashval_T hash = ht->hashfunc(key);
212 size_t index = ht->indices[(size_t) hash % ht->capacity];
213 while (index != NOTHING) {
214 struct hash_entry *entry = &ht->entries[index];
215 if (entry->hash == hash && ht->keycmp(entry->kv.key, key) == 0)
220 return (kvpair_T) { NULL, NULL, };
223 /* Makes a new entry with the specified key and value,
224 * removing and returning the old entry for the key.
225 * If there is no such old entry, { NULL, NULL } is returned.
226 * `key' must not be NULL. */
227 kvpair_T ht_set(hashtable_T *ht, const void *key, const void *value)
231 /* if there is an entry with the specified key, simply replace the value */
232 hashval_T hash = ht->hashfunc(key);
233 size_t mhash = (size_t) hash % ht->capacity;
234 size_t index = ht->indices[mhash];
235 struct hash_entry *entry;
236 while (index != NOTHING) {
237 entry = &ht->entries[index];
238 if (entry->hash == hash && ht->keycmp(entry->kv.key, key) == 0) {
239 kvpair_T oldkv = entry->kv;
240 entry->kv = (kvpair_T) { (void *) key, (void *) value, };
241 DEBUG_PRINT_STATISTICS(ht);
247 /* No entry with the specified key was found; we add a new entry. */
248 index = ht->emptyindex;
249 if (index != NOTHING) {
250 /* if there is an empty entry, use it */
251 entry = &ht->entries[index];
252 ht->emptyindex = entry->next;
254 /* if there is no empty entry, use a tail entry */
255 ht_ensurecapacity(ht, ht->count + 1);
256 mhash = (size_t) hash % ht->capacity;
257 index = ht->tailindex++;
258 entry = &ht->entries[index];
260 *entry = (struct hash_entry) {
261 .next = ht->indices[mhash],
263 .kv = (kvpair_T) { (void *) key, (void *) value, },
265 ht->indices[mhash] = index;
267 DEBUG_PRINT_STATISTICS(ht);
268 return (kvpair_T) { NULL, NULL, };
271 /* Removes and returns the entry with the specified key.
272 * If `key' is NULL or there is no such entry, { NULL, NULL } is returned. */
273 kvpair_T ht_remove(hashtable_T *ht, const void *key)
276 hashval_T hash = ht->hashfunc(key);
277 size_t *indexp = &ht->indices[(size_t) hash % ht->capacity];
278 while (*indexp != NOTHING) {
279 size_t index = *indexp;
280 struct hash_entry *entry = &ht->entries[index];
281 if (entry->hash == hash && ht->keycmp(entry->kv.key, key) == 0) {
282 kvpair_T oldkv = entry->kv;
283 *indexp = entry->next;
284 entry->next = ht->emptyindex;
285 ht->emptyindex = index;
286 entry->kv.key = NULL;
290 indexp = &entry->next;
293 return (kvpair_T) { NULL, NULL, };
298 /* Calls the specified function `f' for each entry in the specified hashtable.
299 * The order in which the entries are applied the function to is unspecified.
300 * If `f' returns a non-zero value for some entry, `f' is not called any more
301 * and `ht_each' immediately returns the non-zero value. Otherwise, that is,
302 * if `f' returns zero for all the entry, `ht_each' also returns zero.
303 * You must not add or remove any entry inside function `f'. */
304 int ht_each(const hashtable_T *ht, int f(kvpair_T kv))
306 struct hash_entry *entries = ht->entries;
308 for (size_t i = 0, cap = ht->capacity; i < cap; i++) {
309 kvpair_T kv = entries[i].kv;
310 if (kv.key != NULL) {
321 /* Iterates the entries of the specified hashtable.
322 * When starting new iteration, `*indexp' must have been initialized to zero.
323 * Each time this function is called, it updates `*indexp' and returns one
325 * You must not change the value of `*indexp' from outside this function or
326 * add/remove any entry in the hashtable until the iteration finishes.
327 * Each entry is returned exactly once, in an unspecified order.
328 * If there is no more entry to be iterated, { NULL, NULL } is returned. */
329 kvpair_T ht_next(const hashtable_T *restrict ht, size_t *restrict indexp)
331 while (*indexp < ht->capacity) {
332 kvpair_T kv = ht->entries[*indexp].kv;
337 return (kvpair_T) { NULL, NULL, };
340 /* Returns a newly malloced array of key-value pairs that contains all the
341 * elements of the specified hashtable.
342 * The returned array is terminated by the { NULL, NULL } element. */
343 kvpair_T *ht_tokvarray(const hashtable_T *ht)
345 kvpair_T *array = xmalloce(ht->count, 1, sizeof *array);
348 for (size_t i = 0; i < ht->capacity; i++) {
349 if (ht->entries[i].kv.key != NULL)
350 array[index++] = ht->entries[i].kv;
353 assert(index == ht->count);
354 array[index] = (kvpair_T) { NULL, NULL, };
359 /* A hash function for a byte string.
360 * The argument is a pointer to a byte string (const char *).
361 * You can use `htstrcmp' as a corresponding comparison function. */
362 hashval_T hashstr(const void *s)
364 /* The hashing algorithm is FNV hash.
365 * Cf. http://www.isthe.com/chongo/tech/comp/fnv/ */
366 const unsigned char *c = s;
369 h = (h ^ (hashval_T) *c++) * FNVPRIME;
373 /* A hash function for a wide string.
374 * The argument is a pointer to a wide string (const wchar_t *).
375 * You can use `htwcscmp' for a corresponding comparison function. */
376 hashval_T hashwcs(const void *s)
378 /* The hashing algorithm is a slightly modified version of FNV hash.
379 * Cf. http://www.isthe.com/chongo/tech/comp/fnv/ */
380 const wchar_t *c = s;
383 h = (h ^ (hashval_T) *c++) * FNVPRIME;
387 /* A comparison function for wide strings.
388 * The arguments are pointers to wide strings (const wchar_t *).
389 * You can use `hashwcs' for a corresponding hash function. */
390 int htwcscmp(const void *s1, const void *s2)
392 return wcscmp((const wchar_t *) s1, (const wchar_t *) s2);
395 /* A comparison function for key-value pairs with multibyte-string keys.
396 * The arguments are pointers to kvpair_T's (const kvpair_T *) whose keys are
397 * multibyte strings. */
398 int keystrcoll(const void *k1, const void *k2)
400 return strcoll(((const kvpair_T *) k1)->key, ((const kvpair_T *) k2)->key);
403 /* A comparison function for key-value pairs with wide-string keys.
404 * The arguments are pointers to kvpair_T's (const kvpair_T *) whose keys are
406 int keywcscoll(const void *k1, const void *k2)
408 return wcscoll(((const kvpair_T *) k1)->key, ((const kvpair_T *) k2)->key);
411 /* `Free's the key of the specified key-value pair.
412 * Can be used as the freer function to `ht_clear'. */
413 void kfree(kvpair_T kv)
418 /* `Free's the value of the specified key-value pair.
419 * Can be used as the freer function to `ht_clear'. */
420 void vfree(kvpair_T kv)
425 /* `Free's the key and the value of the specified key-value pair.
426 * Can be used as the freer function to `ht_clear'. */
427 void kvfree(kvpair_T kv)
435 /* Prints statistics.
436 * This function is used in debugging. */
437 void print_statistics(const hashtable_T *ht)
439 fprintf(stderr, "DEBUG: id=%p hash->count=%zu, capacity=%zu\n",
440 (void *) ht, ht->count, ht->capacity);
441 fprintf(stderr, "DEBUG: hash->emptyindex=%zu, tailindex=%zu\n",
442 ht->emptyindex, ht->tailindex);
444 unsigned emptycount = 0, collcount = 0;
445 for (size_t i = ht->emptyindex; i != NOTHING; i = ht->entries[i].next)
447 for (size_t i = 0; i < ht->capacity; i++)
448 if (ht->entries[i].kv.key && ht->entries[i].next != NOTHING)
450 fprintf(stderr, "DEBUG: hash empties=%u collisions=%u\n\n",
451 emptycount, collcount);
456 /* vim: set ts=8 sts=4 sw=4 noet tw=80: */