lib/Support/StringMap.cpp

   1 //===--- StringMap.cpp - String Hash table map implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Chris Lattner and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the StringMap class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/StringMap.h"
  15 #include <cassert>
  16 using namespace llvm;
  17
  18 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  19   ItemSize = itemSize;
  20
  21   // If a size is specified, initialize the table with that many buckets.
  22   if (InitSize) {
  23     init(InitSize);
  24     return;
  25   }
  26
  27   // Otherwise, initialize it with zero buckets to avoid the allocation.
  28   TheTable = 0;
  29   NumBuckets = 0;
  30   NumItems = 0;
  31   NumTombstones = 0;
  32 }
  33
  34 void StringMapImpl::init(unsigned InitSize) {
  35   assert((InitSize & (InitSize-1)) == 0 &&
  36          "Init Size must be a power of 2 or zero!");
  37   NumBuckets = InitSize ? InitSize : 16;
  38   NumItems = 0;
  39   NumTombstones = 0;
  40
  41   TheTable = new ItemBucket[NumBuckets+1]();
  42   memset(TheTable, 0, NumBuckets*sizeof(ItemBucket));
  43
  44   // Allocate one extra bucket, set it to look filled so the iterators stop at
  45   // end.
  46   TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
  47 }
  48
  49
  /// HashString - Compute a hash code for the characters in [Start, End).
  /// The range is delimited by pointers, so the string does not need to be
  /// null-terminated.
  static unsigned HashString(const char *Start, const char *End) {
    // Bernstein hash function: fold each character in as Hash*33 + c.
    // TODO: investigate whether a modified bernstein hash function performs
    // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
    //   X*33+c -> X*33^c
    unsigned int Hash = 0;
    for (const char *Cur = Start; Cur != End; ++Cur)
      Hash = Hash * 33 + *Cur;

    // Final step: add the hash shifted right by five bits back into itself.
    Hash += Hash >> 5;
    return Hash;
  }
  63
  64 /// LookupBucketFor - Look up the bucket that the specified string should end
  65 /// up in.  If it already exists as a key in the map, the Item pointer for the
  66 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
  67 /// case, the FullHashValue field of the bucket will be set to the hash value
  68 /// of the string.
  69 unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
  70                                         const char *NameEnd) {
  71   unsigned HTSize = NumBuckets;
  72   if (HTSize == 0) {  // Hash table unallocated so far?
  73     init(16);
  74     HTSize = NumBuckets;
  75   }
  76   unsigned FullHashValue = HashString(NameStart, NameEnd);
  77   unsigned BucketNo = FullHashValue & (HTSize-1);
  78
  79   unsigned ProbeAmt = 1;
  80   int FirstTombstone = -1;
  81   while (1) {
  82     ItemBucket &Bucket = TheTable[BucketNo];
  83     StringMapEntryBase *BucketItem = Bucket.Item;
  84     // If we found an empty bucket, this key isn't in the table yet, return it.
  85     if (BucketItem == 0) {
  86       // If we found a tombstone, we want to reuse the tombstone instead of an
  87       // empty bucket.  This reduces probing.
  88       if (FirstTombstone != -1) {
  89         TheTable[FirstTombstone].FullHashValue = FullHashValue;
  90         return FirstTombstone;
  91       }
  92
  93       Bucket.FullHashValue = FullHashValue;
  94       return BucketNo;
  95     }
  96
  97     if (BucketItem == getTombstoneVal()) {
  98       // Skip over tombstones.  However, remember the first one we see.
  99       if (FirstTombstone == -1) FirstTombstone = BucketNo;
 100     } else if (Bucket.FullHashValue == FullHashValue) {
 101       // If the full hash value matches, check deeply for a match.  The common
 102       // case here is that we are only looking at the buckets (for item info
 103       // being non-null and for the full hash value) not at the items.  This
 104       // is important for cache locality.
 105
 106       // Do the comparison like this because NameStart isn't necessarily
 107       // null-terminated!
 108       char *ItemStr = (char*)BucketItem+ItemSize;
 109       unsigned ItemStrLen = BucketItem->getKeyLength();
 110       if (unsigned(NameEnd-NameStart) == ItemStrLen &&
 111           memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
 112         // We found a match!
 113         return BucketNo;
 114       }
 115     }
 116
 117     // Okay, we didn't find the item.  Probe to the next bucket.
 118     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 119
 120     // Use quadratic probing, it has fewer clumping artifacts than linear
 121     // probing and has good cache behavior in the common case.
 122     ++ProbeAmt;
 123   }
 124 }
 125
 126
 127 /// FindKey - Look up the bucket that contains the specified key. If it exists
 128 /// in the map, return the bucket number of the key.  Otherwise return -1.
 129 /// This does not modify the map.
 130 int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
 131   unsigned HTSize = NumBuckets;
 132   if (HTSize == 0) return -1;  // Really empty table?
 133   unsigned FullHashValue = HashString(KeyStart, KeyEnd);
 134   unsigned BucketNo = FullHashValue & (HTSize-1);
 135
 136   unsigned ProbeAmt = 1;
 137   while (1) {
 138     ItemBucket &Bucket = TheTable[BucketNo];
 139     StringMapEntryBase *BucketItem = Bucket.Item;
 140     // If we found an empty bucket, this key isn't in the table yet, return.
 141     if (BucketItem == 0)
 142       return -1;
 143
 144     if (BucketItem == getTombstoneVal()) {
 145       // Ignore tombstones.
 146     } else if (Bucket.FullHashValue == FullHashValue) {
 147       // If the full hash value matches, check deeply for a match.  The common
 148       // case here is that we are only looking at the buckets (for item info
 149       // being non-null and for the full hash value) not at the items.  This
 150       // is important for cache locality.
 151
 152       // Do the comparison like this because NameStart isn't necessarily
 153       // null-terminated!
 154       char *ItemStr = (char*)BucketItem+ItemSize;
 155       unsigned ItemStrLen = BucketItem->getKeyLength();
 156       if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
 157           memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
 158         // We found a match!
 159         return BucketNo;
 160       }
 161     }
 162
 163     // Okay, we didn't find the item.  Probe to the next bucket.
 164     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 165
 166     // Use quadratic probing, it has fewer clumping artifacts than linear
 167     // probing and has good cache behavior in the common case.
 168     ++ProbeAmt;
 169   }
 170 }
 171
 172 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
 173 /// delete it.  This aborts if the value isn't in the table.
 174 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
 175   const char *VStr = (char*)V + ItemSize;
 176   StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
 177   V2 = V2;
 178   assert(V == V2 && "Didn't find key?");
 179 }
 180
 181 /// RemoveKey - Remove the StringMapEntry for the specified key from the
 182 /// table, returning it.  If the key is not in the table, this returns null.
 183 StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
 184                                              const char *KeyEnd) {
 185   int Bucket = FindKey(KeyStart, KeyEnd);
 186   if (Bucket == -1) return 0;
 187
 188   StringMapEntryBase *Result = TheTable[Bucket].Item;
 189   TheTable[Bucket].Item = getTombstoneVal();
 190   --NumItems;
 191   ++NumTombstones;
 192   return Result;
 193 }
 194
 195
 196
 197 /// RehashTable - Grow the table, redistributing values into the buckets with
 198 /// the appropriate mod-of-hashtable-size.
 199 void StringMapImpl::RehashTable() {
 200   unsigned NewSize = NumBuckets*2;
 201   // Allocate one extra bucket which will always be non-empty.  This allows the
 202   // iterators to stop at end.
 203   ItemBucket *NewTableArray = new ItemBucket[NewSize+1]();
 204   memset(NewTableArray, 0, NewSize*sizeof(ItemBucket));
 205   NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
 206
 207   // Rehash all the items into their new buckets.  Luckily :) we already have
 208   // the hash values available, so we don't have to rehash any strings.
 209   for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
 210     if (IB->Item && IB->Item != getTombstoneVal()) {
 211       // Fast case, bucket available.
 212       unsigned FullHash = IB->FullHashValue;
 213       unsigned NewBucket = FullHash & (NewSize-1);
 214       if (NewTableArray[NewBucket].Item == 0) {
 215         NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
 216         NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
 217         continue;
 218       }
 219
 220       // Otherwise probe for a spot.
 221       unsigned ProbeSize = 1;
 222       do {
 223         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
 224       } while (NewTableArray[NewBucket].Item);
 225
 226       // Finally found a slot.  Fill it in.
 227       NewTableArray[NewBucket].Item = IB->Item;
 228       NewTableArray[NewBucket].FullHashValue = FullHash;
 229     }
 230   }
 231
 232   delete[] TheTable;
 233
 234   TheTable = NewTableArray;
 235   NumBuckets = NewSize;
 236 }