Re-land: "[Support] Replace HashString with djbHash."

author Jonas Devlieghere <jonas@devlieghere.com>

Mon, 26 Feb 2018 15:16:42 +0000 (15:16 +0000)

committer Jonas Devlieghere <jonas@devlieghere.com>

Mon, 26 Feb 2018 15:16:42 +0000 (15:16 +0000)
author Jonas Devlieghere <jonas@devlieghere.com>
Mon, 26 Feb 2018 15:16:42 +0000 (15:16 +0000)
committer Jonas Devlieghere <jonas@devlieghere.com>
Mon, 26 Feb 2018 15:16:42 +0000 (15:16 +0000)
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h

index 60652f8..45f6677 100644 (file)
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -232,19 +232,6 @@ void SplitString(StringRef Source,
                   SmallVectorImpl<StringRef> &OutFragments,
                   StringRef Delimiters = " \t\n\v\f\r");
  
-/// HashString - Hash function for strings.
-///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-//   X*33+c -> X*33^c
-inline unsigned HashString(StringRef Str, unsigned Result = 0) {
-  for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
-    Result = Result * 33 + (unsigned char)Str[i];
-  return Result;
-}
-
  /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
  inline StringRef getOrdinalSuffix(unsigned Val) {
    // It is critically important that we do this perfectly for
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp

index 9382c3c..79262cc 100644 (file)
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -14,6 +14,7 @@
  #include "llvm/ADT/StringMap.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  
@@ -32,7 +33,7 @@ static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
  
  StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
    ItemSize = itemSize;
-  
+
    // If a size is specified, initialize the table with that many buckets.
    if (InitSize) {
      // The table will grow when the number of entries reach 3/4 of the number of
@@ -41,7 +42,7 @@ StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
      init(getMinBucketToReserveForEntries(InitSize));
      return;
    }
-  
+
    // Otherwise, initialize it with zero buckets to avoid the allocation.
    TheTable = nullptr;
    NumBuckets = 0;
@@ -56,7 +57,7 @@ void StringMapImpl::init(unsigned InitSize) {
    unsigned NewNumBuckets = InitSize ? InitSize : 16;
    NumItems = 0;
    NumTombstones = 0;
-  
+
    TheTable = static_cast<StringMapEntryBase **>(
        std::calloc(NewNumBuckets+1,
                    sizeof(StringMapEntryBase **) + sizeof(unsigned)));
@@ -82,7 +83,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
      init(16);
      HTSize = NumBuckets;
    }
-  unsigned FullHashValue = HashString(Name);
+  unsigned FullHashValue = djbHash(Name, 0);
    unsigned BucketNo = FullHashValue & (HTSize-1);
    unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
  
@@ -98,11 +99,11 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
          HashTable[FirstTombstone] = FullHashValue;
          return FirstTombstone;
        }
-      
+
        HashTable[BucketNo] = FullHashValue;
        return BucketNo;
      }
-    
+
      if (BucketItem == getTombstoneVal()) {
        // Skip over tombstones.  However, remember the first one we see.
        if (FirstTombstone == -1) FirstTombstone = BucketNo;
@@ -111,7 +112,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
        // case here is that we are only looking at the buckets (for item info
        // being non-null and for the full hash value) not at the items.  This
        // is important for cache locality.
-      
+
        // Do the comparison like this because Name isn't necessarily
        // null-terminated!
        char *ItemStr = (char*)BucketItem+ItemSize;
@@ -120,10 +121,10 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
          return BucketNo;
        }
      }
-    
+
      // Okay, we didn't find the item.  Probe to the next bucket.
      BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
      // Use quadratic probing, it has fewer clumping artifacts than linear
      // probing and has good cache behavior in the common case.
      ++ProbeAmt;
@@ -136,7 +137,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
  int StringMapImpl::FindKey(StringRef Key) const {
    unsigned HTSize = NumBuckets;
    if (HTSize == 0) return -1;  // Really empty table?
-  unsigned FullHashValue = HashString(Key);
+  unsigned FullHashValue = djbHash(Key, 0);
    unsigned BucketNo = FullHashValue & (HTSize-1);
    unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
  
@@ -146,7 +147,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
      // If we found an empty bucket, this key isn't in the table yet, return.
      if (LLVM_LIKELY(!BucketItem))
        return -1;
-    
+
      if (BucketItem == getTombstoneVal()) {
        // Ignore tombstones.
      } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
@@ -154,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
        // case here is that we are only looking at the buckets (for item info
        // being non-null and for the full hash value) not at the items.  This
        // is important for cache locality.
-      
+
        // Do the comparison like this because NameStart isn't necessarily
        // null-terminated!
        char *ItemStr = (char*)BucketItem+ItemSize;
@@ -163,10 +164,10 @@ int StringMapImpl::FindKey(StringRef Key) const {
          return BucketNo;
        }
      }
-    
+
      // Okay, we didn't find the item.  Probe to the next bucket.
      BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
      // Use quadratic probing, it has fewer clumping artifacts than linear
      // probing and has good cache behavior in the common case.
      ++ProbeAmt;
@@ -187,7 +188,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
  StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
    int Bucket = FindKey(Key);
    if (Bucket == -1) return nullptr;
-  
+
    StringMapEntryBase *Result = TheTable[Bucket];
    TheTable[Bucket] = getTombstoneVal();
    --NumItems;
@@ -241,13 +242,13 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
            NewBucketNo = NewBucket;
          continue;
        }
-      
+
        // Otherwise probe for a spot.
        unsigned ProbeSize = 1;
        do {
          NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
        } while (NewTableArray[NewBucket]);
-      
+
        // Finally found a slot.  Fill it in.
        NewTableArray[NewBucket] = Bucket;
        NewHashArray[NewBucket] = FullHash;
@@ -255,9 +256,9 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
          NewBucketNo = NewBucket;
      }
    }
-  
+
    free(TheTable);
-  
+
    TheTable = NewTableArray;
    NumBuckets = NewSize;
    NumTombstones = 0;
author	Jonas Devlieghere <jonas@devlieghere.com>
	Mon, 26 Feb 2018 15:16:42 +0000 (15:16 +0000)
committer	Jonas Devlieghere <jonas@devlieghere.com>
	Mon, 26 Feb 2018 15:16:42 +0000 (15:16 +0000)
include/llvm/ADT/StringExtras.h		patch \| blob \| history
lib/Support/StringMap.cpp		patch \| blob \| history