/*
* Resizable, Scalable, Concurrent Hash Table
*
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
* Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
*
* Based on the following paper:
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
+#include <linux/err.h>
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4UL
enum {
RHT_LOCK_NORMAL,
RHT_LOCK_NESTED,
- RHT_LOCK_NESTED2,
};
/* The bucket lock is selected based on the hash and protects mutations
* on a group of hash buckets.
*
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contain
+ * entries which link to the same bucket of the old table during resizing.
+ * This simplifies the locking: taking the bucket lock in both tables
+ * during a resize always guarantees protection.
+ *
* IMPORTANT: When holding the bucket lock of both the old and new table
* during expansions and shrinking, the old bucket lock must always be
* acquired first.
return &tbl->locks[hash & tbl->locks_mask];
}
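A quick arithmetic check of the size/2 guarantee above (an illustrative userspace sketch, not part of the patch; the sizes are hypothetical):

#include <stdio.h>

/* With table size 8 and at most 8/2 = 4 locks (locks_mask = 3), the
 * sibling buckets h and h + size/2 that share a chain during a resize
 * also share a lock: adding size/2 never changes hash & locks_mask.
 */
int main(void)
{
	unsigned int size = 8, locks_mask = (size / 2) - 1;
	unsigned int h;

	for (h = 0; h < size / 2; h++)
		printf("buckets %u and %u -> lock %u\n",
		       h, h + size / 2, h & locks_mask);
	return 0;
}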
-#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
-#define ASSERT_BUCKET_LOCK(TBL, HASH) \
- BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH))
-
-#ifdef CONFIG_PROVE_LOCKING
-int lockdep_rht_mutex_is_held(struct rhashtable *ht)
-{
- return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
-
-int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
-{
- spinlock_t *lock = bucket_lock(tbl, hash);
-
- return (debug_locks) ? lockdep_is_held(lock) : 1;
-}
-EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
-#endif
-
static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
{
return (void *) he - ht->p.head_offset;
static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
{
- struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
- u32 hash;
-
- hash = ht->p.hashfn(key, len, ht->p.hash_rnd);
- hash >>= HASH_RESERVED_SPACE;
-
- return rht_bucket_index(tbl, hash);
+ return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
}
static u32 head_hashfn(const struct rhashtable *ht,
return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
}
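For orientation, a sketch (not patch code; sizes are hypothetical): key_hashfn now returns a table-independent hash, and rht_bucket_index(), which is hash & (tbl->size - 1), maps it to a slot per table, so a single hash value can address both the old and the new table during a resize.

#include <stdio.h>

int main(void)
{
	/* stand-in for ht->p.hashfn(key, len, hash_rnd) >> HASH_RESERVED_SPACE */
	unsigned int hash = 0x2f6a1c3;

	printf("old table slot %u, new table slot %u\n",
	       hash & (64 - 1),		/* rht_bucket_index() on a 64-slot table */
	       hash & (128 - 1));	/* the same hash on a 128-slot table */
	return 0;
}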
+#ifdef CONFIG_PROVE_LOCKING
+static void debug_dump_buckets(const struct rhashtable *ht,
+ const struct bucket_table *tbl)
+{
+ struct rhash_head *he;
+ unsigned int i, hash;
+
+ for (i = 0; i < tbl->size; i++) {
+ pr_warn(" [Bucket %d] ", i);
+ rht_for_each_rcu(he, tbl, i) {
+ hash = head_hashfn(ht, tbl, he);
+ pr_cont("[hash = %#x, lock = %p] ",
+ hash, bucket_lock(tbl, hash));
+ }
+ pr_cont("\n");
+ }
+}
+
+static void debug_dump_table(struct rhashtable *ht,
+ const struct bucket_table *tbl,
+ unsigned int hash)
+{
+ struct bucket_table *old_tbl, *future_tbl;
+
+ pr_emerg("BUG: lock for hash %#x in table %p not held\n",
+ hash, tbl);
+
+ rcu_read_lock();
+ future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+ old_tbl = rht_dereference_rcu(ht->tbl, ht);
+ if (future_tbl != old_tbl) {
+ pr_warn("Future table %p (size: %zd)\n",
+ future_tbl, future_tbl->size);
+ debug_dump_buckets(ht, future_tbl);
+ }
+
+ pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
+ debug_dump_buckets(ht, old_tbl);
+
+ rcu_read_unlock();
+}
+
+#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
+#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) \
+ do { \
+ if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) { \
+ debug_dump_table(HT, TBL, HASH); \
+ BUG(); \
+ } \
+ } while (0)
+
+int lockdep_rht_mutex_is_held(struct rhashtable *ht)
+{
+ return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
+
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
+{
+ spinlock_t *lock = bucket_lock(tbl, hash);
+
+ return (debug_locks) ? lockdep_is_held(lock) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
+#else
+#define ASSERT_RHT_MUTEX(HT)
+#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
+#endif
+
static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
{
struct rhash_head __rcu **pprev;
nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
- /* Never allocate more than one lock per bucket */
- size = min_t(unsigned int, size, tbl->size);
+ /* Never allocate more than 0.5 locks per bucket */
+ size = min_t(unsigned int, size, tbl->size >> 1);
if (sizeof(spinlock_t) != 0) {
#ifdef CONFIG_NUMA
}
EXPORT_SYMBOL_GPL(rht_shrink_below_30);
-static void hashtable_chain_unzip(const struct rhashtable *ht,
+static void lock_buckets(struct bucket_table *new_tbl,
+ struct bucket_table *old_tbl, unsigned int hash)
+ __acquires(old_bucket_lock)
+{
+ spin_lock_bh(bucket_lock(old_tbl, hash));
+ if (new_tbl != old_tbl)
+ spin_lock_bh_nested(bucket_lock(new_tbl, hash),
+ RHT_LOCK_NESTED);
+}
+
+static void unlock_buckets(struct bucket_table *new_tbl,
+ struct bucket_table *old_tbl, unsigned int hash)
+ __releases(old_bucket_lock)
+{
+ if (new_tbl != old_tbl)
+ spin_unlock_bh(bucket_lock(new_tbl, hash));
+ spin_unlock_bh(bucket_lock(old_tbl, hash));
+}
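The pairing these helpers enable looks like this sketch, mirroring rhashtable_insert() later in the patch (ht and obj are hypothetical):

	rcu_read_lock();
	old_tbl = rht_dereference_rcu(ht->tbl, ht);
	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
	hash = obj_raw_hashfn(ht, rht_obj(ht, obj));

	lock_buckets(new_tbl, old_tbl, hash);
	/* ... mutate chains in either table under one lock pair ... */
	unlock_buckets(new_tbl, old_tbl, hash);
	rcu_read_unlock();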
+
+/**
+ * Unlink entries on the old bucket which hash to a different bucket in
+ * the new table.
+ *
+ * Returns true if more work remains to be done on this bucket, i.e. the
+ * old bucket is not yet empty.
+ */
+static bool hashtable_chain_unzip(struct rhashtable *ht,
const struct bucket_table *new_tbl,
struct bucket_table *old_tbl,
size_t old_hash)
{
struct rhash_head *he, *p, *next;
- spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
unsigned int new_hash, new_hash2;
- ASSERT_BUCKET_LOCK(old_tbl, old_hash);
+ ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
/* Old bucket empty, no work needed. */
p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
old_hash);
if (rht_is_a_nulls(p))
- return;
+ return false;
- new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
- new_bucket_lock = bucket_lock(new_tbl, new_hash);
+ new_hash = head_hashfn(ht, new_tbl, p);
+ ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
/* Advance the old bucket pointer one or more times until it
* reaches a node that doesn't hash to the same bucket as the
* previous node.
*/
rht_for_each_continue(he, p->next, old_tbl, old_hash) {
new_hash2 = head_hashfn(ht, new_tbl, he);
+ ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
+
if (new_hash != new_hash2)
break;
p = he;
}
rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
- spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
-
- /* If we have encountered an entry that maps to a different bucket in
- * the new table, lock down that bucket as well as we might cut off
- * the end of the chain.
- */
- new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
- if (new_bucket_lock != new_bucket_lock2)
- spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
-
/* Find the subsequent node which does hash to the same
* bucket as node P, or NULL if no such node exists.
*/
*/
rcu_assign_pointer(p->next, next);
- if (new_bucket_lock != new_bucket_lock2)
- spin_unlock_bh(new_bucket_lock2);
- spin_unlock_bh(new_bucket_lock);
+ p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
+ old_hash);
+
+ return !rht_is_a_nulls(p);
}
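A worked example of one unzip pass (hypothetical hashes):

/* Expanding size 2 -> 4, old bucket 0 holds A -> B -> C, where A and C
 * hash to new bucket 0 and B hashes to new bucket 2.  The pass stops p
 * at A (the end of the head run), points old bucket 0 at B, and relinks
 * A->next to C, the next entry that also hashes to new bucket 0.  The
 * old bucket head (B) is not a nulls marker yet, so the function
 * returns true and the caller retries after waiting for readers.
 */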
-static void link_old_to_new(struct bucket_table *new_tbl,
+static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
unsigned int new_hash, struct rhash_head *entry)
{
- spinlock_t *new_bucket_lock;
+ ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
- new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
- spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
- spin_unlock_bh(new_bucket_lock);
}
/**
{
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
struct rhash_head *he;
- spinlock_t *old_bucket_lock;
unsigned int new_hash, old_hash;
bool complete = false;
*/
for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
old_hash = rht_bucket_index(old_tbl, new_hash);
- old_bucket_lock = bucket_lock(old_tbl, old_hash);
-
- spin_lock_bh(old_bucket_lock);
+ lock_buckets(new_tbl, old_tbl, new_hash);
rht_for_each(he, old_tbl, old_hash) {
if (head_hashfn(ht, new_tbl, he) == new_hash) {
- link_old_to_new(new_tbl, new_hash, he);
+ link_old_to_new(ht, new_tbl, new_hash, he);
break;
}
}
- spin_unlock_bh(old_bucket_lock);
+ unlock_buckets(new_tbl, old_tbl, new_hash);
}
- /* Publish the new table pointer. Lookups may now traverse
- * the new table, but they will not benefit from any
- * additional efficiency until later steps unzip the buckets.
- */
- rcu_assign_pointer(ht->tbl, new_tbl);
-
/* Unzip interleaved hash chains */
while (!complete && !ht->being_destroyed) {
/* Wait for readers. All new readers will see the new
*/
complete = true;
for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
- struct rhash_head *head;
+ lock_buckets(new_tbl, old_tbl, old_hash);
- old_bucket_lock = bucket_lock(old_tbl, old_hash);
- spin_lock_bh(old_bucket_lock);
-
- hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
- head = rht_dereference_bucket(old_tbl->buckets[old_hash],
- old_tbl, old_hash);
- if (!rht_is_a_nulls(head))
+ if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
+ old_hash))
complete = false;
- spin_unlock_bh(old_bucket_lock);
+ unlock_buckets(new_tbl, old_tbl, old_hash);
}
}
+ rcu_assign_pointer(ht->tbl, new_tbl);
+ synchronize_rcu();
+
bucket_table_free(old_tbl);
return 0;
}
int rhashtable_shrink(struct rhashtable *ht)
{
struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
- spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
unsigned int new_hash;
ASSERT_RHT_MUTEX(ht);
* always divide the size in half when shrinking, each bucket
* in the new table maps to exactly two buckets in the old
* table.
- *
- * As removals can occur concurrently on the old table, we need
- * to lock down both matching buckets in the old table.
*/
for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
- old_bucket_lock1 = bucket_lock(tbl, new_hash);
- old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
- new_bucket_lock = bucket_lock(new_tbl, new_hash);
-
- spin_lock_bh(old_bucket_lock1);
-
- /* Depending on the lock per buckets mapping, the bucket in
- * the lower and upper region may map to the same lock.
- */
- if (old_bucket_lock1 != old_bucket_lock2) {
- spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
- spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
- } else {
- spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
- }
+ lock_buckets(new_tbl, tbl, new_hash);
rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
tbl->buckets[new_hash]);
+ ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
tbl->buckets[new_hash + new_tbl->size]);
- spin_unlock_bh(new_bucket_lock);
- if (old_bucket_lock1 != old_bucket_lock2)
- spin_unlock_bh(old_bucket_lock2);
- spin_unlock_bh(old_bucket_lock1);
+ unlock_buckets(new_tbl, tbl, new_hash);
}
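To make the two-to-one mapping concrete (hypothetical sizes):

/* Shrinking 8 -> 4: new bucket 1 is fed by old buckets 1 and 1 + 4 = 5.
 * The old table has at most 8/2 = 4 locks, so old buckets 1 and 5 share
 * lock 1 (1 & 3 == 5 & 3), and lock_buckets() on hash 1 covers both old
 * chains; that is what the ASSERT_BUCKET_LOCK in the loop above checks.
 */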
/* Publish the new, valid hash table */
{
struct rhashtable *ht;
struct bucket_table *tbl;
+ struct rhashtable_walker *walker;
ht = container_of(work, struct rhashtable, run_work);
mutex_lock(&ht->mutex);
+ if (ht->being_destroyed)
+ goto unlock;
+
tbl = rht_dereference(ht->tbl, ht);
+ list_for_each_entry(walker, &ht->walkers, list)
+ walker->resize = true;
+
if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
rhashtable_expand(ht);
else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
rhashtable_shrink(ht);
+unlock:
mutex_unlock(&ht->mutex);
}
static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
struct bucket_table *tbl, u32 hash)
{
- struct rhash_head *head = rht_dereference_bucket(tbl->buckets[hash],
- tbl, hash);
+ struct rhash_head *head;
+
+ hash = rht_bucket_index(tbl, hash);
+ head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+ ASSERT_BUCKET_LOCK(ht, tbl, hash);
if (rht_is_a_nulls(head))
INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
*/
void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
{
- struct bucket_table *tbl;
- spinlock_t *lock;
+ struct bucket_table *tbl, *old_tbl;
unsigned hash;
rcu_read_lock();
tbl = rht_dereference_rcu(ht->future_tbl, ht);
- hash = head_hashfn(ht, tbl, obj);
- lock = bucket_lock(tbl, hash);
+ old_tbl = rht_dereference_rcu(ht->tbl, ht);
+ hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
- spin_lock_bh(lock);
+ lock_buckets(tbl, old_tbl, hash);
__rhashtable_insert(ht, obj, tbl, hash);
- spin_unlock_bh(lock);
+ unlock_buckets(tbl, old_tbl, hash);
rcu_read_unlock();
}
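A minimal caller sketch (the object type, params and helper below are hypothetical; the API only requires an embedded struct rhash_head wired up via head_offset):

struct test_obj {
	int			value;
	struct rhash_head	node;	/* linkage used by the table */
};

static struct rhashtable_params test_params = {
	.head_offset	= offsetof(struct test_obj, node),
	.key_offset	= offsetof(struct test_obj, value),
	.key_len	= sizeof(int),
	.hashfn		= jhash,
};

static void insert_one(struct rhashtable *ht, struct test_obj *obj)
{
	rhashtable_insert(ht, &obj->node);	/* safe during a resize */
}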
*/
bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
{
- struct bucket_table *tbl;
+ struct bucket_table *tbl, *new_tbl, *old_tbl;
struct rhash_head __rcu **pprev;
- struct rhash_head *he;
- spinlock_t *lock;
- unsigned int hash;
+ struct rhash_head *he, *he2;
+ unsigned int hash, new_hash;
bool ret = false;
rcu_read_lock();
- tbl = rht_dereference_rcu(ht->tbl, ht);
- hash = head_hashfn(ht, tbl, obj);
-
- lock = bucket_lock(tbl, hash);
- spin_lock_bh(lock);
+ old_tbl = rht_dereference_rcu(ht->tbl, ht);
+ tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+ new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
+ lock_buckets(new_tbl, old_tbl, new_hash);
restart:
+ hash = rht_bucket_index(tbl, new_hash);
pprev = &tbl->buckets[hash];
rht_for_each(he, tbl, hash) {
if (he != obj) {
pprev = &he->next;
continue;
}
- rcu_assign_pointer(*pprev, obj->next);
+ ASSERT_BUCKET_LOCK(ht, tbl, hash);
+
+ if (old_tbl->size > new_tbl->size && tbl == old_tbl &&
+ !rht_is_a_nulls(obj->next) &&
+ head_hashfn(ht, tbl, obj->next) != hash) {
+ rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
+ } else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) {
+ rht_for_each_continue(he2, obj->next, tbl, hash) {
+ if (head_hashfn(ht, tbl, he2) == hash) {
+ rcu_assign_pointer(*pprev, he2);
+ goto found;
+ }
+ }
+
+ rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
+ } else {
+ rcu_assign_pointer(*pprev, obj->next);
+ }
+found:
ret = true;
break;
}
* resizing. Thus traversing both is fine and the added cost is
* very rare.
*/
- if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) {
- spin_unlock_bh(lock);
-
- tbl = rht_dereference_rcu(ht->future_tbl, ht);
- hash = head_hashfn(ht, tbl, obj);
-
- lock = bucket_lock(tbl, hash);
- spin_lock_bh(lock);
+ if (tbl != old_tbl) {
+ tbl = old_tbl;
goto restart;
}
- spin_unlock_bh(lock);
+ unlock_buckets(new_tbl, old_tbl, new_hash);
if (ret) {
atomic_dec(&ht->nelems);
void *arg)
{
struct bucket_table *new_tbl, *old_tbl;
- spinlock_t *new_bucket_lock, *old_bucket_lock;
- u32 new_hash, old_hash;
+ u32 new_hash;
bool success = true;
BUG_ON(!ht->p.key_len);
rcu_read_lock();
-
old_tbl = rht_dereference_rcu(ht->tbl, ht);
- old_hash = head_hashfn(ht, old_tbl, obj);
- old_bucket_lock = bucket_lock(old_tbl, old_hash);
- spin_lock_bh(old_bucket_lock);
-
new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
- new_hash = head_hashfn(ht, new_tbl, obj);
- new_bucket_lock = bucket_lock(new_tbl, new_hash);
- if (unlikely(old_tbl != new_tbl))
- spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
+ new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
+
+ lock_buckets(new_tbl, old_tbl, new_hash);
if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
compare, arg)) {
__rhashtable_insert(ht, obj, new_tbl, new_hash);
exit:
- if (unlikely(old_tbl != new_tbl))
- spin_unlock_bh(new_bucket_lock);
- spin_unlock_bh(old_bucket_lock);
-
+ unlock_buckets(new_tbl, old_tbl, new_hash);
rcu_read_unlock();
return success;
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
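A sketch of the compare callback contract (hypothetical type, key and caller fragment: the callback sees each candidate object plus the opaque arg, and the insert proceeds only when no candidate matches):

struct cmp_arg {
	int	value;		/* hypothetical key to match */
};

static bool obj_cmp(void *ptr, void *arg)
{
	struct test_obj *obj = ptr;	/* hypothetical type */
	struct cmp_arg *x = arg;

	return obj->value == x->value;
}

	/* ... insert unless an equal entry already exists: */
	struct cmp_arg arg = { .value = obj->value };

	if (!rhashtable_lookup_compare_insert(ht, &obj->node, obj_cmp, &arg))
		return -EEXIST;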
+/**
+ * rhashtable_walk_init - Initialise an iterator
+ * @ht: Table to walk over
+ * @iter: Hash table iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice. Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may sleep so you must not call it from interrupt
+ * context or with spin locks held.
+ *
+ * You must call rhashtable_walk_exit if this function returns
+ * successfully.
+ */
+int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
+{
+ iter->ht = ht;
+ iter->p = NULL;
+ iter->slot = 0;
+ iter->skip = 0;
+
+ iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL);
+ if (!iter->walker)
+ return -ENOMEM;
+
+ iter->walker->resize = false; /* kmalloc() does not zero this */
+
+ mutex_lock(&ht->mutex);
+ list_add(&iter->walker->list, &ht->walkers);
+ mutex_unlock(&ht->mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_init);
+
+/**
+ * rhashtable_walk_exit - Free an iterator
+ * @iter: Hash table iterator
+ *
+ * This function frees resources allocated by rhashtable_walk_init.
+ */
+void rhashtable_walk_exit(struct rhashtable_iter *iter)
+{
+ mutex_lock(&iter->ht->mutex);
+ list_del(&iter->walker->list);
+ mutex_unlock(&iter->ht->mutex);
+ kfree(iter->walker);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
+
+/**
+ * rhashtable_walk_start - Start a hash table walk
+ * @iter: Hash table iterator
+ *
+ * Start a hash table walk. Note that we take the RCU lock in all
+ * cases including when we return an error. So you must always call
+ * rhashtable_walk_stop to clean up.
+ *
+ * Returns zero if successful.
+ *
+ * Returns -EAGAIN if a resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may use it immediately
+ * by calling rhashtable_walk_next.
+ */
+int rhashtable_walk_start(struct rhashtable_iter *iter)
+{
+ rcu_read_lock();
+
+ if (iter->walker->resize) {
+ iter->slot = 0;
+ iter->skip = 0;
+ iter->walker->resize = false;
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_start);
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if a resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+ const struct bucket_table *tbl;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ void *obj = NULL;
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ if (p) {
+ p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
+ goto next;
+ }
+
+ for (; iter->slot < tbl->size; iter->slot++) {
+ int skip = iter->skip;
+
+ rht_for_each_rcu(p, tbl, iter->slot) {
+ if (!skip)
+ break;
+ skip--;
+ }
+
+next:
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ obj = rht_obj(ht, p);
+ goto out;
+ }
+
+ iter->skip = 0;
+ }
+
+ iter->p = NULL;
+
+out:
+ if (iter->walker->resize) {
+ iter->p = NULL;
+ iter->slot = 0;
+ iter->skip = 0;
+ iter->walker->resize = false;
+ return ERR_PTR(-EAGAIN);
+ }
+
+ return obj;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_next);
+
+/**
+ * rhashtable_walk_stop - Finish a hash table walk
+ * @iter: Hash table iterator
+ *
+ * Finish a hash table walk.
+ */
+void rhashtable_walk_stop(struct rhashtable_iter *iter)
+{
+ rcu_read_unlock();
+ iter->p = NULL;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
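Putting the four walker calls together (a sketch; the table and the per-entry processing are hypothetical, and note how -EAGAIN differs between start and next as documented above):

static int walk_all(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	void *obj;
	int err;

	err = rhashtable_walk_init(ht, &iter);
	if (err)
		return err;

	err = rhashtable_walk_start(&iter);
	if (err == -EAGAIN)
		err = 0;	/* rewound by a resize; iterator is usable */

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* rewound; entries may repeat */
			err = PTR_ERR(obj);
			break;
		}
		/* ... process the entry behind obj ... */
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
	return err;
}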
+
static size_t rounded_hashtable_size(struct rhashtable_params *params)
{
return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
memset(ht, 0, sizeof(*ht));
mutex_init(&ht->mutex);
memcpy(&ht->p, params, sizeof(*params));
+ INIT_LIST_HEAD(&ht->walkers);
if (params->locks_mul)
ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);