
RDMA/mlx5: Allow MRs to be created in the cache synchronously
author     Jason Gunthorpe <jgg@mellanox.com>
           Tue, 10 Mar 2020 08:22:38 +0000 (10:22 +0200)
committer  Jason Gunthorpe <jgg@mellanox.com>
           Fri, 13 Mar 2020 14:08:02 +0000 (11:08 -0300)

If the cache is completely out of MRs, and we are running in cache mode,
then directly and synchronously create an MR that is compatible with the
cache bucket, using a sleeping mailbox command. This ensures that the
thread waiting for the MR is guaranteed to get one.

When an MR allocated in this way is freed, it is compatible with the
cache bucket and will be recycled back into it.

Delete the very buggy ent->compl scheme that was used to create
synchronous MR allocations.

Link: https://lore.kernel.org/r/20200310082238.239865-13-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
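
The control flow this patch introduces in mlx5_mr_cache_alloc() and
alloc_mr_from_cache() follows one pattern: take a pre-built MR from the
bucket's free list if one is available, otherwise drop the lock and build
one synchronously with the same bucket parameters, so that freeing it
later recycles it into the bucket. The sketch below only illustrates that
pattern; the types and helpers (struct bucket, struct mr,
bucket_create_mr_sync()) are hypothetical stand-ins, not the mlx5 code
itself, which appears in the diff that follows.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/err.h>

struct bucket;

struct mr {
	struct list_head list;
	struct bucket *cache_ent;	/* bucket this MR is compatible with */
};

struct bucket {
	spinlock_t lock;
	struct list_head head;		/* free MRs compatible with this bucket */
	unsigned int available_mrs;
};

/* Hypothetical: synchronously build an MR with this bucket's parameters. */
static struct mr *bucket_create_mr_sync(struct bucket *ent);

static struct mr *bucket_alloc(struct bucket *ent)
{
	struct mr *mr;

	spin_lock_irq(&ent->lock);
	if (list_empty(&ent->head)) {
		spin_unlock_irq(&ent->lock);
		/*
		 * Nothing cached: create an MR compatible with this bucket
		 * using a sleeping (synchronous) command; may return an
		 * ERR_PTR() on failure.
		 */
		return bucket_create_mr_sync(ent);
	}
	mr = list_first_entry(&ent->head, struct mr, list);
	list_del(&mr->list);
	ent->available_mrs--;
	spin_unlock_irq(&ent->lock);
	return mr;
}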

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1216575..a5da2d5 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -722,7 +722,6 @@ struct mlx5_cache_ent {
        struct mlx5_ib_dev     *dev;
        struct work_struct      work;
        struct delayed_work     dwork;
-       struct completion       compl;
 };
 
 struct mlx5_mr_cache {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index afacaf8..a401931 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -139,14 +139,34 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
        queue_adjust_cache_locked(ent);
        ent->pending--;
        spin_unlock_irqrestore(&ent->lock, flags);
+}
+
+static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
+{
+       struct mlx5_ib_mr *mr;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return NULL;
+       mr->order = ent->order;
+       mr->cache_ent = ent;
+       mr->dev = ent->dev;
+
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+       MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
+       MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
 
-       if (!completion_done(&ent->compl))
-               complete(&ent->compl);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+       MLX5_SET(mkc, mkc, log_page_size, ent->page);
+       return mr;
 }
 
+/* Asynchronously schedule new MRs to be populated in the cache. */
 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 {
-       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+       size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mr *mr;
        void *mkc;
        u32 *in;
@@ -159,25 +179,11 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        for (i = 0; i < num; i++) {
-               mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+               mr = alloc_cache_mr(ent, mkc);
                if (!mr) {
                        err = -ENOMEM;
                        break;
                }
-               mr->order = ent->order;
-               mr->cache_ent = ent;
-               mr->dev = ent->dev;
-
-               MLX5_SET(mkc, mkc, free, 1);
-               MLX5_SET(mkc, mkc, umr_en, 1);
-               MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
-               MLX5_SET(mkc, mkc, access_mode_4_2,
-                        (ent->access_mode >> 2) & 0x7);
-
-               MLX5_SET(mkc, mkc, qpn, 0xffffff);
-               MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
-               MLX5_SET(mkc, mkc, log_page_size, ent->page);
-
                spin_lock_irq(&ent->lock);
                if (ent->pending >= MAX_PENDING_REG_MR) {
                        err = -EAGAIN;
@@ -205,6 +211,44 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
        return err;
 }
 
+/* Synchronously create an MR in the cache */
+static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
+{
+       size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+       struct mlx5_ib_mr *mr;
+       void *mkc;
+       u32 *in;
+       int err;
+
+       in = kzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return ERR_PTR(-ENOMEM);
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+       mr = alloc_cache_mr(ent, mkc);
+       if (!mr) {
+               err = -ENOMEM;
+               goto free_in;
+       }
+
+       err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
+       if (err)
+               goto free_mr;
+
+       mr->mmkey.type = MLX5_MKEY_MR;
+       WRITE_ONCE(ent->dev->cache.last_add, jiffies);
+       spin_lock_irq(&ent->lock);
+       ent->total_mrs++;
+       spin_unlock_irq(&ent->lock);
+       kfree(in);
+       return mr;
+free_mr:
+       kfree(mr);
+free_in:
+       kfree(in);
+       return ERR_PTR(err);
+}
+
 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
 {
        struct mlx5_ib_mr *mr;
@@ -427,12 +471,12 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
                if (ent->disabled)
                        goto out;
                if (err) {
-                       if (err == -EAGAIN) {
-                               mlx5_ib_dbg(dev, "returned eagain, order %d\n",
-                                           ent->order);
-                               queue_delayed_work(cache->wq, &ent->dwork,
-                                                  msecs_to_jiffies(3));
-                       } else {
+                       /*
+                        * EAGAIN only happens if pending is positive, so we
+                        * will be rescheduled from reg_mr_callback(). The only
+                        * failure path here is ENOMEM.
+                        */
+                       if (err != -EAGAIN) {
                                mlx5_ib_warn(
                                        dev,
                                        "command failed order %d, err %d\n",
@@ -495,36 +539,30 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        struct mlx5_ib_mr *mr;
-       int err;
 
        if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
                    entry >= ARRAY_SIZE(cache->ent)))
                return ERR_PTR(-EINVAL);
 
        ent = &cache->ent[entry];
-       while (1) {
-               spin_lock_irq(&ent->lock);
-               if (list_empty(&ent->head)) {
-                       spin_unlock_irq(&ent->lock);
-
-                       err = add_keys(ent, 1);
-                       if (err && err != -EAGAIN)
-                               return ERR_PTR(err);
-
-                       wait_for_completion(&ent->compl);
-               } else {
-                       mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
-                                             list);
-                       list_del(&mr->list);
-                       ent->available_mrs--;
-                       queue_adjust_cache_locked(ent);
-                       spin_unlock_irq(&ent->lock);
+       spin_lock_irq(&ent->lock);
+       if (list_empty(&ent->head)) {
+               spin_unlock_irq(&ent->lock);
+               mr = create_cache_mr(ent);
+               if (IS_ERR(mr))
                        return mr;
-               }
+       } else {
+               mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&mr->list);
+               ent->available_mrs--;
+               queue_adjust_cache_locked(ent);
+               spin_unlock_irq(&ent->lock);
        }
+       return mr;
 }
 
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
+/* Return an MR already available in the cache */
+static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
 {
        struct mlx5_ib_dev *dev = req_ent->dev;
        struct mlx5_ib_mr *mr = NULL;
@@ -676,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                ent->dev = dev;
                ent->limit = 0;
 
-               init_completion(&ent->compl);
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 
@@ -939,26 +976,16 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
        struct mlx5_ib_mr *mr;
-       int err = 0;
-       int i;
 
        if (!ent)
                return ERR_PTR(-E2BIG);
-       for (i = 0; i < 1; i++) {
-               mr = alloc_cached_mr(ent);
-               if (mr)
-                       break;
-
-               err = add_keys(ent, 1);
-               if (err && err != -EAGAIN) {
-                       mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
-                       break;
-               }
+       mr = get_cache_mr(ent);
+       if (!mr) {
+               mr = create_cache_mr(ent);
+               if (IS_ERR(mr))
+                       return mr;
        }
 
-       if (!mr)
-               return ERR_PTR(-EAGAIN);
-
        mr->ibmr.pd = pd;
        mr->umem = umem;
        mr->access_flags = access_flags;
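
Because the synchronous path in create_cache_mr() sets mr->cache_ent
through alloc_cache_mr() exactly as the asynchronous add_keys() path does,
an MR created this way is indistinguishable from a cached one when it is
later released. A minimal sketch of that recycle step, reusing the
hypothetical types from the sketch above (the actual release path is not
shown in this diff):

/*
 * Hypothetical recycle step: an MR that carries a cache_ent pointer goes
 * back onto its bucket's free list instead of being destroyed.
 */
static void bucket_recycle(struct mr *mr)
{
	struct bucket *ent = mr->cache_ent;

	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->available_mrs++;
	spin_unlock_irq(&ent->lock);
}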