OSDN Git Service

dm: do not allocate any mempools for blk-mq request-based DM
[sagit-ice-cold/kernel_xiaomi_msm8998.git] — drivers/md/dm.c
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f8c7ca3..916f601 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -990,57 +990,6 @@ static void clone_endio(struct bio *bio, int error)
        dec_pending(io, error);
 }
 
-/*
- * Partial completion handling for request-based dm
- */
-static void end_clone_bio(struct bio *clone, int error)
-{
-       struct dm_rq_clone_bio_info *info =
-               container_of(clone, struct dm_rq_clone_bio_info, clone);
-       struct dm_rq_target_io *tio = info->tio;
-       struct bio *bio = info->orig;
-       unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-
-       bio_put(clone);
-
-       if (tio->error)
-               /*
-                * An error has already been detected on the request.
-                * Once error occurred, just let clone->end_io() handle
-                * the remainder.
-                */
-               return;
-       else if (error) {
-               /*
-                * Don't notice the error to the upper layer yet.
-                * The error handling decision is made by the target driver,
-                * when the request is completed.
-                */
-               tio->error = error;
-               return;
-       }
-
-       /*
-        * I/O for the bio successfully completed.
-        * Notice the data completion to the upper layer.
-        */
-
-       /*
-        * bios are processed from the head of the list.
-        * So the completing bio should always be rq->bio.
-        * If it's not, something wrong is happening.
-        */
-       if (tio->orig->bio != bio)
-               DMERR("bio completion is going in the middle of the request");
-
-       /*
-        * Update the original request.
-        * Do not use blk_end_request() here, because it may complete
-        * the original request before the clone, and break the ordering.
-        */
-       blk_update_request(tio->orig, 0, nr_bytes);
-}
-
 static struct dm_rq_target_io *tio_from_request(struct request *rq)
 {
        return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
@@ -1087,13 +1036,17 @@ static void free_rq_clone(struct request *clone)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
 
-       blk_rq_unprep_clone(clone);
-
-       if (clone->q->mq_ops)
+       if (md->type == DM_TYPE_MQ_REQUEST_BASED)
+               /* stacked on blk-mq queue(s) */
                tio->ti->type->release_clone_rq(clone);
        else if (!md->queue->mq_ops)
                /* request_fn queue stacked on request_fn queue(s) */
                free_clone_request(md, clone);
+       /*
+        * NOTE: for the blk-mq queue stacked on request_fn queue(s) case:
+        * no need to call free_clone_request() because we leverage blk-mq by
+        * allocating the clone at the end of the blk-mq pdu (see: clone_rq)
+        */
 
        if (!md->queue->mq_ops)
                free_rq_tio(tio);
@@ -1156,6 +1109,7 @@ static void old_requeue_request(struct request *rq)
 
        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
+       blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -1716,8 +1670,7 @@ static int dm_merge_bvec(struct request_queue *q,
        struct mapped_device *md = q->queuedata;
        struct dm_table *map = dm_get_live_table_fast(md);
        struct dm_target *ti;
-       sector_t max_sectors;
-       int max_size = 0;
+       sector_t max_sectors, max_size = 0;
 
        if (unlikely(!map))
                goto out;
@@ -1732,8 +1685,16 @@ static int dm_merge_bvec(struct request_queue *q,
        max_sectors = min(max_io_len(bvm->bi_sector, ti),
                          (sector_t) queue_max_sectors(q));
        max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-       if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
-               max_size = 0;
+
+       /*
+        * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
+        * to the targets' merge function since it holds sectors not bytes).
+        * Just doing this as an interim fix for stable@ because the more
+        * comprehensive cleanup of switching to sector_t will impact every
+        * DM target that implements a ->merge hook.
+        */
+       if (max_size > INT_MAX)
+               max_size = INT_MAX;
 
        /*
         * merge_bvec_fn() returns number of bytes
@@ -1741,7 +1702,7 @@ static int dm_merge_bvec(struct request_queue *q,
         * max is precomputed maximal io size
         */
        if (max_size && ti->type->merge)
-               max_size = ti->type->merge(ti, bvm, biovec, max_size);
+               max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
        /*
         * If the target doesn't support merge method and some of the devices
         * provided their merge_bvec method (we know this by looking for the
@@ -1813,39 +1774,13 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
                dm_complete_request(rq, r);
 }
 
-static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
-                                void *data)
-{
-       struct dm_rq_target_io *tio = data;
-       struct dm_rq_clone_bio_info *info =
-               container_of(bio, struct dm_rq_clone_bio_info, clone);
-
-       info->orig = bio_orig;
-       info->tio = tio;
-       bio->bi_end_io = end_clone_bio;
-
-       return 0;
-}
-
-static int setup_clone(struct request *clone, struct request *rq,
-                      struct dm_rq_target_io *tio, gfp_t gfp_mask)
+static void setup_clone(struct request *clone, struct request *rq,
+                       struct dm_rq_target_io *tio)
 {
-       int r;
-
-       r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
-                             dm_rq_bio_constructor, tio);
-       if (r)
-               return r;
-
-       clone->cmd = rq->cmd;
-       clone->cmd_len = rq->cmd_len;
-       clone->sense = rq->sense;
+       blk_rq_prep_clone(clone, rq);
        clone->end_io = end_clone_request;
        clone->end_io_data = tio;
-
        tio->clone = clone;
-
-       return 0;
 }
 
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
@@ -1866,12 +1801,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
                clone = tio->clone;
 
        blk_rq_init(NULL, clone);
-       if (setup_clone(clone, rq, tio, gfp_mask)) {
-               /* -ENOMEM */
-               if (alloc_clone)
-                       free_clone_request(md, clone);
-               return NULL;
-       }
+       setup_clone(clone, rq, tio);
 
        return clone;
 }
@@ -1963,13 +1893,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
-               if (IS_ERR(clone))
-                       return DM_MAPIO_REQUEUE;
-               if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
-                       /* -ENOMEM */
-                       ti->type->release_clone_rq(clone);
-                       return DM_MAPIO_REQUEUE;
-               }
+               if (r != DM_MAPIO_REMAPPED)
+                       return r;
+               setup_clone(clone, rq, tio);
        }
 
        switch (r) {
@@ -2397,30 +2323,42 @@ static void free_dev(struct mapped_device *md)
        kfree(md);
 }
 
+static unsigned filter_md_type(unsigned type, struct mapped_device *md)
+{
+       if (type == DM_TYPE_BIO_BASED)
+               return type;
+
+       return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
+}
+
 static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 {
        struct dm_md_mempools *p = dm_table_get_md_mempools(t);
 
-       if (md->bs) {
-               /* The md already has necessary mempools. */
-               if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
+       switch (filter_md_type(dm_table_get_type(t), md)) {
+       case DM_TYPE_BIO_BASED:
+               if (md->bs && md->io_pool) {
                        /*
+                        * This bio-based md already has necessary mempools.
                         * Reload bioset because front_pad may have changed
                         * because a different table was loaded.
                         */
                        bioset_free(md->bs);
                        md->bs = p->bs;
                        p->bs = NULL;
+                       goto out;
                }
-               /*
-                * There's no need to reload with request-based dm
-                * because the size of front_pad doesn't change.
-                * Note for future: If you are to reload bioset,
-                * prep-ed requests in the queue may refer
-                * to bio from the old bioset, so you must walk
-                * through the queue to unprep.
-                */
-               goto out;
+               break;
+       case DM_TYPE_REQUEST_BASED:
+               if (md->rq_pool && md->io_pool)
+                       /*
+                        * This request-based md already has necessary mempools.
+                        */
+                       goto out;
+               break;
+       case DM_TYPE_MQ_REQUEST_BASED:
+               BUG_ON(p); /* No mempools needed */
+               return;
        }
 
        BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
@@ -2431,7 +2369,6 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
        p->rq_pool = NULL;
        md->bs = p->bs;
        p->bs = NULL;
-
 out:
        /* mempool bind completed, no longer need any mempools in the table */
        dm_table_free_md_mempools(t);
@@ -2662,9 +2599,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 {
        struct request_queue *q = NULL;
 
-       if (md->queue->elevator)
-               return 0;
-
        /* Fully initialize the queue */
        q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
        if (!q)
@@ -2748,13 +2682,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
                /* clone request is allocated at the end of the pdu */
                tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
-               if (!clone_rq(rq, md, tio, GFP_ATOMIC))
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+               (void) clone_rq(rq, md, tio, GFP_ATOMIC);
                queue_kthread_work(&md->kworker, &tio->work);
        } else {
                /* Direct call is fine since .queue_rq allows allocations */
-               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-                       dm_requeue_unmapped_original_request(md, rq);
+               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+                       /* Undo dm_start_request() before requeuing */
+                       rq_completed(md, rq_data_dir(rq), false);
+                       return BLK_MQ_RQ_QUEUE_BUSY;
+               }
        }
 
        return BLK_MQ_RQ_QUEUE_OK;
@@ -2811,14 +2747,6 @@ out_tag_set:
        return err;
 }
 
-static unsigned filter_md_type(unsigned type, struct mapped_device *md)
-{
-       if (type == DM_TYPE_BIO_BASED)
-               return type;
-
-       return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
-}
-
 /*
  * Setup the DM device's queue based on md's type
  */
@@ -3531,48 +3459,23 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
-                                           unsigned integrity, unsigned per_bio_data_size)
+struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
+                                            unsigned per_bio_data_size)
 {
-       struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
-       struct kmem_cache *cachep = NULL;
-       unsigned int pool_size = 0;
+       struct dm_md_mempools *pools;
+       unsigned int pool_size = dm_get_reserved_bio_based_ios();
        unsigned int front_pad;
 
+       pools = kzalloc(sizeof(*pools), GFP_KERNEL);
        if (!pools)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
-       type = filter_md_type(type, md);
+       front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) +
+               offsetof(struct dm_target_io, clone);
 
-       switch (type) {
-       case DM_TYPE_BIO_BASED:
-               cachep = _io_cache;
-               pool_size = dm_get_reserved_bio_based_ios();
-               front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-               break;
-       case DM_TYPE_REQUEST_BASED:
-               cachep = _rq_tio_cache;
-               pool_size = dm_get_reserved_rq_based_ios();
-               pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
-               if (!pools->rq_pool)
-                       goto out;
-               /* fall through to setup remaining rq-based pools */
-       case DM_TYPE_MQ_REQUEST_BASED:
-               if (!pool_size)
-                       pool_size = dm_get_reserved_rq_based_ios();
-               front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
-               /* per_bio_data_size is not used. See __bind_mempools(). */
-               WARN_ON(per_bio_data_size != 0);
-               break;
-       default:
-               BUG();
-       }
-
-       if (cachep) {
-               pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
-               if (!pools->io_pool)
-                       goto out;
-       }
+       pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+       if (!pools->io_pool)
+               goto out;
 
        pools->bs = bioset_create_nobvec(pool_size, front_pad);
        if (!pools->bs)
@@ -3582,11 +3485,37 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
                goto out;
 
        return pools;
-
 out:
        dm_free_md_mempools(pools);
+       return ERR_PTR(-ENOMEM);
+}
 
-       return NULL;
+struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md,
+                                           unsigned type)
+{
+       unsigned int pool_size;
+       struct dm_md_mempools *pools;
+
+       if (filter_md_type(type, md) == DM_TYPE_MQ_REQUEST_BASED)
+               return NULL; /* No mempools needed */
+
+       pool_size = dm_get_reserved_rq_based_ios();
+       pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+       if (!pools)
+               return ERR_PTR(-ENOMEM);
+
+       pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
+       if (!pools->rq_pool)
+               goto out;
+
+       pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache);
+       if (!pools->io_pool)
+               goto out;
+
+       return pools;
+out:
+       dm_free_md_mempools(pools);
+       return ERR_PTR(-ENOMEM);
 }
 
 void dm_free_md_mempools(struct dm_md_mempools *pools)