OSDN Git Service

ovl: implement async IO routines
author Jiufei Xue <jiufei.xue@linux.alibaba.com>
Wed, 20 Nov 2019 09:45:26 +0000 (17:45 +0800)
committer Miklos Szeredi <mszeredi@redhat.com>
Fri, 24 Jan 2020 08:46:46 +0000 (09:46 +0100)
A performance regression was observed since linux v4.19 with aio test using
fio with iodepth 128 on overlayfs.  The queue depth of the device was
always 1 which is unexpected.

After investigation, it was found that commit 16914e6fc7e1 ("ovl: add
ovl_read_iter()") and commit 2a92e07edc5e ("ovl: add ovl_write_iter()")
resulted in vfs_iter_{read,write} being called on underlying filesystem,
which always results in synchronous IO.

Implement async IO for stacked reading and writing.  This resolves the
performance regression.

This is implemented by allocating a new kiocb for submitting the AIO
request on the underlying filesystem.  When the request is completed, the
new kiocb is freed and the completion callback is called on the original
iocb.

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
fs/overlayfs/file.c
fs/overlayfs/overlayfs.h
fs/overlayfs/super.c

index 859efea..4d1796c 100644 (file)
 #include <linux/uaccess.h>
 #include "overlayfs.h"
 
+struct ovl_aio_req {  /* state for one async read/write forwarded to the real file */
+       struct kiocb iocb;  /* clone of the caller's kiocb, submitted on the real file */
+       struct kiocb *orig_iocb;  /* caller's kiocb; its ki_complete is called on completion */
+       struct fd fd;  /* real file reference taken over at submission; fdput() in cleanup */
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 {
        if (realinode != ovl_inode_upper(inode))
@@ -225,6 +233,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
        return flags;
 }
 
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)  /* tear down a finished request and free aio_req */
+{
+       struct kiocb *iocb = &aio_req->iocb;
+       struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+       if (iocb->ki_flags & IOCB_WRITE) {  /* extra work for write requests only */
+               struct inode *inode = file_inode(orig_iocb->ki_filp);  /* inode of the caller's (overlay) file */
+
+               file_end_write(iocb->ki_filp);  /* pairs with file_start_write() done before submitting the write */
+               ovl_copyattr(ovl_inode_real(inode), inode);  /* same attribute update ("Update size") as the sync write path */
+       }
+
+       orig_iocb->ki_pos = iocb->ki_pos;  /* copy the cloned iocb's file position back to the caller's iocb */
+       fdput(aio_req->fd);  /* drop the real file reference stored in the request */
+       kmem_cache_free(ovl_aio_request_cachep, aio_req);  /* aio_req (and the embedded iocb) must not be used after this */
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)  /* ki_complete callback installed on the cloned iocb */
+{
+       struct ovl_aio_req *aio_req = container_of(iocb,
+                                                  struct ovl_aio_req, iocb);  /* iocb is embedded in the request */
+       struct kiocb *orig_iocb = aio_req->orig_iocb;  /* saved first: the cleanup handler below frees aio_req */
+
+       ovl_aio_cleanup_handler(aio_req);
+       orig_iocb->ki_complete(orig_iocb, res, res2);  /* forward res/res2 unchanged to the caller's completion callback */
+}
+
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
        struct file *file = iocb->ki_filp;
@@ -240,10 +275,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                return ret;
 
        old_cred = ovl_override_creds(file_inode(file)->i_sb);
-       ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
-                           ovl_iocb_to_rwf(iocb));
+       if (is_sync_kiocb(iocb)) {
+               ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+                                   ovl_iocb_to_rwf(iocb));
+       } else {
+               struct ovl_aio_req *aio_req;
+
+               ret = -ENOMEM;
+               aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+               if (!aio_req)
+                       goto out;
+
+               aio_req->fd = real;
+               real.flags = 0;
+               aio_req->orig_iocb = iocb;
+               kiocb_clone(&aio_req->iocb, iocb, real.file);
+               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+               if (ret != -EIOCBQUEUED)
+                       ovl_aio_cleanup_handler(aio_req);
+       }
+out:
        revert_creds(old_cred);
-
        ovl_file_accessed(file);
 
        fdput(real);
@@ -274,15 +327,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
                goto out_unlock;
 
        old_cred = ovl_override_creds(file_inode(file)->i_sb);
-       file_start_write(real.file);
-       ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
-                            ovl_iocb_to_rwf(iocb));
-       file_end_write(real.file);
+       if (is_sync_kiocb(iocb)) {
+               file_start_write(real.file);
+               ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+                                    ovl_iocb_to_rwf(iocb));
+               file_end_write(real.file);
+               /* Update size */
+               ovl_copyattr(ovl_inode_real(inode), inode);
+       } else {
+               struct ovl_aio_req *aio_req;
+
+               ret = -ENOMEM;
+               aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+               if (!aio_req)
+                       goto out;
+
+               file_start_write(real.file);
+               aio_req->fd = real;
+               real.flags = 0;
+               aio_req->orig_iocb = iocb;
+               kiocb_clone(&aio_req->iocb, iocb, real.file);
+               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+               ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+               if (ret != -EIOCBQUEUED)
+                       ovl_aio_cleanup_handler(aio_req);
+       }
+out:
        revert_creds(old_cred);
-
-       /* Update size */
-       ovl_copyattr(ovl_inode_real(inode), inode);
-
        fdput(real);
 
 out_unlock:
@@ -651,3 +722,19 @@ const struct file_operations ovl_file_operations = {
        .copy_file_range        = ovl_copy_file_range,
        .remap_file_range       = ovl_remap_file_range,
 };
+
+int __init ovl_aio_request_cache_init(void)  /* create the slab cache for struct ovl_aio_req; returns 0 or -ENOMEM */
+{
+       ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+                                                  sizeof(struct ovl_aio_req),
+                                                  0, SLAB_HWCACHE_ALIGN, NULL);  /* no constructor is passed */
+       if (!ovl_aio_request_cachep)
+               return -ENOMEM;  /* cache creation failed */
+
+       return 0;
+}
+
+void ovl_aio_request_cache_destroy(void)  /* counterpart of ovl_aio_request_cache_init() */
+{
+       kmem_cache_destroy(ovl_aio_request_cachep);  /* called when register_filesystem() fails in ovl_init() and from ovl_exit() */
+}
index dabfa0d..3623d28 100644 (file)
@@ -450,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
 
 /* file.c */
 extern const struct file_operations ovl_file_operations;
+int __init ovl_aio_request_cache_init(void);
+void ovl_aio_request_cache_destroy(void);
 
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
index c795b74..319fe0d 100644 (file)
@@ -1770,9 +1770,15 @@ static int __init ovl_init(void)
        if (ovl_inode_cachep == NULL)
                return -ENOMEM;
 
-       err = register_filesystem(&ovl_fs_type);
-       if (err)
-               kmem_cache_destroy(ovl_inode_cachep);
+       err = ovl_aio_request_cache_init();
+       if (!err) {
+               err = register_filesystem(&ovl_fs_type);
+               if (!err)
+                       return 0;
+
+               ovl_aio_request_cache_destroy();
+       }
+       kmem_cache_destroy(ovl_inode_cachep);
 
        return err;
 }
@@ -1787,7 +1793,7 @@ static void __exit ovl_exit(void)
         */
        rcu_barrier();
        kmem_cache_destroy(ovl_inode_cachep);
-
+       ovl_aio_request_cache_destroy();
 }
 
 module_init(ovl_init);