OSDN Git Service

ceph: do not execute direct write in parallel if O_APPEND is specified
authorXiubo Li <xiubli@redhat.com>
Tue, 4 Feb 2020 02:28:25 +0000 (21:28 -0500)
committerIlya Dryomov <idryomov@gmail.com>
Tue, 11 Feb 2020 16:04:04 +0000 (17:04 +0100)
In O_APPEND & O_DIRECT mode, the data from different writers will
be possibly overlapping each other since they take the shared lock.

For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
mode:

          Writer1                         Writer2

     shared_lock()                   shared_lock()
     getattr(CAP_SIZE)               getattr(CAP_SIZE)
     iocb->ki_pos = EOF              iocb->ki_pos = EOF
     write(data1)
                                     write(data2)
     shared_unlock()                 shared_unlock()

The data2 will overlap the data1 from the same file offset, the
old EOF.

Switch to exclusive lock instead when O_APPEND is specified.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/file.c

index c3b8e8e..7e0190b 100644 (file)
@@ -1418,6 +1418,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct ceph_cap_flush *prealloc_cf;
        ssize_t count, written = 0;
        int err, want, got;
+       bool direct_lock = false;
        loff_t pos;
        loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
@@ -1428,8 +1429,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (!prealloc_cf)
                return -ENOMEM;
 
+       if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
+               direct_lock = true;
+
 retry_snap:
-       if (iocb->ki_flags & IOCB_DIRECT)
+       if (direct_lock)
                ceph_start_io_direct(inode);
        else
                ceph_start_io_write(inode);
@@ -1519,14 +1523,15 @@ retry_snap:
 
                /* we might need to revert back to that point */
                data = *from;
-               if (iocb->ki_flags & IOCB_DIRECT) {
+               if (iocb->ki_flags & IOCB_DIRECT)
                        written = ceph_direct_read_write(iocb, &data, snapc,
                                                         &prealloc_cf);
-                       ceph_end_io_direct(inode);
-               } else {
+               else
                        written = ceph_sync_write(iocb, &data, pos, snapc);
+               if (direct_lock)
+                       ceph_end_io_direct(inode);
+               else
                        ceph_end_io_write(inode);
-               }
                if (written > 0)
                        iov_iter_advance(from, written);
                ceph_put_snap_context(snapc);
@@ -1577,7 +1582,7 @@ retry_snap:
 
        goto out_unlocked;
 out:
-       if (iocb->ki_flags & IOCB_DIRECT)
+       if (direct_lock)
                ceph_end_io_direct(inode);
        else
                ceph_end_io_write(inode);