
Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 15 Apr 2015 20:22:56 +0000 (13:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 15 Apr 2015 20:22:56 +0000 (13:22 -0700)
Pull second vfs update from Al Viro:
 "Now that net-next went in...  Here's the next big chunk - killing
  ->aio_read() and ->aio_write().

  There'll be one more pile today (direct_IO changes and
  generic_write_checks() cleanups/fixes), but I'd prefer to keep that
  one separate"
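
In file_operations terms the series boils down to the following interface change (member signatures copied from the include/linux/fs.h hunk at the end of this diff; shown as an illustrative sketch, not a complete struct definition):

    struct file_operations {
            /* ... */
            /* removed by this series: iovec-based async read/write hooks */
            ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
            ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);

            /* what remains: all reads and writes go through iov_iter */
            ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
            ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
            /* ... */
    };

With do_sync_read/do_sync_write killed and new_sync_{read,write}() made static, filesystems no longer point .read/.write at those wrappers; they set only .read_iter/.write_iter, as the lustre, gfs2, hugetlbfs and ocfs2 hunks below show.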

* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (37 commits)
  ->aio_read and ->aio_write removed
  pcm: another weird API abuse
  infinibad: weird APIs switched to ->write_iter()
  kill do_sync_read/do_sync_write
  fuse: use iov_iter_get_pages() for non-splice path
  fuse: switch to ->read_iter/->write_iter
  switch drivers/char/mem.c to ->read_iter/->write_iter
  make new_sync_{read,write}() static
  coredump: accept any write method
  switch /dev/loop to vfs_iter_write()
  serial2002: switch to __vfs_read/__vfs_write
  ashmem: use __vfs_read()
  export __vfs_read()
  autofs: switch to __vfs_write()
  new helper: __vfs_write()
  switch hugetlbfs to ->read_iter()
  coda: switch to ->read_iter/->write_iter
  ncpfs: switch to ->read_iter/->write_iter
  net/9p: remove (now-)unused helpers
  p9_client_attach(): set fid->uid correctly
  ...
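
Several of the helper commits above ('new helper: __vfs_write()', 'export __vfs_read()', and the serial2002/ashmem/autofs conversions) replace direct f->f_op->read/f->f_op->write calls with the new helpers. A minimal sketch of the calling pattern, modelled on the serial2002_tty_write() hunk further down (the wrapper name here is hypothetical; the set_fs(KERNEL_DS) switch and the local offset are exactly what that driver does, error handling elided):

    /* illustrative wrapper, not part of this series */
    static ssize_t write_kernel_buf(struct file *f, const char *buf, size_t count)
    {
            const char __user *p = (__force const char __user *)buf;
            loff_t offset = 0;
            mm_segment_t oldfs = get_fs();
            ssize_t result;

            set_fs(KERNEL_DS);      /* let a kernel buffer pass the __user checks */
            result = __vfs_write(f, p, count, &offset);
            set_fs(oldfs);
            return result;
    }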

drivers/staging/comedi/drivers/serial2002.c
drivers/staging/lustre/lustre/llite/file.c
fs/aio.c
fs/gfs2/file.c
fs/hugetlbfs/inode.c
fs/ocfs2/file.c
include/linux/fs.h

@@@ -39,12 -39,14 +39,12 @@@ Status: in developmen
  #include <linux/poll.h>
  
  struct serial2002_range_table_t {
 -
        /*  HACK... */
        int length;
        struct comedi_krange range;
  };
  
  struct serial2002_private {
 -
        int port;               /*  /dev/ttyS<port> */
        int speed;              /*  baudrate */
        struct file *tty;
@@@ -108,24 -110,16 +108,16 @@@ static int serial2002_tty_write(struct 
  {
        const char __user *p = (__force const char __user *)buf;
        int result;
+       loff_t offset = 0;
        mm_segment_t oldfs;
  
        oldfs = get_fs();
        set_fs(KERNEL_DS);
-       f->f_pos = 0;
-       result = f->f_op->write(f, p, count, &f->f_pos);
+       result = __vfs_write(f, p, count, &offset);
        set_fs(oldfs);
        return result;
  }
  
- static int serial2002_tty_readb(struct file *f, unsigned char *buf)
- {
-       char __user *p = (__force char __user *)buf;
-       f->f_pos = 0;
-       return f->f_op->read(f, p, 1, &f->f_pos);
- }
  static void serial2002_tty_read_poll_wait(struct file *f, int timeout)
  {
        struct poll_wqueues table;
                        break;
                }
                do_gettimeofday(&now);
 -              elapsed = (1000000 * (now.tv_sec - start.tv_sec) +
 -                        now.tv_usec - start.tv_usec);
 +              elapsed = 1000000 * (now.tv_sec - start.tv_sec) +
 +                        now.tv_usec - start.tv_usec;
                if (elapsed > timeout)
                        break;
                set_current_state(TASK_INTERRUPTIBLE);
@@@ -161,13 -155,15 +153,15 @@@ static int serial2002_tty_read(struct f
        result = -1;
        if (!IS_ERR(f)) {
                mm_segment_t oldfs;
+               char __user *p = (__force char __user *)&ch;
+               loff_t offset = 0;
  
                oldfs = get_fs();
                set_fs(KERNEL_DS);
                if (f->f_op->poll) {
                        serial2002_tty_read_poll_wait(f, timeout);
  
-                       if (serial2002_tty_readb(f, &ch) == 1)
+                       if (__vfs_read(f, p, 1, &offset) == 1)
                                result = ch;
                } else {
                        /* Device does not support poll, busy wait */
                                if (retries >= timeout)
                                        break;
  
-                               if (serial2002_tty_readb(f, &ch) == 1) {
+                               if (__vfs_read(f, p, 1, &offset) == 1) {
                                        result = ch;
                                        break;
                                }
@@@ -298,6 -294,7 +292,6 @@@ static struct serial_data serial2002_re
                }
        }
        return result;
 -
  }
  
  static void serial2002_write(struct file *f, struct serial_data data)
@@@ -161,7 -161,7 +161,7 @@@ static int ll_close_inode_openhandle(st
                op_data->op_lease_handle = och->och_lease_handle;
                op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
        }
 -      epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
 +      epoch_close = op_data->op_flags & MF_EPOCH_CLOSE;
        rc = md_close(md_exp, op_data, och->och_mod, &req);
        if (rc == -EAGAIN) {
                /* This close must have the epoch closed. */
        }
        if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
                struct mdt_body *body;
 +
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                if (!(body->valid & OBD_MD_FLRELEASED))
                        rc = -EBUSY;
@@@ -270,7 -269,7 +270,7 @@@ static int ll_md_close(struct obd_expor
        int lockmode;
        __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
        struct lustre_handle lockh;
 -      ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
 +      ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
        int rc = 0;
  
        /* clear group lock, if present */
@@@ -693,7 -692,7 +693,7 @@@ restart
  out_och_free:
        if (rc) {
                if (och_p && *och_p) {
 -                      OBD_FREE(*och_p, sizeof (struct obd_client_handle));
 +                      OBD_FREE(*och_p, sizeof(struct obd_client_handle));
                        *och_p = NULL; /* OBD_FREE writes some magic there */
                        (*och_usecount)--;
                }
@@@ -1712,12 -1711,6 +1712,12 @@@ static int ll_do_fiemap(struct inode *i
        fm_key.oa.o_oi = lsm->lsm_oi;
        fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
  
 +      if (i_size_read(inode) == 0) {
 +              rc = ll_glimpse_size(inode);
 +              if (rc)
 +                      goto out;
 +      }
 +
        obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
        obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
        /* If filesize is 0, then there would be no objects for mapping */
@@@ -2828,7 -2821,7 +2828,7 @@@ int ll_have_md_lock(struct inode *inode
        int i;
  
        if (!inode)
 -             return 0;
 +              return 0;
  
        fid = &ll_i2info(inode)->lli_fid;
        CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
@@@ -3135,9 -3128,7 +3135,7 @@@ int ll_inode_permission(struct inode *i
  
  /* -o localflock - only provides locally consistent flock locks */
  struct file_operations ll_file_operations = {
-       .read      = new_sync_read,
        .read_iter = ll_file_read_iter,
-       .write    = new_sync_write,
        .write_iter = ll_file_write_iter,
        .unlocked_ioctl = ll_file_ioctl,
        .open      = ll_file_open,
  };
  
  struct file_operations ll_file_operations_flock = {
-       .read      = new_sync_read,
        .read_iter    = ll_file_read_iter,
-       .write    = new_sync_write,
        .write_iter   = ll_file_write_iter,
        .unlocked_ioctl = ll_file_ioctl,
        .open      = ll_file_open,
  
  /* These are for -o noflock - to return ENOSYS on flock calls */
  struct file_operations ll_file_operations_noflock = {
-       .read      = new_sync_read,
        .read_iter    = ll_file_read_iter,
-       .write    = new_sync_write,
        .write_iter   = ll_file_write_iter,
        .unlocked_ioctl = ll_file_ioctl,
        .open      = ll_file_open,
@@@ -3240,7 -3227,6 +3234,7 @@@ void *ll_iocontrol_register(llioc_callb
  
        return in_data;
  }
 +EXPORT_SYMBOL(ll_iocontrol_register);
  
  void ll_iocontrol_unregister(void *magic)
  {
  
        CWARN("didn't find iocontrol register block with magic: %p\n", magic);
  }
 -
 -EXPORT_SYMBOL(ll_iocontrol_register);
  EXPORT_SYMBOL(ll_iocontrol_unregister);
  
  static enum llioc_iter
diff --combined fs/aio.c
+++ b/fs/aio.c
@@@ -310,11 -310,11 +310,11 @@@ static int aio_ring_mmap(struct file *f
        return 0;
  }
  
 -static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 +static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
  {
        struct mm_struct *mm = vma->vm_mm;
        struct kioctx_table *table;
 -      int i;
 +      int i, res = -EINVAL;
  
        spin_lock(&mm->ioctx_lock);
        rcu_read_lock();
  
                ctx = table->table[i];
                if (ctx && ctx->aio_ring_file == file) {
 -                      ctx->user_id = ctx->mmap_base = vma->vm_start;
 +                      if (!atomic_read(&ctx->dead)) {
 +                              ctx->user_id = ctx->mmap_base = vma->vm_start;
 +                              res = 0;
 +                      }
                        break;
                }
        }
  
        rcu_read_unlock();
        spin_unlock(&mm->ioctx_lock);
 +      return res;
  }
  
  static const struct file_operations aio_ring_fops = {
@@@ -692,8 -688,7 +692,7 @@@ static struct kioctx *ioctx_alloc(unsig
        nr_events *= 2;
  
        /* Prevent overflows */
-       if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
-           (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
+       if (nr_events > (0x10000000U / sizeof(struct io_event))) {
                pr_debug("ENOMEM: nr_events too high\n");
                return ERR_PTR(-EINVAL);
        }
  err_cleanup:
        aio_nr_sub(ctx->max_reqs);
  err_ctx:
 +      atomic_set(&ctx->dead, 1);
 +      if (ctx->mmap_size)
 +              vm_munmap(ctx->mmap_base, ctx->mmap_size);
        aio_free_ring(ctx);
  err:
        mutex_unlock(&ctx->ring_lock);
@@@ -788,12 -780,11 +787,12 @@@ static int kill_ioctx(struct mm_struct 
  {
        struct kioctx_table *table;
  
 -      if (atomic_xchg(&ctx->dead, 1))
 +      spin_lock(&mm->ioctx_lock);
 +      if (atomic_xchg(&ctx->dead, 1)) {
 +              spin_unlock(&mm->ioctx_lock);
                return -EINVAL;
 +      }
  
 -
 -      spin_lock(&mm->ioctx_lock);
        table = rcu_dereference_raw(mm->ioctx_table);
        WARN_ON(ctx != table->table[ctx->id]);
        table->table[ctx->id] = NULL;
@@@ -1356,8 -1347,6 +1355,6 @@@ SYSCALL_DEFINE1(io_destroy, aio_context
        return -EINVAL;
  }
  
- typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
-                           unsigned long, loff_t);
  typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
  
  static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
@@@ -1386,7 -1375,6 +1383,6 @@@ static ssize_t aio_run_iocb(struct kioc
        ssize_t ret;
        int rw;
        fmode_t mode;
-       aio_rw_op *rw_op;
        rw_iter_op *iter_op;
        struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
        struct iov_iter iter;
        case IOCB_CMD_PREADV:
                mode    = FMODE_READ;
                rw      = READ;
-               rw_op   = file->f_op->aio_read;
                iter_op = file->f_op->read_iter;
                goto rw_common;
  
        case IOCB_CMD_PWRITEV:
                mode    = FMODE_WRITE;
                rw      = WRITE;
-               rw_op   = file->f_op->aio_write;
                iter_op = file->f_op->write_iter;
                goto rw_common;
  rw_common:
                if (unlikely(!(file->f_mode & mode)))
                        return -EBADF;
  
-               if (!rw_op && !iter_op)
+               if (!iter_op)
                        return -EINVAL;
  
                if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
  
                len = ret;
  
-               /* XXX: move/kill - rw_verify_area()? */
-               /* This matches the pread()/pwrite() logic */
-               if (req->ki_pos < 0) {
-                       ret = -EINVAL;
-                       break;
-               }
                if (rw == WRITE)
                        file_start_write(file);
  
-               if (iter_op) {
-                       ret = iter_op(req, &iter);
-               } else {
-                       ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
-               }
+               ret = iter_op(req, &iter);
  
                if (rw == WRITE)
                        file_end_write(file);
diff --combined fs/gfs2/file.c
@@@ -428,11 -428,11 +428,11 @@@ static int gfs2_page_mkwrite(struct vm_
        if (ret)
                goto out_unlock;
  
 -      ret = gfs2_quota_lock_check(ip);
 -      if (ret)
 -              goto out_unlock;
        gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
        ap.target = data_blocks + ind_blocks;
 +      ret = gfs2_quota_lock_check(ip, &ap);
 +      if (ret)
 +              goto out_unlock;
        ret = gfs2_inplace_reserve(ip, &ap);
        if (ret)
                goto out_quota_unlock;
@@@ -764,30 -764,22 +764,30 @@@ out
        brelse(dibh);
        return error;
  }
 -
 -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
 -                          unsigned int *data_blocks, unsigned int *ind_blocks)
 +/**
 + * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 + *                     blocks, determine how many bytes can be written.
 + * @ip:          The inode in question.
 + * @len:         Max cap of bytes. What we return in *len must be <= this.
 + * @data_blocks: Compute and return the number of data blocks needed
 + * @ind_blocks:  Compute and return the number of indirect blocks needed
 + * @max_blocks:  The total blocks available to work with.
 + *
 + * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 + */
 +static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
 +                          unsigned int *data_blocks, unsigned int *ind_blocks,
 +                          unsigned int max_blocks)
  {
 +      loff_t max = *len;
        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 -      unsigned int max_blocks = ip->i_rgd->rd_free_clone;
        unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
  
        for (tmp = max_data; tmp > sdp->sd_diptrs;) {
                tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
                max_data -= tmp;
        }
 -      /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
 -         so it might end up with fewer data blocks */
 -      if (max_data <= *data_blocks)
 -              return;
 +
        *data_blocks = max_data;
        *ind_blocks = max_blocks - max_data;
        *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@@ -804,7 -796,7 +804,7 @@@ static long __gfs2_fallocate(struct fil
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_alloc_parms ap = { .aflags = 0, };
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 -      loff_t bytes, max_bytes;
 +      loff_t bytes, max_bytes, max_blks = UINT_MAX;
        int error;
        const loff_t pos = offset;
        const loff_t count = len;
  
        gfs2_size_hint(file, offset, len);
  
 +      gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
 +      ap.min_target = data_blocks + ind_blocks;
 +
        while (len > 0) {
                if (len < bytes)
                        bytes = len;
                        offset += bytes;
                        continue;
                }
 -              error = gfs2_quota_lock_check(ip);
 +
 +              /* We need to determine how many bytes we can actually
 +               * fallocate without exceeding quota or going over the
 +               * end of the fs. We start off optimistically by assuming
 +               * we can write max_bytes */
 +              max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
 +
 +              /* Since max_bytes is most likely a theoretical max, we
 +               * calculate a more realistic 'bytes' to serve as a good
 +               * starting point for the number of bytes we may be able
 +               * to write */
 +              gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
 +              ap.target = data_blocks + ind_blocks;
 +
 +              error = gfs2_quota_lock_check(ip, &ap);
                if (error)
                        return error;
 -retry:
 -              gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
 +              /* ap.allowed tells us how many blocks quota will allow
 +               * us to write. Check if this reduces max_blks */
 +              if (ap.allowed && ap.allowed < max_blks)
 +                      max_blks = ap.allowed;
  
 -              ap.target = data_blocks + ind_blocks;
                error = gfs2_inplace_reserve(ip, &ap);
 -              if (error) {
 -                      if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
 -                              bytes >>= 1;
 -                              bytes &= bsize_mask;
 -                              if (bytes == 0)
 -                                      bytes = sdp->sd_sb.sb_bsize;
 -                              goto retry;
 -                      }
 +              if (error)
                        goto out_qunlock;
 -              }
 -              max_bytes = bytes;
 -              calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
 -                              &max_bytes, &data_blocks, &ind_blocks);
 +
 +              /* check if the selected rgrp limits our max_blks further */
 +              if (ap.allowed && ap.allowed < max_blks)
 +                      max_blks = ap.allowed;
 +
 +              /* Almost done. Calculate bytes that can be written using
 +               * max_blks. We also recompute max_bytes, data_blocks and
 +               * ind_blocks */
 +              calc_max_reserv(ip, &max_bytes, &data_blocks,
 +                              &ind_blocks, max_blks);
  
                rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
                          RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@@ -955,22 -930,6 +955,22 @@@ out_uninit
        return ret;
  }
  
 +static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
 +                                    struct file *out, loff_t *ppos,
 +                                    size_t len, unsigned int flags)
 +{
 +      int error;
 +      struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
 +
 +      error = gfs2_rs_alloc(ip);
 +      if (error)
 +              return (ssize_t)error;
 +
 +      gfs2_size_hint(out, *ppos, len);
 +
 +      return iter_file_splice_write(pipe, out, ppos, len, flags);
 +}
 +
  #ifdef CONFIG_GFS2_FS_LOCKING_DLM
  
  /**
@@@ -1105,9 -1064,7 +1105,7 @@@ static int gfs2_flock(struct file *file
  
  const struct file_operations gfs2_file_fops = {
        .llseek         = gfs2_llseek,
-       .read           = new_sync_read,
        .read_iter      = generic_file_read_iter,
-       .write          = new_sync_write,
        .write_iter     = gfs2_file_write_iter,
        .unlocked_ioctl = gfs2_ioctl,
        .mmap           = gfs2_mmap,
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
        .splice_read    = generic_file_splice_read,
 -      .splice_write   = iter_file_splice_write,
 +      .splice_write   = gfs2_file_splice_write,
        .setlease       = simple_nosetlease,
        .fallocate      = gfs2_fallocate,
  };
@@@ -1137,9 -1094,7 +1135,7 @@@ const struct file_operations gfs2_dir_f
  
  const struct file_operations gfs2_file_fops_nolock = {
        .llseek         = gfs2_llseek,
-       .read           = new_sync_read,
        .read_iter      = generic_file_read_iter,
-       .write          = new_sync_write,
        .write_iter     = gfs2_file_write_iter,
        .unlocked_ioctl = gfs2_ioctl,
        .mmap           = gfs2_mmap,
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
        .splice_read    = generic_file_splice_read,
 -      .splice_write   = iter_file_splice_write,
 +      .splice_write   = gfs2_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
  };
diff --combined fs/hugetlbfs/inode.c
@@@ -34,6 -34,7 +34,7 @@@
  #include <linux/security.h>
  #include <linux/magic.h>
  #include <linux/migrate.h>
+ #include <linux/uio.h>
  
  #include <asm/uaccess.h>
  
@@@ -179,42 -180,33 +180,33 @@@ hugetlb_get_unmapped_area(struct file *
  }
  #endif
  
- static int
+ static size_t
  hugetlbfs_read_actor(struct page *page, unsigned long offset,
-                       char __user *buf, unsigned long count,
-                       unsigned long size)
+                       struct iov_iter *to, unsigned long size)
  {
-       char *kaddr;
-       unsigned long left, copied = 0;
+       size_t copied = 0;
        int i, chunksize;
  
-       if (size > count)
-               size = count;
        /* Find which 4k chunk and offset with in that chunk */
        i = offset >> PAGE_CACHE_SHIFT;
        offset = offset & ~PAGE_CACHE_MASK;
  
        while (size) {
+               size_t n;
                chunksize = PAGE_CACHE_SIZE;
                if (offset)
                        chunksize -= offset;
                if (chunksize > size)
                        chunksize = size;
-               kaddr = kmap(&page[i]);
-               left = __copy_to_user(buf, kaddr + offset, chunksize);
-               kunmap(&page[i]);
-               if (left) {
-                       copied += (chunksize - left);
-                       break;
-               }
+               n = copy_page_to_iter(&page[i], offset, chunksize, to);
+               copied += n;
+               if (n != chunksize)
+                       return copied;
                offset = 0;
                size -= chunksize;
-               buf += chunksize;
-               copied += chunksize;
                i++;
        }
-       return copied ? copied : -EFAULT;
+       return copied;
  }
  
  /*
   * data. Its *very* similar to do_generic_mapping_read(), we can't use that
   * since it has PAGE_CACHE_SIZE assumptions.
   */
- static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
-                             size_t len, loff_t *ppos)
+ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
  {
-       struct hstate *h = hstate_file(filp);
-       struct address_space *mapping = filp->f_mapping;
+       struct file *file = iocb->ki_filp;
+       struct hstate *h = hstate_file(file);
+       struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
-       unsigned long index = *ppos >> huge_page_shift(h);
-       unsigned long offset = *ppos & ~huge_page_mask(h);
+       unsigned long index = iocb->ki_pos >> huge_page_shift(h);
+       unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
        unsigned long end_index;
        loff_t isize;
        ssize_t retval = 0;
  
-       /* validate length */
-       if (len == 0)
-               goto out;
-       for (;;) {
+       while (iov_iter_count(to)) {
                struct page *page;
-               unsigned long nr, ret;
-               int ra;
+               size_t nr, copied;
  
                /* nr is the maximum number of bytes to copy from this page */
                nr = huge_page_size(h);
                isize = i_size_read(inode);
                if (!isize)
-                       goto out;
+                       break;
                end_index = (isize - 1) >> huge_page_shift(h);
-               if (index >= end_index) {
-                       if (index > end_index)
-                               goto out;
+               if (index > end_index)
+                       break;
+               if (index == end_index) {
                        nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
                        if (nr <= offset)
-                               goto out;
+                               break;
                }
                nr = nr - offset;
  
                         * We have a HOLE, zero out the user-buffer for the
                         * length of the hole or request.
                         */
-                       ret = len < nr ? len : nr;
-                       if (clear_user(buf, ret))
-                               ra = -EFAULT;
-                       else
-                               ra = 0;
+                       copied = iov_iter_zero(nr, to);
                } else {
                        unlock_page(page);
  
                        /*
                         * We have the page, copy it to user space buffer.
                         */
-                       ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
-                       ret = ra;
+                       copied = hugetlbfs_read_actor(page, offset, to, nr);
                        page_cache_release(page);
                }
-               if (ra < 0) {
-                       if (retval == 0)
-                               retval = ra;
-                       goto out;
+               offset += copied;
+               retval += copied;
+               if (copied != nr && iov_iter_count(to)) {
+                       if (!retval)
+                               retval = -EFAULT;
+                       break;
                }
-               offset += ret;
-               retval += ret;
-               len -= ret;
                index += offset >> huge_page_shift(h);
                offset &= ~huge_page_mask(h);
-               /* short read or no more work */
-               if ((ret != nr) || (len == 0))
-                       break;
        }
- out:
-       *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
+       iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
        return retval;
  }
  
@@@ -319,7 -294,7 +294,7 @@@ static int hugetlbfs_write_end(struct f
  
  static void truncate_huge_page(struct page *page)
  {
 -      cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
 +      ClearPageDirty(page);
        ClearPageUptodate(page);
        delete_from_page_cache(page);
  }
@@@ -721,7 -696,7 +696,7 @@@ static void init_once(void *foo
  }
  
  const struct file_operations hugetlbfs_file_operations = {
-       .read                   = hugetlbfs_read,
+       .read_iter              = hugetlbfs_read_iter,
        .mmap                   = hugetlbfs_file_mmap,
        .fsync                  = noop_fsync,
        .get_unmapped_area      = hugetlb_get_unmapped_area,
diff --combined fs/ocfs2/file.c
@@@ -2392,6 -2392,7 +2392,6 @@@ relock
                /*
                 * for completing the rest of the request.
                 */
 -              *ppos += written;
                count -= written;
                written_buffered = generic_perform_write(file, from, *ppos);
                /*
                        goto out_dio;
                }
  
 -              iocb->ki_pos = *ppos + written_buffered;
                /* We need to ensure that the page cache pages are written to
                 * disk and invalidated to preserve the expected O_DIRECT
                 * semantics.
                ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
                                endbyte);
                if (ret == 0) {
 +                      iocb->ki_pos = *ppos + written_buffered;
                        written += written_buffered;
                        invalidate_mapping_pages(mapping,
                                        *ppos >> PAGE_CACHE_SHIFT,
@@@ -2437,14 -2438,10 +2437,14 @@@ out_dio
        /* buffered aio wouldn't have proper lock coverage today */
        BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
  
 +      if (unlikely(written <= 0))
 +              goto no_sync;
 +
        if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
            ((file->f_flags & O_DIRECT) && !direct_io)) {
 -              ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
 -                                             *ppos + count - 1);
 +              ret = filemap_fdatawrite_range(file->f_mapping,
 +                                             iocb->ki_pos - written,
 +                                             iocb->ki_pos - 1);
                if (ret < 0)
                        written = ret;
  
                }
  
                if (!ret)
 -                      ret = filemap_fdatawait_range(file->f_mapping, *ppos,
 -                                                    *ppos + count - 1);
 +                      ret = filemap_fdatawait_range(file->f_mapping,
 +                                                    iocb->ki_pos - written,
 +                                                    iocb->ki_pos - 1);
        }
  
 +no_sync:
        /*
         * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
         * function pointer which is called when o_direct io completes so that
@@@ -2681,8 -2676,6 +2681,6 @@@ const struct inode_operations ocfs2_spe
   */
  const struct file_operations ocfs2_fops = {
        .llseek         = ocfs2_file_llseek,
-       .read           = new_sync_read,
-       .write          = new_sync_write,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
@@@ -2729,8 -2722,6 +2727,6 @@@ const struct file_operations ocfs2_dop
   */
  const struct file_operations ocfs2_fops_no_plocks = {
        .llseek         = ocfs2_file_llseek,
-       .read           = new_sync_read,
-       .write          = new_sync_write,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
diff --combined include/linux/fs.h
@@@ -1562,8 -1562,6 +1562,6 @@@ struct file_operations 
        loff_t (*llseek) (struct file *, loff_t, int);
        ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
        ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-       ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
-       ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
        ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
        ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
        int (*iterate) (struct file *, struct dir_context *);
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
        long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
        int (*mmap) (struct file *, struct vm_area_struct *);
 -      void (*mremap)(struct file *, struct vm_area_struct *);
 +      int (*mremap)(struct file *, struct vm_area_struct *);
        int (*open) (struct inode *, struct file *);
        int (*flush) (struct file *, fl_owner_t id);
        int (*release) (struct inode *, struct file *);
@@@ -1639,6 -1637,7 +1637,7 @@@ ssize_t rw_copy_check_uvector(int type
                              struct iovec **ret_pointer);
  
  extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
+ extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
  extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@@ -2573,10 -2572,6 +2572,6 @@@ extern ssize_t __generic_file_write_ite
  extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
  extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
  extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
- extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
- extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
- extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
- extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
  
  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos);
  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);