OSDN Git Service

Merge tag 'vfs-5.8-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 11 Jun 2020 17:48:12 +0000 (10:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 11 Jun 2020 17:48:12 +0000 (10:48 -0700)
Pull DAX updates part three from Darrick Wong:
 "Now that the xfs changes have landed, this third piece changes the
  FS_XFLAG_DAX ioctl code in xfs to request that the inode be reloaded
  after the last program closes the file, if doing so would make a S_DAX
  change happen. The goal here is to make dax access mode switching
  quicker when possible.

  Summary:

   - Teach XFS to ask the VFS to drop an inode if the administrator
     changes the FS_XFLAG_DAX inode flag such that the S_DAX state would
     change. This can result in files changing access modes without
     requiring an unmount cycle"

* tag 'vfs-5.8-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  fs/xfs: Update xfs_ioctl_setattr_dax_invalidate()
  fs/xfs: Combine xfs_diflags_to_linux() and xfs_diflags_to_iflags()
  fs/xfs: Create function xfs_inode_should_enable_dax()
  fs/xfs: Make DAX mount option a tri-state
  fs/xfs: Change XFS_MOUNT_DAX to XFS_MOUNT_DAX_ALWAYS
  fs/xfs: Remove unnecessary initialization of i_rwsem

1  2 
fs/xfs/xfs_ioctl.c

diff --combined fs/xfs/xfs_ioctl.c
@@@ -1104,17 -1104,26 +1104,17 @@@ xfs_fill_fsxattr
        bool                    attr,
        struct fsxattr          *fa)
  {
 +      struct xfs_ifork        *ifp = attr ? ip->i_afp : &ip->i_df;
 +
        simple_fill_fsxattr(fa, xfs_ip2xflags(ip));
        fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
        fa->fsx_cowextsize = ip->i_d.di_cowextsize <<
                        ip->i_mount->m_sb.sb_blocklog;
        fa->fsx_projid = ip->i_d.di_projid;
 -
 -      if (attr) {
 -              if (ip->i_afp) {
 -                      if (ip->i_afp->if_flags & XFS_IFEXTENTS)
 -                              fa->fsx_nextents = xfs_iext_count(ip->i_afp);
 -                      else
 -                              fa->fsx_nextents = ip->i_d.di_anextents;
 -              } else
 -                      fa->fsx_nextents = 0;
 -      } else {
 -              if (ip->i_df.if_flags & XFS_IFEXTENTS)
 -                      fa->fsx_nextents = xfs_iext_count(&ip->i_df);
 -              else
 -                      fa->fsx_nextents = ip->i_d.di_nextents;
 -      }
 +      if (ifp && (ifp->if_flags & XFS_IFEXTENTS))
 +              fa->fsx_nextents = xfs_iext_count(ifp);
 +      else
 +              fa->fsx_nextents = xfs_ifork_nextents(ifp);
  }
  
  STATIC int
@@@ -1202,7 -1211,7 +1202,7 @@@ xfs_ioctl_setattr_xflags
        uint64_t                di_flags2;
  
        /* Can't change realtime flag if any extents are allocated. */
 -      if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
 +      if ((ip->i_df.if_nextents || ip->i_delayed_blks) &&
            XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
                return -EINVAL;
  
        return 0;
  }
  
- /*
-  * If we are changing DAX flags, we have to ensure the file is clean and any
-  * cached objects in the address space are invalidated and removed. This
-  * requires us to lock out other IO and page faults similar to a truncate
-  * operation. The locks need to be held until the transaction has been committed
-  * so that the cache invalidation is atomic with respect to the DAX flag
-  * manipulation.
-  */
- static int
- xfs_ioctl_setattr_dax_invalidate(
+ static void
+ xfs_ioctl_setattr_prepare_dax(
        struct xfs_inode        *ip,
-       struct fsxattr          *fa,
-       int                     *join_flags)
+       struct fsxattr          *fa)
  {
-       struct inode            *inode = VFS_I(ip);
-       struct super_block      *sb = inode->i_sb;
-       int                     error;
-       *join_flags = 0;
-       /*
-        * It is only valid to set the DAX flag on regular files and
-        * directories on filesystems where the block size is equal to the page
-        * size. On directories it serves as an inherited hint so we don't
-        * have to check the device for dax support or flush pagecache.
-        */
-       if (fa->fsx_xflags & FS_XFLAG_DAX) {
-               struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
-               if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
-                       return -EINVAL;
-       }
-       /* If the DAX state is not changing, we have nothing to do here. */
-       if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
-               return 0;
-       if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
-               return 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
  
        if (S_ISDIR(inode->i_mode))
-               return 0;
-       /* lock, flush and invalidate mapping in preparation for flag change */
-       xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-       error = filemap_write_and_wait(inode->i_mapping);
-       if (error)
-               goto out_unlock;
-       error = invalidate_inode_pages2(inode->i_mapping);
-       if (error)
-               goto out_unlock;
-       *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
-       return 0;
+               return;
  
- out_unlock:
-       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-       return error;
+       if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
+           (mp->m_flags & XFS_MOUNT_DAX_NEVER))
+               return;
  
+       if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
+           !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
+           (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
+            (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
+               d_mark_dontcache(inode);
  }
  
  /*
   * have permission to do so. On success, return a clean transaction and the
   * inode locked exclusively ready for further operation specific checks. On
   * failure, return an error without modifying or locking the inode.
-  *
-  * The inode might already be IO locked on call. If this is the case, it is
-  * indicated in @join_flags and we take full responsibility for ensuring they
-  * are unlocked from now on. Hence if we have an error here, we still have to
-  * unlock them. Otherwise, once they are joined to the transaction, they will
-  * be unlocked on commit/cancel.
   */
  static struct xfs_trans *
  xfs_ioctl_setattr_get_trans(
-       struct xfs_inode        *ip,
-       int                     join_flags)
+       struct xfs_inode        *ip)
  {
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
                goto out_unlock;
  
        xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
-       join_flags = 0;
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
        /*
         * CAP_FOWNER overrides the following restrictions:
  out_cancel:
        xfs_trans_cancel(tp);
  out_unlock:
-       if (join_flags)
-               xfs_iunlock(ip, join_flags);
        return ERR_PTR(error);
  }
  
@@@ -1380,7 -1341,7 +1332,7 @@@ xfs_ioctl_setattr_check_extsize
        xfs_extlen_t            size;
        xfs_fsblock_t           extsize_fsb;
  
 -      if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
 +      if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
            ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
                return -EINVAL;
  
@@@ -1473,10 -1434,10 +1425,9 @@@ xfs_ioctl_setattr
        struct fsxattr          old_fa;
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
 -      struct xfs_dquot        *udqp = NULL;
        struct xfs_dquot        *pdqp = NULL;
        struct xfs_dquot        *olddquot = NULL;
        int                     code;
-       int                     join_flags = 0;
  
        trace_xfs_ioctl_setattr(ip);
  
        if (XFS_IS_QUOTA_ON(mp)) {
                code = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid,
                                VFS_I(ip)->i_gid, fa->fsx_projid,
 -                              XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp);
 +                              XFS_QMOPT_PQUOTA, NULL, NULL, &pdqp);
                if (code)
                        return code;
        }
  
-       /*
-        * Changing DAX config may require inode locking for mapping
-        * invalidation. These need to be held all the way to transaction commit
-        * or cancel time, so need to be passed through to
-        * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-        * appropriately.
-        */
-       code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
-       if (code)
-               goto error_free_dquots;
+       xfs_ioctl_setattr_prepare_dax(ip, fa);
  
-       tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+       tp = xfs_ioctl_setattr_get_trans(ip);
        if (IS_ERR(tp)) {
                code = PTR_ERR(tp);
                goto error_free_dquots;
  
        if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) &&
            ip->i_d.di_projid != fa->fsx_projid) {
 -              code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp,
 +              code = xfs_qm_vop_chown_reserve(tp, ip, NULL, NULL, pdqp,
                                capable(CAP_FOWNER) ?  XFS_QMOPT_FORCE_RES : 0);
                if (code)       /* out of quota */
                        goto error_trans_cancel;
         * Release any dquot(s) the inode had kept before chown.
         */
        xfs_qm_dqrele(olddquot);
 -      xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(pdqp);
  
        return code;
  error_trans_cancel:
        xfs_trans_cancel(tp);
  error_free_dquots:
 -      xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(pdqp);
        return code;
  }
@@@ -1639,7 -1593,6 +1581,6 @@@ xfs_ioc_setxflags
        struct fsxattr          fa;
        struct fsxattr          old_fa;
        unsigned int            flags;
-       int                     join_flags = 0;
        int                     error;
  
        if (copy_from_user(&flags, arg, sizeof(flags)))
        if (error)
                return error;
  
-       /*
-        * Changing DAX config may require inode locking for mapping
-        * invalidation. These need to be held all the way to transaction commit
-        * or cancel time, so need to be passed through to
-        * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-        * appropriately.
-        */
-       error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
-       if (error)
-               goto out_drop_write;
+       xfs_ioctl_setattr_prepare_dax(ip, &fa);
  
-       tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+       tp = xfs_ioctl_setattr_get_trans(ip);
        if (IS_ERR(tp)) {
                error = PTR_ERR(tp);
                goto out_drop_write;
        return error;
  }
  
 +static inline int
 +xfs_fs_eofblocks_from_user(
 +      struct xfs_fs_eofblocks         *src,
 +      struct xfs_eofblocks            *dst)
 +{
 +      if (src->eof_version != XFS_EOFBLOCKS_VERSION)
 +              return -EINVAL;
 +
 +      if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
 +              return -EINVAL;
 +
 +      if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
 +          memchr_inv(src->pad64, 0, sizeof(src->pad64)))
 +              return -EINVAL;
 +
 +      dst->eof_flags = src->eof_flags;
 +      dst->eof_prid = src->eof_prid;
 +      dst->eof_min_file_size = src->eof_min_file_size;
 +
 +      dst->eof_uid = INVALID_UID;
 +      if (src->eof_flags & XFS_EOF_FLAGS_UID) {
 +              dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
 +              if (!uid_valid(dst->eof_uid))
 +                      return -EINVAL;
 +      }
 +
 +      dst->eof_gid = INVALID_GID;
 +      if (src->eof_flags & XFS_EOF_FLAGS_GID) {
 +              dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
 +              if (!gid_valid(dst->eof_gid))
 +                      return -EINVAL;
 +      }
 +      return 0;
 +}
 +
  /*
   * Note: some of the ioctl's return positive numbers as a
   * byte count indicating success, such as readlink_by_handle.