OSDN Git Service

Merge branch 'for-4.2' of git://linux-nfs.org/~bfields/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Jun 2015 17:14:39 +0000 (10:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Jun 2015 17:14:39 +0000 (10:14 -0700)
Pull nfsd updates from Bruce Fields:
 "A relatively quiet cycle, with a mix of cleanup and smaller bugfixes"

* 'for-4.2' of git://linux-nfs.org/~bfields/linux: (24 commits)
  sunrpc: use sg_init_one() in krb5_rc4_setup_enc/seq_key()
  nfsd: wrap too long lines in nfsd4_encode_read
  nfsd: fput rd_file from XDR encode context
  nfsd: take struct file setup fully into nfs4_preprocess_stateid_op
  nfsd: refactor nfs4_preprocess_stateid_op
  nfsd: clean up raparams handling
  nfsd: use swap() in sort_pacl_range()
  rpcrdma: Merge svcrdma and xprtrdma modules into one
  svcrdma: Add a separate "max data segs macro for svcrdma
  svcrdma: Replace GFP_KERNEL in a loop with GFP_NOFAIL
  svcrdma: Keep rpcrdma_msg fields in network byte-order
  svcrdma: Fix byte-swapping in svc_rdma_sendto.c
  nfsd: Update callback sequnce id only CB_SEQUENCE success
  nfsd: Reset cb_status in nfsd4_cb_prepare() at retrying
  svcrdma: Remove svc_rdma_xdr_decode_deferred_req()
  SUNRPC: Move EXPORT_SYMBOL for svc_process
  uapi/nfs: Add NFSv4.1 ACL definitions
  nfsd: Remove dead declarations
  nfsd: work around a gcc-5.1 warning
  nfsd: Checking for acl support does not require fetching any acls
  ...

27 files changed:
Documentation/filesystems/nfs/knfsd-stats.txt
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4acl.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsproc.c
fs/nfsd/state.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nfsd/xdr4.h
include/linux/sunrpc/svc_rdma.h
include/uapi/linux/nfs4.h
net/sunrpc/Kconfig
net/sunrpc/Makefile
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/svc.c
net/sunrpc/xprtrdma/Makefile
net/sunrpc/xprtrdma/module.c [new file with mode: 0644]
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_marshal.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/xprt_rdma.h

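diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt
index 64ced51..1a5d821 100644 (file)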
@@ -68,16 +68,10 @@ sockets-enqueued
        rate of change for this counter is zero; significantly non-zero
        values may indicate a performance limitation.
 
-       This can happen either because there are too few nfsd threads in the
-       thread pool for the NFS workload (the workload is thread-limited),
-       or because the NFS workload needs more CPU time than is available in
-       the thread pool (the workload is CPU-limited).  In the former case,
-       configuring more nfsd threads will probably improve the performance
-       of the NFS workload.  In the latter case, the sunrpc server layer is
-       already choosing not to wake idle nfsd threads because there are too
-       many nfsd threads which want to run but cannot, so configuring more
-       nfsd threads will make no difference whatsoever.  The overloads-avoided
-       statistic (see below) can be used to distinguish these cases.
+       This can happen because there are too few nfsd threads in the thread
+       pool for the NFS workload (the workload is thread-limited), in which
+       case configuring more nfsd threads will probably improve the
+       performance of the NFS workload.
 
 threads-woken
        Counts how many times an idle nfsd thread is woken to try to
@@ -88,36 +82,6 @@ threads-woken
        thing.  The ideal rate of change for this counter will be close
        to but less than the rate of change of the packets-arrived counter.
 
-overloads-avoided
-       Counts how many times the sunrpc server layer chose not to wake an
-       nfsd thread, despite the presence of idle nfsd threads, because
-       too many nfsd threads had been recently woken but could not get
-       enough CPU time to actually run.
-
-       This statistic counts a circumstance where the sunrpc layer
-       heuristically avoids overloading the CPU scheduler with too many
-       runnable nfsd threads.  The ideal rate of change for this counter
-       is zero.  Significant non-zero values indicate that the workload
-       is CPU limited.  Usually this is associated with heavy CPU usage
-       on all the CPUs in the nfsd thread pool.
-
-       If a sustained large overloads-avoided rate is detected on a pool,
-       the top(1) utility should be used to check for the following
-       pattern of CPU usage on all the CPUs associated with the given
-       nfsd thread pool.
-
-        - %us ~= 0 (as you're *NOT* running applications on your NFS server)
-
-        - %wa ~= 0
-
-        - %id ~= 0
-
-        - %sy + %hi + %si ~= 100
-
-       If this pattern is seen, configuring more nfsd threads will *not*
-       improve the performance of the workload.  If this patten is not
-       seen, then something more subtle is wrong.
-
 threads-timedout
        Counts how many times an nfsd thread triggered an idle timeout,
        i.e. was not woken to handle any incoming network packets for
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e4b2b43..f6e7cba 100644 (file)
@@ -805,7 +805,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 
 static __be32
 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
-               const char *name, int namlen)
+                const char *name, int namlen, u64 ino)
 {
        struct svc_export       *exp;
        struct dentry           *dparent, *dchild;
@@ -830,19 +830,21 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
                goto out;
        if (d_really_is_negative(dchild))
                goto out;
+       if (dchild->d_inode->i_ino != ino)
+               goto out;
        rv = fh_compose(fhp, exp, dchild, &cd->fh);
 out:
        dput(dchild);
        return rv;
 }
 
-static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
+static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
 {
        struct svc_fh   *fh = &cd->scratch;
        __be32 err;
 
        fh_init(fh, NFS3_FHSIZE);
-       err = compose_entry_fh(cd, fh, name, namlen);
+       err = compose_entry_fh(cd, fh, name, namlen, ino);
        if (err) {
                *p++ = 0;
                *p++ = 0;
@@ -927,7 +929,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
                p = encode_entry_baggage(cd, p, name, namlen, ino);
 
                if (plus)
-                       p = encode_entryplus_baggage(cd, p, name, namlen);
+                       p = encode_entryplus_baggage(cd, p, name, namlen, ino);
                num_entry_words = p - cd->buffer;
        } else if (*(page+1) != NULL) {
                /* temporarily encode entry into next page, then move back to
@@ -941,7 +943,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
                p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
 
                if (plus)
-                       p1 = encode_entryplus_baggage(cd, p1, name, namlen);
+                       p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
 
                /* determine entry word length and lengths to go in pages */
                num_entry_words = p1 - tmp;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 67242bf..eb5accf 100644 (file)
 #define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
 #define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
 
-/* We don't support these bits; insist they be neither allowed nor denied */
-#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
-               | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
-
 /* flags used to simulate posix default ACLs */
 #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
                | NFS4_ACE_DIRECTORY_INHERIT_ACE)
@@ -64,9 +60,6 @@
                | NFS4_ACE_INHERIT_ONLY_ACE \
                | NFS4_ACE_IDENTIFIER_GROUP)
 
-#define MASK_EQUAL(mask1, mask2) \
-       ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
-
 static u32
 mask_from_posix(unsigned short perm, unsigned int flags)
 {
@@ -126,11 +119,6 @@ low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
                *mode |= ACL_EXECUTE;
 }
 
-struct ace_container {
-       struct nfs4_ace  *ace;
-       struct list_head  ace_l;
-};
-
 static short ace2type(struct nfs4_ace *);
 static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
                                unsigned int);
@@ -384,7 +372,6 @@ pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
 static void
 sort_pacl_range(struct posix_acl *pacl, int start, int end) {
        int sorted = 0, i;
-       struct posix_acl_entry tmp;
 
        /* We just do a bubble sort; easy to do in place, and we're not
         * expecting acl's to be long enough to justify anything more. */
@@ -394,9 +381,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
                        if (pace_gt(&pacl->a_entries[i],
                                    &pacl->a_entries[i+1])) {
                                sorted = 0;
-                               tmp = pacl->a_entries[i];
-                               pacl->a_entries[i] = pacl->a_entries[i+1];
-                               pacl->a_entries[i+1] = tmp;
+                               swap(pacl->a_entries[i],
+                                    pacl->a_entries[i + 1]);
                        }
                }
        }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 5694cfb..a492018 100644 (file)
@@ -455,6 +455,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
        if (unlikely(status || cb->cb_status))
                return status;
 
+       cb->cb_update_seq_nr = true;
        return decode_cb_sequence4resok(xdr, cb);
 }
 
@@ -875,6 +876,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
        u32 minorversion = clp->cl_minorversion;
 
        cb->cb_minorversion = minorversion;
+       cb->cb_update_seq_nr = false;
+       cb->cb_status = 0;
        if (minorversion) {
                if (!nfsd41_cb_get_slot(clp, task))
                        return;
@@ -891,9 +894,16 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
                clp->cl_minorversion);
 
        if (clp->cl_minorversion) {
-               /* No need for lock, access serialized in nfsd4_cb_prepare */
-               if (!task->tk_status)
+               /*
+                * No need for lock, access serialized in nfsd4_cb_prepare
+                *
+                * RFC5661 20.9.3
+                * If CB_SEQUENCE returns an error, then the state of the slot
+                * (sequence ID, cached reply) MUST NOT change.
+                */
+               if (cb->cb_update_seq_nr)
                        ++clp->cl_cb_session->se_cb_seq_nr;
+
                clear_bit(0, &clp->cl_cb_slot_busy);
                rpc_wake_up_next(&clp->cl_cb_waitq);
                dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1090,6 +1100,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
        cb->cb_ops = ops;
        INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
        cb->cb_status = 0;
+       cb->cb_update_seq_nr = false;
        cb->cb_need_restart = false;
 }
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 864e200..90cfda7 100644 (file)
@@ -760,8 +760,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        __be32 status;
 
-       /* no need to check permission - this will be done in nfsd_read() */
-
        read->rd_filp = NULL;
        if (read->rd_offset >= OFFSET_MAX)
                return nfserr_inval;
@@ -778,9 +776,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
 
        /* check stateid */
-       if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
-                                                cstate, &read->rd_stateid,
-                                                RD_STATE, &read->rd_filp))) {
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
+                       RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+       if (status) {
                dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
                goto out;
        }
@@ -924,8 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        int err;
 
        if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
-               status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
-                       &setattr->sa_stateid, WR_STATE, NULL);
+               status = nfs4_preprocess_stateid_op(rqstp, cstate,
+                       &setattr->sa_stateid, WR_STATE, NULL, NULL);
                if (status) {
                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
                        return status;
@@ -986,13 +984,11 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        unsigned long cnt;
        int nvecs;
 
-       /* no need to check permission - this will be done in nfsd_write() */
-
        if (write->wr_offset >= OFFSET_MAX)
                return nfserr_inval;
 
-       status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
-                                       cstate, stateid, WR_STATE, &filp);
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
+                       &filp, NULL);
        if (status) {
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                return status;
@@ -1005,11 +1001,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        nvecs = fill_in_write_vector(rqstp->rq_vec, write);
        WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
-       status =  nfsd_write(rqstp, &cstate->current_fh, filp,
-                            write->wr_offset, rqstp->rq_vec, nvecs,
-                            &cnt, &write->wr_how_written);
-       if (filp)
-               fput(filp);
+       status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+                               write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
+                               &write->wr_how_written);
+       fput(filp);
 
        write->wr_bytes_written = cnt;
 
@@ -1023,15 +1018,13 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        __be32 status = nfserr_notsupp;
        struct file *file;
 
-       status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+       status = nfs4_preprocess_stateid_op(rqstp, cstate,
                                            &fallocate->falloc_stateid,
-                                           WR_STATE, &file);
+                                           WR_STATE, &file, NULL);
        if (status != nfs_ok) {
                dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
                return status;
        }
-       if (!file)
-               return nfserr_bad_stateid;
 
        status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
                                     fallocate->falloc_offset,
@@ -1064,15 +1057,13 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        __be32 status;
        struct file *file;
 
-       status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+       status = nfs4_preprocess_stateid_op(rqstp, cstate,
                                            &seek->seek_stateid,
-                                           RD_STATE, &file);
+                                           RD_STATE, &file, NULL);
        if (status) {
                dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
                return status;
        }
-       if (!file)
-               return nfserr_bad_stateid;
 
        switch (seek->seek_whence) {
        case NFS4_CONTENT_DATA:
@@ -1732,10 +1723,6 @@ encode_op:
                        be32_to_cpu(status));
 
                nfsd4_cstate_clear_replay(cstate);
-               /* XXX Ugh, we need to get rid of this kind of special case: */
-               if (op->opnum == OP_READ && op->u.read.rd_filp)
-                       fput(op->u.read.rd_filp);
-
                nfsd4_increment_op_stats(op->opnum);
        }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 039f9c8..61dfb33 100644 (file)
@@ -3861,7 +3861,7 @@ static __be32
 nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
 {
        __be32 status;
-       unsigned char old_deny_bmap;
+       unsigned char old_deny_bmap = stp->st_deny_bmap;
 
        if (!test_access(open->op_share_access, stp))
                return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
@@ -3870,7 +3870,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
        spin_lock(&fp->fi_lock);
        status = nfs4_file_check_deny(fp, open->op_share_deny);
        if (status == nfs_ok) {
-               old_deny_bmap = stp->st_deny_bmap;
                set_deny(open->op_share_deny, stp);
                fp->fi_share_deny |=
                                (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
@@ -4574,85 +4573,130 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
        return nfs_ok;
 }
 
+static struct file *
+nfs4_find_file(struct nfs4_stid *s, int flags)
+{
+       if (!s)
+               return NULL;
+
+       switch (s->sc_type) {
+       case NFS4_DELEG_STID:
+               if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
+                       return NULL;
+               return get_file(s->sc_file->fi_deleg_file);
+       case NFS4_OPEN_STID:
+       case NFS4_LOCK_STID:
+               if (flags & RD_STATE)
+                       return find_readable_file(s->sc_file);
+               else
+                       return find_writeable_file(s->sc_file);
+               break;
+       }
+
+       return NULL;
+}
+
+static __be32
+nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
+{
+       __be32 status;
+
+       status = nfs4_check_fh(fhp, ols);
+       if (status)
+               return status;
+       status = nfsd4_check_openowner_confirmed(ols);
+       if (status)
+               return status;
+       return nfs4_check_openmode(ols, flags);
+}
+
+static __be32
+nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
+               struct file **filpp, bool *tmp_file, int flags)
+{
+       int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
+       struct file *file;
+       __be32 status;
+
+       file = nfs4_find_file(s, flags);
+       if (file) {
+               status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+                               acc | NFSD_MAY_OWNER_OVERRIDE);
+               if (status) {
+                       fput(file);
+                       return status;
+               }
+
+               *filpp = file;
+       } else {
+               status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+               if (status)
+                       return status;
+
+               if (tmp_file)
+                       *tmp_file = true;
+       }
+
+       return 0;
+}
+
 /*
-* Checks for stateid operations
-*/
+ * Checks for stateid operations
+ */
 __be32
-nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
-                          stateid_t *stateid, int flags, struct file **filpp)
+nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate, stateid_t *stateid,
+               int flags, struct file **filpp, bool *tmp_file)
 {
-       struct nfs4_stid *s;
-       struct nfs4_ol_stateid *stp = NULL;
-       struct nfs4_delegation *dp = NULL;
-       struct svc_fh *current_fh = &cstate->current_fh;
-       struct inode *ino = d_inode(current_fh->fh_dentry);
+       struct svc_fh *fhp = &cstate->current_fh;
+       struct inode *ino = d_inode(fhp->fh_dentry);
+       struct net *net = SVC_NET(rqstp);
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-       struct file *file = NULL;
+       struct nfs4_stid *s = NULL;
        __be32 status;
 
        if (filpp)
                *filpp = NULL;
+       if (tmp_file)
+               *tmp_file = false;
 
        if (grace_disallows_io(net, ino))
                return nfserr_grace;
 
-       if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
-               return check_special_stateids(net, current_fh, stateid, flags);
+       if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+               status = check_special_stateids(net, fhp, stateid, flags);
+               goto done;
+       }
 
        status = nfsd4_lookup_stateid(cstate, stateid,
                                NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
                                &s, nn);
        if (status)
                return status;
-       status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
+       status = check_stateid_generation(stateid, &s->sc_stateid,
+                       nfsd4_has_session(cstate));
        if (status)
                goto out;
+
        switch (s->sc_type) {
        case NFS4_DELEG_STID:
-               dp = delegstateid(s);
-               status = nfs4_check_delegmode(dp, flags);
-               if (status)
-                       goto out;
-               if (filpp) {
-                       file = dp->dl_stid.sc_file->fi_deleg_file;
-                       if (!file) {
-                               WARN_ON_ONCE(1);
-                               status = nfserr_serverfault;
-                               goto out;
-                       }
-                       get_file(file);
-               }
+               status = nfs4_check_delegmode(delegstateid(s), flags);
                break;
        case NFS4_OPEN_STID:
        case NFS4_LOCK_STID:
-               stp = openlockstateid(s);
-               status = nfs4_check_fh(current_fh, stp);
-               if (status)
-                       goto out;
-               status = nfsd4_check_openowner_confirmed(stp);
-               if (status)
-                       goto out;
-               status = nfs4_check_openmode(stp, flags);
-               if (status)
-                       goto out;
-               if (filpp) {
-                       struct nfs4_file *fp = stp->st_stid.sc_file;
-
-                       if (flags & RD_STATE)
-                               file = find_readable_file(fp);
-                       else
-                               file = find_writeable_file(fp);
-               }
+               status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
                break;
        default:
                status = nfserr_bad_stateid;
-               goto out;
+               break;
        }
-       status = nfs_ok;
-       if (file)
-               *filpp = file;
+
+done:
+       if (!status && filpp)
+               status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
 out:
-       nfs4_put_stid(s);
+       if (s)
+               nfs4_put_stid(s);
        return status;
 }
 
@@ -5505,7 +5549,7 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
        __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
        if (!err) {
                err = nfserrno(vfs_test_lock(file, lock));
-               nfsd_close(file);
+               fput(file);
        }
        return err;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 158badf..5463385 100644 (file)
@@ -33,6 +33,7 @@
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/file.h>
 #include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/statfs.h>
@@ -2227,7 +2228,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
        u32 rdattr_err = 0;
        __be32 status;
        int err;
-       int aclsupport = 0;
        struct nfs4_acl *acl = NULL;
        void *context = NULL;
        int contextlen;
@@ -2274,19 +2274,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                        goto out;
                fhp = tempfh;
        }
-       if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
-                       | FATTR4_WORD0_SUPPORTED_ATTRS)) {
+       if (bmval0 & FATTR4_WORD0_ACL) {
                err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
-               aclsupport = (err == 0);
-               if (bmval0 & FATTR4_WORD0_ACL) {
-                       if (err == -EOPNOTSUPP)
-                               bmval0 &= ~FATTR4_WORD0_ACL;
-                       else if (err == -EINVAL) {
-                               status = nfserr_attrnotsupp;
-                               goto out;
-                       } else if (err != 0)
-                               goto out_nfserr;
-               }
+               if (err == -EOPNOTSUPP)
+                       bmval0 &= ~FATTR4_WORD0_ACL;
+               else if (err == -EINVAL) {
+                       status = nfserr_attrnotsupp;
+                       goto out;
+               } else if (err != 0)
+                       goto out_nfserr;
        }
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
@@ -2338,7 +2334,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                u32 word1 = nfsd_suppattrs1(minorversion);
                u32 word2 = nfsd_suppattrs2(minorversion);
 
-               if (!aclsupport)
+               if (!IS_POSIXACL(dentry->d_inode))
                        word0 &= ~FATTR4_WORD0_ACL;
                if (!contextsupport)
                        word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
@@ -2486,7 +2482,7 @@ out_acl:
                p = xdr_reserve_space(xdr, 4);
                if (!p)
                        goto out_resource;
-               *p++ = cpu_to_be32(aclsupport ?
+               *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ?
                        ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
        }
        if (bmval0 & FATTR4_WORD0_CANSETTIME) {
@@ -3422,52 +3418,51 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
        unsigned long maxcount;
        struct xdr_stream *xdr = &resp->xdr;
        struct file *file = read->rd_filp;
-       struct svc_fh *fhp = read->rd_fhp;
        int starting_len = xdr->buf->len;
-       struct raparms *ra;
+       struct raparms *ra = NULL;
        __be32 *p;
-       __be32 err;
 
        if (nfserr)
-               return nfserr;
+               goto out;
 
        p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
        if (!p) {
                WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
-               return nfserr_resource;
+               nfserr = nfserr_resource;
+               goto out;
        }
-       if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
+       if (resp->xdr.buf->page_len &&
+           test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
                WARN_ON_ONCE(1);
-               return nfserr_resource;
+               nfserr = nfserr_resource;
+               goto out;
        }
        xdr_commit_encode(xdr);
 
        maxcount = svc_max_payload(resp->rqstp);
-       maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
+       maxcount = min_t(unsigned long, maxcount,
+                        (xdr->buf->buflen - xdr->buf->len));
        maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
-       if (read->rd_filp)
-               err = nfsd_permission(resp->rqstp, fhp->fh_export,
-                               fhp->fh_dentry,
-                               NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
-       else
-               err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
-                                               &file, &ra);
-       if (err)
-               goto err_truncate;
+       if (read->rd_tmp_file)
+               ra = nfsd_init_raparms(file);
 
-       if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
-               err = nfsd4_encode_splice_read(resp, read, file, maxcount);
+       if (file->f_op->splice_read &&
+           test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+               nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
        else
-               err = nfsd4_encode_readv(resp, read, file, maxcount);
+               nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
 
-       if (!read->rd_filp)
-               nfsd_put_tmp_read_open(file, ra);
+       if (ra)
+               nfsd_put_raparams(file, ra);
 
-err_truncate:
-       if (err)
+       if (nfserr)
                xdr_truncate_encode(xdr, starting_len);
-       return err;
+
+out:
+       if (file)
+               fput(file);
+       return nfserr;
 }
 
 static __be32
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index aecbcd3..4cd78ef 100644 (file)
@@ -59,13 +59,61 @@ static __be32
 nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
                                          struct nfsd_attrstat  *resp)
 {
+       struct iattr *iap = &argp->attrs;
+       struct svc_fh *fhp;
        __be32 nfserr;
+
        dprintk("nfsd: SETATTR  %s, valid=%x, size=%ld\n",
                SVCFH_fmt(&argp->fh),
                argp->attrs.ia_valid, (long) argp->attrs.ia_size);
 
-       fh_copy(&resp->fh, &argp->fh);
-       nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+       fhp = fh_copy(&resp->fh, &argp->fh);
+
+       /*
+        * NFSv2 does not differentiate between "set-[ac]time-to-now"
+        * which only requires access, and "set-[ac]time-to-X" which
+        * requires ownership.
+        * So if it looks like it might be "set both to the same time which
+        * is close to now", and if inode_change_ok fails, then we
+        * convert to "set to now" instead of "set to explicit time"
+        *
+        * We only call inode_change_ok as the last test as technically
+        * it is not an interface that we should be using.
+        */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define        MAX_TOUCH_TIME_ERROR (30*60)
+       if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
+           iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
+               /*
+                * Looks probable.
+                *
+                * Now just make sure time is in the right ballpark.
+                * Solaris, at least, doesn't seem to care what the time
+                * request is.  We require it be within 30 minutes of now.
+                */
+               time_t delta = iap->ia_atime.tv_sec - get_seconds();
+               struct inode *inode;
+
+               nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
+               if (nfserr)
+                       goto done;
+               inode = d_inode(fhp->fh_dentry);
+
+               if (delta < 0)
+                       delta = -delta;
+               if (delta < MAX_TOUCH_TIME_ERROR &&
+                   inode_change_ok(inode, iap) != 0) {
+                       /*
+                        * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
+                        * This will cause notify_change to set these times
+                        * to "now"
+                        */
+                       iap->ia_valid &= ~BOTH_TIME_SET;
+               }
+       }
+
+       nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
+done:
        return nfsd_return_attrs(nfserr, resp);
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index dbc4f85..4874ce5 100644 (file)
@@ -68,6 +68,7 @@ struct nfsd4_callback {
        struct nfsd4_callback_ops *cb_ops;
        struct work_struct cb_work;
        int cb_status;
+       bool cb_update_seq_nr;
        bool cb_need_restart;
 };
 
@@ -582,9 +583,9 @@ enum nfsd4_cb_op {
 struct nfsd4_compound_state;
 struct nfsd_net;
 
-extern __be32 nfs4_preprocess_stateid_op(struct net *net,
-               struct nfsd4_compound_state *cstate,
-               stateid_t *stateid, int flags, struct file **filp);
+extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+               struct nfsd4_compound_state *cstate, stateid_t *stateid,
+               int flags, struct file **filp, bool *tmp_file);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     stateid_t *stateid, unsigned char typemask,
                     struct nfs4_stid **s, struct nfsd_net *nn);
index 84d770b..b5e077a 100644 (file)
@@ -302,42 +302,6 @@ commit_metadata(struct svc_fh *fhp)
 static void
 nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 {
-       /*
-        * NFSv2 does not differentiate between "set-[ac]time-to-now"
-        * which only requires access, and "set-[ac]time-to-X" which
-        * requires ownership.
-        * So if it looks like it might be "set both to the same time which
-        * is close to now", and if inode_change_ok fails, then we
-        * convert to "set to now" instead of "set to explicit time"
-        *
-        * We only call inode_change_ok as the last test as technically
-        * it is not an interface that we should be using.
-        */
-#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
-#define        MAX_TOUCH_TIME_ERROR (30*60)
-       if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
-           iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
-               /*
-                * Looks probable.
-                *
-                * Now just make sure time is in the right ballpark.
-                * Solaris, at least, doesn't seem to care what the time
-                * request is.  We require it be within 30 minutes of now.
-                */
-               time_t delta = iap->ia_atime.tv_sec - get_seconds();
-               if (delta < 0)
-                       delta = -delta;
-               if (delta < MAX_TOUCH_TIME_ERROR &&
-                   inode_change_ok(inode, iap) != 0) {
-                       /*
-                        * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
-                        * This will cause notify_change to set these times
-                        * to "now"
-                        */
-                       iap->ia_valid &= ~BOTH_TIME_SET;
-               }
-       }
-
        /* sanitize the mode change */
        if (iap->ia_valid & ATTR_MODE) {
                iap->ia_mode &= S_IALLUGO;
@@ -538,16 +502,11 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
                           struct file *file, loff_t offset, loff_t len,
                           int flags)
 {
-       __be32 err;
        int error;
 
        if (!S_ISREG(file_inode(file)->i_mode))
                return nfserr_inval;
 
-       err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
-       if (err)
-               return err;
-
        error = vfs_fallocate(file, flags, offset, len);
        if (!error)
                error = commit_metadata(fhp);
@@ -744,7 +703,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 
        host_err = ima_file_check(file, may_flags, 0);
        if (host_err) {
-               nfsd_close(file);
+               fput(file);
                goto out_nfserr;
        }
 
@@ -761,23 +720,12 @@ out:
        return err;
 }
 
-/*
- * Close a file.
- */
-void
-nfsd_close(struct file *filp)
-{
-       fput(filp);
-}
-
-/*
- * Obtain the readahead parameters for the file
- * specified by (dev, ino).
- */
-
-static inline struct raparms *
-nfsd_get_raparms(dev_t dev, ino_t ino)
+struct raparms *
+nfsd_init_raparms(struct file *file)
 {
+       struct inode *inode = file_inode(file);
+       dev_t dev = inode->i_sb->s_dev;
+       ino_t ino = inode->i_ino;
        struct raparms  *ra, **rap, **frap = NULL;
        int depth = 0;
        unsigned int hash;
@@ -814,9 +762,23 @@ found:
        ra->p_count++;
        nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
        spin_unlock(&rab->pb_lock);
+
+       if (ra->p_set)
+               file->f_ra = ra->p_ra;
        return ra;
 }
 
+void nfsd_put_raparams(struct file *file, struct raparms *ra)
+{
+       struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+
+       spin_lock(&rab->pb_lock);
+       ra->p_ra = file->f_ra;  /* write the file's readahead state back */
+       ra->p_set = 1;          /* nfsd_init_raparms copies p_ra only if set */
+       ra->p_count--;          /* pairs with p_count++ in nfsd_init_raparms */
+       spin_unlock(&rab->pb_lock);
+}
+
 /*
  * Grab and keep cached pages associated with a file in the svc_rqst
  * so that they can be passed to the network sendmsg/sendpage routines
@@ -945,7 +907,7 @@ static int wait_for_concurrent_writes(struct file *file)
        return err;
 }
 
-static __be32
+__be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                loff_t offset, struct kvec *vec, int vlen,
                                unsigned long *cnt, int *stablep)
@@ -1009,40 +971,6 @@ out_nfserr:
        return err;
 }
 
-__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
-               struct file **file, struct raparms **ra)
-{
-       struct inode *inode;
-       __be32 err;
-
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
-       if (err)
-               return err;
-
-       inode = file_inode(*file);
-
-       /* Get readahead parameters */
-       *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
-
-       if (*ra && (*ra)->p_set)
-               (*file)->f_ra = (*ra)->p_ra;
-       return nfs_ok;
-}
-
-void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
-{
-       /* Write back readahead params */
-       if (ra) {
-               struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-               spin_lock(&rab->pb_lock);
-               ra->p_ra = file->f_ra;
-               ra->p_set = 1;
-               ra->p_count--;
-               spin_unlock(&rab->pb_lock);
-       }
-       nfsd_close(file);
-}
-
 /*
  * Read data from a file. count must contain the requested read count
  * on entry. On return, *count contains the number of bytes actually read.
@@ -1055,13 +983,15 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
        struct raparms  *ra;
        __be32 err;
 
-       err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
+       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
        if (err)
                return err;
 
+       ra = nfsd_init_raparms(file);
        err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
-
-       nfsd_put_tmp_read_open(file, ra);
+       if (ra)
+               nfsd_put_raparams(file, ra);
+       fput(file);
 
        return err;
 }
@@ -1093,7 +1023,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                if (cnt)
                        err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
                                             cnt, stablep);
-               nfsd_close(file);
+               fput(file);
        }
 out:
        return err;
@@ -1138,7 +1068,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        err = nfserr_notsupp;
        }
 
-       nfsd_close(file);
+       fput(file);
 out:
        return err;
 }
@@ -1977,7 +1907,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
        if (err == nfserr_eof || err == nfserr_toosmall)
                err = nfs_ok; /* can still be found in ->err */
 out_close:
-       nfsd_close(file);
+       fput(file);
 out:
        return err;
 }
index 2050cb0..5be875e 100644 (file)
@@ -71,11 +71,7 @@ __be32               nfsd_commit(struct svc_rqst *, struct svc_fh *,
 #endif /* CONFIG_NFSD_V3 */
 __be32         nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                                int, struct file **);
-void           nfsd_close(struct file *);
 struct raparms;
-__be32         nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
-                               struct file **, struct raparms **);
-void           nfsd_put_tmp_read_open(struct file *, struct raparms *);
 __be32         nfsd_splice_read(struct svc_rqst *,
                                struct file *, loff_t, unsigned long *);
 __be32         nfsd_readv(struct file *, loff_t, struct kvec *, int,
@@ -84,6 +80,10 @@ __be32               nfsd_read(struct svc_rqst *, struct svc_fh *,
                                loff_t, struct kvec *, int, unsigned long *);
 __be32                 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
                                loff_t, struct kvec *,int, unsigned long *, int *);
+__be32         nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                               struct file *file, loff_t offset,
+                               struct kvec *vec, int vlen, unsigned long *cnt,
+                               int *stablep);
 __be32         nfsd_readlink(struct svc_rqst *, struct svc_fh *,
                                char *, int *);
 __be32         nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -104,6 +104,9 @@ __be32              nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_permission(struct svc_rqst *, struct svc_export *,
                                struct dentry *, int);
 
+struct raparms *nfsd_init_raparms(struct file *file);
+void           nfsd_put_raparams(struct file *file, struct raparms *ra);
+
 static inline int fh_want_write(struct svc_fh *fh)
 {
        int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
index 2f8c092..9f99100 100644 (file)
@@ -273,6 +273,7 @@ struct nfsd4_read {
        u32             rd_length;          /* request */
        int             rd_vlen;
        struct file     *rd_filp;
+       bool            rd_tmp_file;
        
        struct svc_rqst *rd_rqstp;          /* response */
        struct svc_fh * rd_fhp;             /* response */
index df8edf8..cb94ee4 100644 (file)
@@ -172,6 +172,13 @@ struct svcxprt_rdma {
 #define RDMAXPRT_SQ_PENDING    2
 #define RDMAXPRT_CONN_PENDING  3
 
+#define RPCRDMA_MAX_SVC_SEGS   (64)    /* server max scatter/gather */
+#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#define RPCRDMA_MAXPAYLOAD     RPCSVC_MAXPAYLOAD
+#else
+#define RPCRDMA_MAXPAYLOAD     (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#endif
+
 #define RPCRDMA_LISTEN_BACKLOG  10
 /* The default ORD value is based on two outstanding full-size writes with a
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
@@ -182,10 +189,9 @@ struct svcxprt_rdma {
 
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
-extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
                                     struct rpcrdma_msg *,
-                                    enum rpcrdma_errcode, u32 *);
+                                    enum rpcrdma_errcode, __be32 *);
 extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
@@ -212,7 +218,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
 extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
                                enum rpcrdma_errcode);
-struct page *svc_rdma_get_page(void);
 extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
index adc0aff..2119c7c 100644 (file)
 #define ACL4_SUPPORT_AUDIT_ACL 0x04
 #define ACL4_SUPPORT_ALARM_ACL 0x08
 
+#define NFS4_ACL_AUTO_INHERIT 0x00000001
+#define NFS4_ACL_PROTECTED    0x00000002
+#define NFS4_ACL_DEFAULTED    0x00000004
+
 #define NFS4_ACE_FILE_INHERIT_ACE             0x00000001
 #define NFS4_ACE_DIRECTORY_INHERIT_ACE        0x00000002
 #define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE     0x00000004
@@ -93,6 +97,7 @@
 #define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG   0x00000010
 #define NFS4_ACE_FAILED_ACCESS_ACE_FLAG       0x00000020
 #define NFS4_ACE_IDENTIFIER_GROUP             0x00000040
+#define NFS4_ACE_INHERITED_ACE                0x00000080
 
 #define NFS4_ACE_READ_DATA                    0x00000001
 #define NFS4_ACE_LIST_DIRECTORY               0x00000001
 #define NFS4_ACE_DELETE_CHILD                 0x00000040
 #define NFS4_ACE_READ_ATTRIBUTES              0x00000080
 #define NFS4_ACE_WRITE_ATTRIBUTES             0x00000100
+#define NFS4_ACE_WRITE_RETENTION              0x00000200
+#define NFS4_ACE_WRITE_RETENTION_HOLD         0x00000400
 #define NFS4_ACE_DELETE                       0x00010000
 #define NFS4_ACE_READ_ACL                     0x00020000
 #define NFS4_ACE_WRITE_ACL                    0x00040000
index 9068e72..04ce2c0 100644 (file)
@@ -48,28 +48,16 @@ config SUNRPC_DEBUG
 
          If unsure, say Y.
 
-config SUNRPC_XPRT_RDMA_CLIENT
-       tristate "RPC over RDMA Client Support"
+config SUNRPC_XPRT_RDMA
+       tristate "RPC-over-RDMA transport"
        depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
        default SUNRPC && INFINIBAND
        help
-         This option allows the NFS client to support an RDMA-enabled
-         transport.
+         This option allows the NFS client and server to use RDMA
+         transports (InfiniBand, iWARP, or RoCE).
 
-         To compile RPC client RDMA transport support as a module,
-         choose M here: the module will be called xprtrdma.
+         To compile this support as a module, choose M. The module
+         will be called rpcrdma.ko.
 
-         If unsure, say N.
-
-config SUNRPC_XPRT_RDMA_SERVER
-       tristate "RPC over RDMA Server Support"
-       depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
-       default SUNRPC && INFINIBAND
-       help
-         This option allows the NFS server to support an RDMA-enabled
-         transport.
-
-         To compile RPC server RDMA transport support as a module,
-         choose M here: the module will be called svcrdma.
-
-         If unsure, say N.
+         If unsure, or you know there is no RDMA capability on your
+         hardware platform, say N.
index 15e6f6c..936ad0a 100644 (file)
@@ -5,8 +5,7 @@
 
 obj-$(CONFIG_SUNRPC) += sunrpc.o
 obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
-
-obj-y += xprtrdma/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
 
 sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
            auth.o auth_null.o auth_unix.o auth_generic.o \
index b5408e8..fee3c15 100644 (file)
@@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
        if (err)
                goto out_err;
 
-       sg_init_table(sg, 1);
-       sg_set_buf(sg, &zeroconstant, 4);
-
+       sg_init_one(sg, &zeroconstant, 4);
        err = crypto_hash_digest(&desc, sg, 4, Kseq);
        if (err)
                goto out_err;
@@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
        if (err)
                goto out_err;
 
-       sg_init_table(sg, 1);
-       sg_set_buf(sg, zeroconstant, 4);
-
+       sg_init_one(sg, zeroconstant, 4);
        err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
        if (err)
                goto out_err;
index 78974e4..852ae60 100644 (file)
@@ -1290,7 +1290,6 @@ err_bad:
        svc_putnl(resv, ntohl(rpc_stat));
        goto sendit;
 }
-EXPORT_SYMBOL_GPL(svc_process);
 
 /*
  * Process the RPC request.
@@ -1338,6 +1337,7 @@ out_drop:
        svc_drop(rqstp);
        return 0;
 }
+EXPORT_SYMBOL_GPL(svc_process);
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
index 579f72b..48913de 100644 (file)
@@ -1,9 +1,7 @@
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
-xprtrdma-y := transport.o rpc_rdma.o verbs.o \
-       fmr_ops.o frwr_ops.o physical_ops.o
-
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
-
-svcrdma-y := svc_rdma.o svc_rdma_transport.o \
-       svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
+rpcrdma-y := transport.o rpc_rdma.o verbs.o \
+       fmr_ops.o frwr_ops.o physical_ops.o \
+       svc_rdma.o svc_rdma_transport.o \
+       svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
+       module.o
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
new file mode 100644 (file)
index 0000000..560712b
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ */
+
+/* rpcrdma.ko module initialization
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
+MODULE_DESCRIPTION("RPC/RDMA Transport");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("svcrdma");
+MODULE_ALIAS("xprtrdma");
+
+static void __exit rpc_rdma_cleanup(void)
+{
+       xprt_rdma_cleanup();    /* reverse of the order used in rpc_rdma_init */
+       svc_rdma_cleanup();
+}
+
+static int __init rpc_rdma_init(void)
+{
+       int rc;
+
+       rc = svc_rdma_init();   /* svc_rdma side is set up first */
+       if (rc)
+               goto out;       /* nothing to undo yet */
+
+       rc = xprt_rdma_init();  /* then the xprt_rdma side */
+       if (rc)
+               svc_rdma_cleanup();     /* unwind the svc_rdma setup on failure */
+
+out:
+       return rc;      /* 0 on success, else the first failing init's code */
+}
+
+module_init(rpc_rdma_init);
+module_exit(rpc_rdma_cleanup);
index c1b6270..2cd252f 100644 (file)
@@ -38,8 +38,7 @@
  *
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
-#include <linux/module.h>
-#include <linux/init.h>
+
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/sysctl.h>
@@ -295,8 +294,3 @@ int svc_rdma_init(void)
        destroy_workqueue(svc_rdma_wq);
        return -ENOMEM;
 }
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("SVC RDMA Transport");
-MODULE_LICENSE("Dual BSD/GPL");
-module_init(svc_rdma_init);
-module_exit(svc_rdma_cleanup);
index b681855..e2fca76 100644 (file)
 /*
  * Decodes a read chunk list. The expected format is as follows:
  *    descrim  : xdr_one
- *    position : u32 offset into XDR stream
- *    handle   : u32 RKEY
+ *    position : __be32 offset into XDR stream
+ *    handle   : __be32 RKEY
  *    . . .
  *  end-of-list: xdr_zero
  */
-static u32 *decode_read_list(u32 *va, u32 *vaend)
+static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
 {
        struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
 
@@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
                }
                ch++;
        }
-       return (u32 *)&ch->rc_position;
+       return &ch->rc_position;
 }
 
 /*
  * Decodes a write chunk list. The expected format is as follows:
  *    descrim  : xdr_one
  *    nchunks  : <count>
- *       handle   : u32 RKEY              ---+
- *       length   : u32 <len of segment>     |
+ *       handle   : __be32 RKEY           ---+
+ *       length   : __be32 <len of segment>  |
  *       offset   : remove va                + <count>
  *       . . .                               |
  *                                        ---+
  */
-static u32 *decode_write_list(u32 *va, u32 *vaend)
+static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
 {
        unsigned long start, end;
        int nchunks;
@@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
 
        /* Check for not write-array */
        if (ary->wc_discrim == xdr_zero)
-               return (u32 *)&ary->wc_nchunks;
+               return &ary->wc_nchunks;
 
        if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
            (unsigned long)vaend) {
                dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
                return NULL;
        }
-       nchunks = ntohl(ary->wc_nchunks);
+       nchunks = be32_to_cpu(ary->wc_nchunks);
 
        start = (unsigned long)&ary->wc_array[0];
        end = (unsigned long)vaend;
@@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
         * rs_length is the 2nd 4B field in wc_target and taking its
         * address skips the list terminator
         */
-       return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
+       return &ary->wc_array[nchunks].wc_target.rs_length;
 }
 
-static u32 *decode_reply_array(u32 *va, u32 *vaend)
+static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
 {
        unsigned long start, end;
        int nchunks;
@@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
 
        /* Check for no reply-array */
        if (ary->wc_discrim == xdr_zero)
-               return (u32 *)&ary->wc_nchunks;
+               return &ary->wc_nchunks;
 
        if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
            (unsigned long)vaend) {
                dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
                return NULL;
        }
-       nchunks = ntohl(ary->wc_nchunks);
+       nchunks = be32_to_cpu(ary->wc_nchunks);
 
        start = (unsigned long)&ary->wc_array[0];
        end = (unsigned long)vaend;
@@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
                        ary, nchunks, vaend);
                return NULL;
        }
-       return (u32 *)&ary->wc_array[nchunks];
+       return (__be32 *)&ary->wc_array[nchunks];
 }
 
 int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
                            struct svc_rqst *rqstp)
 {
        struct rpcrdma_msg *rmsgp = NULL;
-       u32 *va;
-       u32 *vaend;
+       __be32 *va, *vaend;
        u32 hdr_len;
 
        rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
@@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
                return -EINVAL;
        }
 
-       /* Decode the header */
-       rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
-       rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
-       rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
-       rmsgp->rm_type = ntohl(rmsgp->rm_type);
-
-       if (rmsgp->rm_vers != RPCRDMA_VERSION)
+       if (rmsgp->rm_vers != rpcrdma_version)
                return -ENOSYS;
 
        /* Pull in the extra for the padded case and bump our pointer */
-       if (rmsgp->rm_type == RDMA_MSGP) {
+       if (rmsgp->rm_type == rdma_msgp) {
                int hdrlen;
+
                rmsgp->rm_body.rm_padded.rm_align =
-                       ntohl(rmsgp->rm_body.rm_padded.rm_align);
+                       be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
                rmsgp->rm_body.rm_padded.rm_thresh =
-                       ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
+                       be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
 
                va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
                rqstp->rq_arg.head[0].iov_base = va;
@@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
         * chunk list and a reply chunk list.
         */
        va = &rmsgp->rm_body.rm_chunks[0];
-       vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
+       vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
        va = decode_read_list(va, vaend);
        if (!va)
                return -EINVAL;
@@ -211,76 +205,20 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
        return hdr_len;
 }
 
-int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
-{
-       struct rpcrdma_msg *rmsgp = NULL;
-       struct rpcrdma_read_chunk *ch;
-       struct rpcrdma_write_array *ary;
-       u32 *va;
-       u32 hdrlen;
-
-       dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
-               rqstp);
-       rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-
-       /* Pull in the extra for the padded case and bump our pointer */
-       if (rmsgp->rm_type == RDMA_MSGP) {
-               va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
-               rqstp->rq_arg.head[0].iov_base = va;
-               hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
-               rqstp->rq_arg.head[0].iov_len -= hdrlen;
-               return hdrlen;
-       }
-
-       /*
-        * Skip all chunks to find RPC msg. These were previously processed
-        */
-       va = &rmsgp->rm_body.rm_chunks[0];
-
-       /* Skip read-list */
-       for (ch = (struct rpcrdma_read_chunk *)va;
-            ch->rc_discrim != xdr_zero; ch++);
-       va = (u32 *)&ch->rc_position;
-
-       /* Skip write-list */
-       ary = (struct rpcrdma_write_array *)va;
-       if (ary->wc_discrim == xdr_zero)
-               va = (u32 *)&ary->wc_nchunks;
-       else
-               /*
-                * rs_length is the 2nd 4B field in wc_target and taking its
-                * address skips the list terminator
-                */
-               va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
-
-       /* Skip reply-array */
-       ary = (struct rpcrdma_write_array *)va;
-       if (ary->wc_discrim == xdr_zero)
-               va = (u32 *)&ary->wc_nchunks;
-       else
-               va = (u32 *)&ary->wc_array[ary->wc_nchunks];
-
-       rqstp->rq_arg.head[0].iov_base = va;
-       hdrlen = (unsigned long)va - (unsigned long)rmsgp;
-       rqstp->rq_arg.head[0].iov_len -= hdrlen;
-
-       return hdrlen;
-}
-
 int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
                              struct rpcrdma_msg *rmsgp,
-                             enum rpcrdma_errcode err, u32 *va)
+                             enum rpcrdma_errcode err, __be32 *va)
 {
-       u32 *startp = va;
+       __be32 *startp = va;
 
-       *va++ = htonl(rmsgp->rm_xid);
-       *va++ = htonl(rmsgp->rm_vers);
-       *va++ = htonl(xprt->sc_max_requests);
-       *va++ = htonl(RDMA_ERROR);
-       *va++ = htonl(err);
+       *va++ = rmsgp->rm_xid;
+       *va++ = rmsgp->rm_vers;
+       *va++ = cpu_to_be32(xprt->sc_max_requests);
+       *va++ = rdma_error;
+       *va++ = cpu_to_be32(err);
        if (err == ERR_VERS) {
-               *va++ = htonl(RPCRDMA_VERSION);
-               *va++ = htonl(RPCRDMA_VERSION);
+               *va++ = rpcrdma_version;
+               *va++ = rpcrdma_version;
        }
 
        return (int)((unsigned long)va - (unsigned long)startp);
@@ -297,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
                &rmsgp->rm_body.rm_chunks[1];
        if (wr_ary->wc_discrim)
                wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
+                       &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
                        wc_target.rs_length;
        else
                wr_ary = (struct rpcrdma_write_array *)
@@ -306,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
        /* skip reply array */
        if (wr_ary->wc_discrim)
                wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
+                       &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
        else
                wr_ary = (struct rpcrdma_write_array *)
                        &wr_ary->wc_nchunks;
@@ -325,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
        ary = (struct rpcrdma_write_array *)
                &rmsgp->rm_body.rm_chunks[1];
        ary->wc_discrim = xdr_one;
-       ary->wc_nchunks = htonl(chunks);
+       ary->wc_nchunks = cpu_to_be32(chunks);
 
        /* write-list terminator */
        ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
@@ -338,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
                                 int chunks)
 {
        ary->wc_discrim = xdr_one;
-       ary->wc_nchunks = htonl(chunks);
+       ary->wc_nchunks = cpu_to_be32(chunks);
 }
 
 void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
@@ -350,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
        struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
        seg->rs_handle = rs_handle;
        seg->rs_offset = rs_offset;
-       seg->rs_length = htonl(write_len);
+       seg->rs_length = cpu_to_be32(write_len);
 }
 
 void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
@@ -358,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
                                  struct rpcrdma_msg *rdma_resp,
                                  enum rpcrdma_proc rdma_type)
 {
-       rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
-       rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
-       rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
-       rdma_resp->rm_type = htonl(rdma_type);
+       rdma_resp->rm_xid = rdma_argp->rm_xid;
+       rdma_resp->rm_vers = rdma_argp->rm_vers;
+       rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
+       rdma_resp->rm_type = cpu_to_be32(rdma_type);
 
        /* Encode <nul> chunks lists */
        rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
index 86b4416..2e1348b 100644 (file)
@@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 
        /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
        rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-       if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+       if (rmsgp->rm_type == rdma_nomsg)
                rqstp->rq_arg.pages = &rqstp->rq_pages[0];
        else
                rqstp->rq_arg.pages = &rqstp->rq_pages[1];
index 7de33d1..d25cd43 100644 (file)
@@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
        u32 xdr_off;
        int chunk_off;
        int chunk_no;
+       int nchunks;
        struct rpcrdma_write_array *arg_ary;
        struct rpcrdma_write_array *res_ary;
        int ret;
@@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
                &rdma_resp->rm_body.rm_chunks[1];
 
        /* Write chunks start at the pagelist */
+       nchunks = be32_to_cpu(arg_ary->wc_nchunks);
        for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
-            xfer_len && chunk_no < arg_ary->wc_nchunks;
+            xfer_len && chunk_no < nchunks;
             chunk_no++) {
                struct rpcrdma_segment *arg_ch;
                u64 rs_offset;
 
                arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
-               write_len = min(xfer_len, ntohl(arg_ch->rs_length));
+               write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
 
                /* Prepare the response chunk given the length actually
                 * written */
@@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
                chunk_off = 0;
                while (write_len) {
                        ret = send_write(xprt, rqstp,
-                                        ntohl(arg_ch->rs_handle),
+                                        be32_to_cpu(arg_ch->rs_handle),
                                         rs_offset + chunk_off,
                                         xdr_off,
                                         write_len,
@@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
                &rdma_resp->rm_body.rm_chunks[2];
 
        /* xdr offset starts at RPC message */
-       nchunks = ntohl(arg_ary->wc_nchunks);
+       nchunks = be32_to_cpu(arg_ary->wc_nchunks);
        for (xdr_off = 0, chunk_no = 0;
             xfer_len && chunk_no < nchunks;
             chunk_no++) {
                u64 rs_offset;
                ch = &arg_ary->wc_array[chunk_no].wc_target;
-               write_len = min(xfer_len, htonl(ch->rs_length));
+               write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
 
                /* Prepare the reply chunk given the length actually
                 * written */
@@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
                chunk_off = 0;
                while (write_len) {
                        ret = send_write(xprt, rqstp,
-                                        ntohl(ch->rs_handle),
+                                        be32_to_cpu(ch->rs_handle),
                                         rs_offset + chunk_off,
                                         xdr_off,
                                         write_len,
@@ -515,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        inline_bytes = rqstp->rq_res.len;
 
        /* Create the RDMA response header */
-       res_page = svc_rdma_get_page();
+       res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
        rdma_resp = page_address(res_page);
        reply_ary = svc_rdma_get_reply_array(rdma_argp);
        if (reply_ary)
index f4cfa76..6b36279 100644 (file)
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
        .xcl_name = "rdma",
        .xcl_owner = THIS_MODULE,
        .xcl_ops = &svc_rdma_ops,
-       .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
+       .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
        .xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
@@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
        struct svc_rdma_op_ctxt *ctxt;
 
-       while (1) {
-               ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
-               if (ctxt)
-                       break;
-               schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-       }
+       ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
+                               GFP_KERNEL | __GFP_NOFAIL);
        ctxt->xprt = xprt;
        INIT_LIST_HEAD(&ctxt->dto_q);
        ctxt->count = 0;
@@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 {
        struct svc_rdma_req_map *map;
-       while (1) {
-               map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
-               if (map)
-                       break;
-               schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-       }
+       map = kmem_cache_alloc(svc_rdma_map_cachep,
+                              GFP_KERNEL | __GFP_NOFAIL);
        map->count = 0;
        return map;
 }
@@ -493,18 +485,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        return cma_xprt;
 }
 
-struct page *svc_rdma_get_page(void)
-{
-       struct page *page;
-
-       while ((page = alloc_page(GFP_KERNEL)) == NULL) {
-               /* If we can't get memory, wait a bit and try again */
-               printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
-               schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
-       }
-       return page;
-}
-
 int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 {
        struct ib_recv_wr recv_wr, *bad_recv_wr;
@@ -523,7 +503,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                        pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                        goto err_put_ctxt;
                }
-               page = svc_rdma_get_page();
+               page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
@@ -1318,11 +1298,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
        struct ib_send_wr err_wr;
        struct page *p;
        struct svc_rdma_op_ctxt *ctxt;
-       u32 *va;
+       __be32 *va;
        int length;
        int ret;
 
-       p = svc_rdma_get_page();
+       p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
        va = page_address(p);
 
        /* XDR encode error */
index 54f23b1..436da2c 100644 (file)
@@ -48,7 +48,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/sunrpc/addr.h>
 # define RPCDBG_FACILITY       RPCDBG_TRANS
 #endif
 
-MODULE_LICENSE("Dual BSD/GPL");
-
-MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
-MODULE_AUTHOR("Network Appliance, Inc.");
-
 /*
  * tunables
  */
@@ -711,7 +705,7 @@ static struct xprt_class xprt_rdma = {
        .setup                  = xprt_setup_rdma,
 };
 
-static void __exit xprt_rdma_cleanup(void)
+void xprt_rdma_cleanup(void)
 {
        int rc;
 
@@ -728,7 +722,7 @@ static void __exit xprt_rdma_cleanup(void)
                        __func__, rc);
 }
 
-static int __init xprt_rdma_init(void)
+int xprt_rdma_init(void)
 {
        int rc;
 
@@ -753,6 +747,3 @@ static int __init xprt_rdma_init(void)
 #endif
        return 0;
 }
-
-module_init(xprt_rdma_init);
-module_exit(xprt_rdma_cleanup);
index 78e0b8b..58163b8 100644 (file)
@@ -480,6 +480,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
  */
 int rpcrdma_marshal_req(struct rpc_rqst *);
 
+/* RPC/RDMA module init - xprtrdma/transport.c
+ */
+int xprt_rdma_init(void);
+void xprt_rdma_cleanup(void);
+
 /* Temporary NFS request map cache. Created in svc_rdma.c  */
 extern struct kmem_cache *svc_rdma_map_cachep;
 /* WR context cache. Created in svc_rdma.c  */
@@ -487,10 +492,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
 /* Workqueue created in svc_rdma.c */
 extern struct workqueue_struct *svc_rdma_wq;
 
-#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
-#else
-#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#endif
-
 #endif                         /* _LINUX_SUNRPC_XPRT_RDMA_H */