rate of change for this counter is zero; significantly non-zero
values may indicate a performance limitation.
- This can happen either because there are too few nfsd threads in the
- thread pool for the NFS workload (the workload is thread-limited),
- or because the NFS workload needs more CPU time than is available in
- the thread pool (the workload is CPU-limited). In the former case,
- configuring more nfsd threads will probably improve the performance
- of the NFS workload. In the latter case, the sunrpc server layer is
- already choosing not to wake idle nfsd threads because there are too
- many nfsd threads which want to run but cannot, so configuring more
- nfsd threads will make no difference whatsoever. The overloads-avoided
- statistic (see below) can be used to distinguish these cases.
+ This can happen because there are too few nfsd threads in the thread
+ pool for the NFS workload (the workload is thread-limited), in which
+ case configuring more nfsd threads will probably improve the
+ performance of the NFS workload.
threads-woken
Counts how many times an idle nfsd thread is woken to try to
thing. The ideal rate of change for this counter will be close
to but less than the rate of change of the packets-arrived counter.
-overloads-avoided
- Counts how many times the sunrpc server layer chose not to wake an
- nfsd thread, despite the presence of idle nfsd threads, because
- too many nfsd threads had been recently woken but could not get
- enough CPU time to actually run.
-
- This statistic counts a circumstance where the sunrpc layer
- heuristically avoids overloading the CPU scheduler with too many
- runnable nfsd threads. The ideal rate of change for this counter
- is zero. Significant non-zero values indicate that the workload
- is CPU limited. Usually this is associated with heavy CPU usage
- on all the CPUs in the nfsd thread pool.
-
- If a sustained large overloads-avoided rate is detected on a pool,
- the top(1) utility should be used to check for the following
- pattern of CPU usage on all the CPUs associated with the given
- nfsd thread pool.
-
- - %us ~= 0 (as you're *NOT* running applications on your NFS server)
-
- - %wa ~= 0
-
- - %id ~= 0
-
- - %sy + %hi + %si ~= 100
-
- If this pattern is seen, configuring more nfsd threads will *not*
- improve the performance of the workload. If this patten is not
- seen, then something more subtle is wrong.
-
threads-timedout
Counts how many times an nfsd thread triggered an idle timeout,
i.e. was not woken to handle any incoming network packets for
static __be32
compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
- const char *name, int namlen)
+ const char *name, int namlen, u64 ino)
{
struct svc_export *exp;
struct dentry *dparent, *dchild;
goto out;
if (d_really_is_negative(dchild))
goto out;
+ if (dchild->d_inode->i_ino != ino)
+ goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
out:
dput(dchild);
return rv;
}
-static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
+static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
{
struct svc_fh *fh = &cd->scratch;
__be32 err;
fh_init(fh, NFS3_FHSIZE);
- err = compose_entry_fh(cd, fh, name, namlen);
+ err = compose_entry_fh(cd, fh, name, namlen, ino);
if (err) {
*p++ = 0;
*p++ = 0;
p = encode_entry_baggage(cd, p, name, namlen, ino);
if (plus)
- p = encode_entryplus_baggage(cd, p, name, namlen);
+ p = encode_entryplus_baggage(cd, p, name, namlen, ino);
num_entry_words = p - cd->buffer;
} else if (*(page+1) != NULL) {
/* temporarily encode entry into next page, then move back to
p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
if (plus)
- p1 = encode_entryplus_baggage(cd, p1, name, namlen);
+ p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
/* determine entry word length and lengths to go in pages */
num_entry_words = p1 - tmp;
#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
-/* We don't support these bits; insist they be neither allowed nor denied */
-#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
- | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
-
/* flags used to simulate posix default ACLs */
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
| NFS4_ACE_DIRECTORY_INHERIT_ACE)
| NFS4_ACE_INHERIT_ONLY_ACE \
| NFS4_ACE_IDENTIFIER_GROUP)
-#define MASK_EQUAL(mask1, mask2) \
- ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
-
static u32
mask_from_posix(unsigned short perm, unsigned int flags)
{
*mode |= ACL_EXECUTE;
}
-struct ace_container {
- struct nfs4_ace *ace;
- struct list_head ace_l;
-};
-
static short ace2type(struct nfs4_ace *);
static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
unsigned int);
static void
sort_pacl_range(struct posix_acl *pacl, int start, int end) {
int sorted = 0, i;
- struct posix_acl_entry tmp;
/* We just do a bubble sort; easy to do in place, and we're not
* expecting acl's to be long enough to justify anything more. */
if (pace_gt(&pacl->a_entries[i],
&pacl->a_entries[i+1])) {
sorted = 0;
- tmp = pacl->a_entries[i];
- pacl->a_entries[i] = pacl->a_entries[i+1];
- pacl->a_entries[i+1] = tmp;
+ swap(pacl->a_entries[i],
+ pacl->a_entries[i + 1]);
}
}
}
if (unlikely(status || cb->cb_status))
return status;
+ cb->cb_update_seq_nr = true;
return decode_cb_sequence4resok(xdr, cb);
}
u32 minorversion = clp->cl_minorversion;
cb->cb_minorversion = minorversion;
+ cb->cb_update_seq_nr = false;
+ cb->cb_status = 0;
if (minorversion) {
if (!nfsd41_cb_get_slot(clp, task))
return;
clp->cl_minorversion);
if (clp->cl_minorversion) {
- /* No need for lock, access serialized in nfsd4_cb_prepare */
- if (!task->tk_status)
+ /*
+ * No need for lock, access serialized in nfsd4_cb_prepare
+ *
+ * RFC5661 20.9.3
+ * If CB_SEQUENCE returns an error, then the state of the slot
+ * (sequence ID, cached reply) MUST NOT change.
+ */
+ if (cb->cb_update_seq_nr)
++clp->cl_cb_session->se_cb_seq_nr;
+
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
+ cb->cb_update_seq_nr = false;
cb->cb_need_restart = false;
}
{
__be32 status;
- /* no need to check permission - this will be done in nfsd_read() */
-
read->rd_filp = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
- if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
- cstate, &read->rd_stateid,
- RD_STATE, &read->rd_filp))) {
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
+ RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+ if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
}
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
- &setattr->sa_stateid, WR_STATE, NULL);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ &setattr->sa_stateid, WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
unsigned long cnt;
int nvecs;
- /* no need to check permission - this will be done in nfsd_write() */
-
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
- cstate, stateid, WR_STATE, &filp);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
+ &filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
nvecs = fill_in_write_vector(rqstp->rq_vec, write);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
- status = nfsd_write(rqstp, &cstate->current_fh, filp,
- write->wr_offset, rqstp->rq_vec, nvecs,
- &cnt, &write->wr_how_written);
- if (filp)
- fput(filp);
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+ write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
+ &write->wr_how_written);
+ fput(filp);
write->wr_bytes_written = cnt;
__be32 status = nfserr_notsupp;
struct file *file;
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
&fallocate->falloc_stateid,
- WR_STATE, &file);
+ WR_STATE, &file, NULL);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
- if (!file)
- return nfserr_bad_stateid;
status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
fallocate->falloc_offset,
__be32 status;
struct file *file;
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
&seek->seek_stateid,
- RD_STATE, &file);
+ RD_STATE, &file, NULL);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
}
- if (!file)
- return nfserr_bad_stateid;
switch (seek->seek_whence) {
case NFS4_CONTENT_DATA:
be32_to_cpu(status));
nfsd4_cstate_clear_replay(cstate);
- /* XXX Ugh, we need to get rid of this kind of special case: */
- if (op->opnum == OP_READ && op->u.read.rd_filp)
- fput(op->u.read.rd_filp);
-
nfsd4_increment_op_stats(op->opnum);
}
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
{
__be32 status;
- unsigned char old_deny_bmap;
+ unsigned char old_deny_bmap = stp->st_deny_bmap;
if (!test_access(open->op_share_access, stp))
return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny);
if (status == nfs_ok) {
- old_deny_bmap = stp->st_deny_bmap;
set_deny(open->op_share_deny, stp);
fp->fi_share_deny |=
(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
return nfs_ok;
}
+static struct file *
+nfs4_find_file(struct nfs4_stid *s, int flags)
+{
+ if (!s)
+ return NULL;
+
+ switch (s->sc_type) {
+ case NFS4_DELEG_STID:
+ if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
+ return NULL;
+ return get_file(s->sc_file->fi_deleg_file);
+ case NFS4_OPEN_STID:
+ case NFS4_LOCK_STID:
+ if (flags & RD_STATE)
+ return find_readable_file(s->sc_file);
+ else
+ return find_writeable_file(s->sc_file);
+ break;
+ }
+
+ return NULL;
+}
+
+static __be32
+nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
+{
+ __be32 status;
+
+ status = nfs4_check_fh(fhp, ols);
+ if (status)
+ return status;
+ status = nfsd4_check_openowner_confirmed(ols);
+ if (status)
+ return status;
+ return nfs4_check_openmode(ols, flags);
+}
+
+static __be32
+nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
+ struct file **filpp, bool *tmp_file, int flags)
+{
+ int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
+ struct file *file;
+ __be32 status;
+
+ file = nfs4_find_file(s, flags);
+ if (file) {
+ status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ acc | NFSD_MAY_OWNER_OVERRIDE);
+ if (status) {
+ fput(file);
+ return status;
+ }
+
+ *filpp = file;
+ } else {
+ status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+ if (status)
+ return status;
+
+ if (tmp_file)
+ *tmp_file = true;
+ }
+
+ return 0;
+}
+
/*
-* Checks for stateid operations
-*/
+ * Checks for stateid operations
+ */
__be32
-nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
- stateid_t *stateid, int flags, struct file **filpp)
+nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate, stateid_t *stateid,
+ int flags, struct file **filpp, bool *tmp_file)
{
- struct nfs4_stid *s;
- struct nfs4_ol_stateid *stp = NULL;
- struct nfs4_delegation *dp = NULL;
- struct svc_fh *current_fh = &cstate->current_fh;
- struct inode *ino = d_inode(current_fh->fh_dentry);
+ struct svc_fh *fhp = &cstate->current_fh;
+ struct inode *ino = d_inode(fhp->fh_dentry);
+ struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct file *file = NULL;
+ struct nfs4_stid *s = NULL;
__be32 status;
if (filpp)
*filpp = NULL;
+ if (tmp_file)
+ *tmp_file = false;
if (grace_disallows_io(net, ino))
return nfserr_grace;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
- return check_special_stateids(net, current_fh, stateid, flags);
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+ status = check_special_stateids(net, fhp, stateid, flags);
+ goto done;
+ }
status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, nn);
if (status)
return status;
- status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
+ status = check_stateid_generation(stateid, &s->sc_stateid,
+ nfsd4_has_session(cstate));
if (status)
goto out;
+
switch (s->sc_type) {
case NFS4_DELEG_STID:
- dp = delegstateid(s);
- status = nfs4_check_delegmode(dp, flags);
- if (status)
- goto out;
- if (filpp) {
- file = dp->dl_stid.sc_file->fi_deleg_file;
- if (!file) {
- WARN_ON_ONCE(1);
- status = nfserr_serverfault;
- goto out;
- }
- get_file(file);
- }
+ status = nfs4_check_delegmode(delegstateid(s), flags);
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
- stp = openlockstateid(s);
- status = nfs4_check_fh(current_fh, stp);
- if (status)
- goto out;
- status = nfsd4_check_openowner_confirmed(stp);
- if (status)
- goto out;
- status = nfs4_check_openmode(stp, flags);
- if (status)
- goto out;
- if (filpp) {
- struct nfs4_file *fp = stp->st_stid.sc_file;
-
- if (flags & RD_STATE)
- file = find_readable_file(fp);
- else
- file = find_writeable_file(fp);
- }
+ status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
- goto out;
+ break;
}
- status = nfs_ok;
- if (file)
- *filpp = file;
+
+done:
+ if (!status && filpp)
+ status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
out:
- nfs4_put_stid(s);
+ if (s)
+ nfs4_put_stid(s);
return status;
}
__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (!err) {
err = nfserrno(vfs_test_lock(file, lock));
- nfsd_close(file);
+ fput(file);
}
return err;
}
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/statfs.h>
u32 rdattr_err = 0;
__be32 status;
int err;
- int aclsupport = 0;
struct nfs4_acl *acl = NULL;
void *context = NULL;
int contextlen;
goto out;
fhp = tempfh;
}
- if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
- | FATTR4_WORD0_SUPPORTED_ATTRS)) {
+ if (bmval0 & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
- aclsupport = (err == 0);
- if (bmval0 & FATTR4_WORD0_ACL) {
- if (err == -EOPNOTSUPP)
- bmval0 &= ~FATTR4_WORD0_ACL;
- else if (err == -EINVAL) {
- status = nfserr_attrnotsupp;
- goto out;
- } else if (err != 0)
- goto out_nfserr;
- }
+ if (err == -EOPNOTSUPP)
+ bmval0 &= ~FATTR4_WORD0_ACL;
+ else if (err == -EINVAL) {
+ status = nfserr_attrnotsupp;
+ goto out;
+ } else if (err != 0)
+ goto out_nfserr;
}
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
u32 word1 = nfsd_suppattrs1(minorversion);
u32 word2 = nfsd_suppattrs2(minorversion);
- if (!aclsupport)
+ if (!IS_POSIXACL(dentry->d_inode))
word0 &= ~FATTR4_WORD0_ACL;
if (!contextsupport)
word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
p = xdr_reserve_space(xdr, 4);
if (!p)
goto out_resource;
- *p++ = cpu_to_be32(aclsupport ?
+ *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ?
ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
}
if (bmval0 & FATTR4_WORD0_CANSETTIME) {
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
struct file *file = read->rd_filp;
- struct svc_fh *fhp = read->rd_fhp;
int starting_len = xdr->buf->len;
- struct raparms *ra;
+ struct raparms *ra = NULL;
__be32 *p;
- __be32 err;
if (nfserr)
- return nfserr;
+ goto out;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
- return nfserr_resource;
+ nfserr = nfserr_resource;
+ goto out;
}
- if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
+ if (resp->xdr.buf->page_len &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
WARN_ON_ONCE(1);
- return nfserr_resource;
+ nfserr = nfserr_resource;
+ goto out;
}
xdr_commit_encode(xdr);
maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
+ maxcount = min_t(unsigned long, maxcount,
+ (xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
- if (read->rd_filp)
- err = nfsd_permission(resp->rqstp, fhp->fh_export,
- fhp->fh_dentry,
- NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
- else
- err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
- &file, &ra);
- if (err)
- goto err_truncate;
+ if (read->rd_tmp_file)
+ ra = nfsd_init_raparms(file);
- if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
- err = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ if (file->f_op->splice_read &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
- err = nfsd4_encode_readv(resp, read, file, maxcount);
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
- if (!read->rd_filp)
- nfsd_put_tmp_read_open(file, ra);
+ if (ra)
+ nfsd_put_raparams(file, ra);
-err_truncate:
- if (err)
+ if (nfserr)
xdr_truncate_encode(xdr, starting_len);
- return err;
+
+out:
+ if (file)
+ fput(file);
+ return nfserr;
}
static __be32
nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
struct nfsd_attrstat *resp)
{
+ struct iattr *iap = &argp->attrs;
+ struct svc_fh *fhp;
__be32 nfserr;
+
dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
SVCFH_fmt(&argp->fh),
argp->attrs.ia_valid, (long) argp->attrs.ia_size);
- fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+ fhp = fh_copy(&resp->fh, &argp->fh);
+
+ /*
+ * NFSv2 does not differentiate between "set-[ac]time-to-now"
+ * which only requires access, and "set-[ac]time-to-X" which
+ * requires ownership.
+ * So if it looks like it might be "set both to the same time which
+ * is close to now", and if inode_change_ok fails, then we
+ * convert to "set to now" instead of "set to explicit time"
+ *
+ * We only call inode_change_ok as the last test as technically
+ * it is not an interface that we should be using.
+ */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define MAX_TOUCH_TIME_ERROR (30*60)
+ if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
+ iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
+ /*
+ * Looks probable.
+ *
+ * Now just make sure time is in the right ballpark.
+ * Solaris, at least, doesn't seem to care what the time
+ * request is. We require it be within 30 minutes of now.
+ */
+ time_t delta = iap->ia_atime.tv_sec - get_seconds();
+ struct inode *inode;
+
+ nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
+ if (nfserr)
+ goto done;
+ inode = d_inode(fhp->fh_dentry);
+
+ if (delta < 0)
+ delta = -delta;
+ if (delta < MAX_TOUCH_TIME_ERROR &&
+ inode_change_ok(inode, iap) != 0) {
+ /*
+ * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
+ * This will cause notify_change to set these times
+ * to "now"
+ */
+ iap->ia_valid &= ~BOTH_TIME_SET;
+ }
+ }
+
+ nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
+done:
return nfsd_return_attrs(nfserr, resp);
}
struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_status;
+ bool cb_update_seq_nr;
bool cb_need_restart;
};
struct nfsd4_compound_state;
struct nfsd_net;
-extern __be32 nfs4_preprocess_stateid_op(struct net *net,
- struct nfsd4_compound_state *cstate,
- stateid_t *stateid, int flags, struct file **filp);
+extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate, stateid_t *stateid,
+ int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
- /*
- * NFSv2 does not differentiate between "set-[ac]time-to-now"
- * which only requires access, and "set-[ac]time-to-X" which
- * requires ownership.
- * So if it looks like it might be "set both to the same time which
- * is close to now", and if inode_change_ok fails, then we
- * convert to "set to now" instead of "set to explicit time"
- *
- * We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using.
- */
-#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
-#define MAX_TOUCH_TIME_ERROR (30*60)
- if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
- iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
- /*
- * Looks probable.
- *
- * Now just make sure time is in the right ballpark.
- * Solaris, at least, doesn't seem to care what the time
- * request is. We require it be within 30 minutes of now.
- */
- time_t delta = iap->ia_atime.tv_sec - get_seconds();
- if (delta < 0)
- delta = -delta;
- if (delta < MAX_TOUCH_TIME_ERROR &&
- inode_change_ok(inode, iap) != 0) {
- /*
- * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
- * This will cause notify_change to set these times
- * to "now"
- */
- iap->ia_valid &= ~BOTH_TIME_SET;
- }
- }
-
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
struct file *file, loff_t offset, loff_t len,
int flags)
{
- __be32 err;
int error;
if (!S_ISREG(file_inode(file)->i_mode))
return nfserr_inval;
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
- if (err)
- return err;
-
error = vfs_fallocate(file, flags, offset, len);
if (!error)
error = commit_metadata(fhp);
host_err = ima_file_check(file, may_flags, 0);
if (host_err) {
- nfsd_close(file);
+ fput(file);
goto out_nfserr;
}
return err;
}
-/*
- * Close a file.
- */
-void
-nfsd_close(struct file *filp)
-{
- fput(filp);
-}
-
-/*
- * Obtain the readahead parameters for the file
- * specified by (dev, ino).
- */
-
-static inline struct raparms *
-nfsd_get_raparms(dev_t dev, ino_t ino)
+struct raparms *
+nfsd_init_raparms(struct file *file)
{
+ struct inode *inode = file_inode(file);
+ dev_t dev = inode->i_sb->s_dev;
+ ino_t ino = inode->i_ino;
struct raparms *ra, **rap, **frap = NULL;
int depth = 0;
unsigned int hash;
ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
spin_unlock(&rab->pb_lock);
+
+ if (ra->p_set)
+ file->f_ra = ra->p_ra;
return ra;
}
+void nfsd_put_raparams(struct file *file, struct raparms *ra)
+{
+ struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+
+ spin_lock(&rab->pb_lock);
+ ra->p_ra = file->f_ra;
+ ra->p_set = 1;
+ ra->p_count--;
+ spin_unlock(&rab->pb_lock);
+}
+
/*
* Grab and keep cached pages associated with a file in the svc_rqst
* so that they can be passed to the network sendmsg/sendpage routines
return err;
}
-static __be32
+__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
unsigned long *cnt, int *stablep)
return err;
}
-__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file **file, struct raparms **ra)
-{
- struct inode *inode;
- __be32 err;
-
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
- if (err)
- return err;
-
- inode = file_inode(*file);
-
- /* Get readahead parameters */
- *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
-
- if (*ra && (*ra)->p_set)
- (*file)->f_ra = (*ra)->p_ra;
- return nfs_ok;
-}
-
-void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
-{
- /* Write back readahead params */
- if (ra) {
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
- }
- nfsd_close(file);
-}
-
/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
struct raparms *ra;
__be32 err;
- err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
+ ra = nfsd_init_raparms(file);
err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
-
- nfsd_put_tmp_read_open(file, ra);
+ if (ra)
+ nfsd_put_raparams(file, ra);
+ fput(file);
return err;
}
if (cnt)
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
cnt, stablep);
- nfsd_close(file);
+ fput(file);
}
out:
return err;
err = nfserr_notsupp;
}
- nfsd_close(file);
+ fput(file);
out:
return err;
}
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
- nfsd_close(file);
+ fput(file);
out:
return err;
}
#endif /* CONFIG_NFSD_V3 */
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-void nfsd_close(struct file *);
struct raparms;
-__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
- struct file **, struct raparms **);
-void nfsd_put_tmp_read_open(struct file *, struct raparms *);
__be32 nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt,
+ int *stablep);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
+struct raparms *nfsd_init_raparms(struct file *file);
+void nfsd_put_raparams(struct file *file, struct raparms *ra);
+
static inline int fh_want_write(struct svc_fh *fh)
{
int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
u32 rd_length; /* request */
int rd_vlen;
struct file *rd_filp;
+ bool rd_tmp_file;
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh * rd_fhp; /* response */
#define RDMAXPRT_SQ_PENDING 2
#define RDMAXPRT_CONN_PENDING 3
+#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */
+#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD
+#else
+#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#endif
+
#define RPCRDMA_LISTEN_BACKLOG 10
/* The default ORD value is based on two outstanding full-size writes with a
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
/* svc_rdma_marshal.c */
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
-extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
struct rpcrdma_msg *,
- enum rpcrdma_errcode, u32 *);
+ enum rpcrdma_errcode, __be32 *);
extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
-struct page *svc_rdma_get_page(void);
extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
#define ACL4_SUPPORT_AUDIT_ACL 0x04
#define ACL4_SUPPORT_ALARM_ACL 0x08
+#define NFS4_ACL_AUTO_INHERIT 0x00000001
+#define NFS4_ACL_PROTECTED 0x00000002
+#define NFS4_ACL_DEFAULTED 0x00000004
+
#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001
#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002
#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004
#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
+#define NFS4_ACE_INHERITED_ACE 0x00000080
#define NFS4_ACE_READ_DATA 0x00000001
#define NFS4_ACE_LIST_DIRECTORY 0x00000001
#define NFS4_ACE_DELETE_CHILD 0x00000040
#define NFS4_ACE_READ_ATTRIBUTES 0x00000080
#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100
+#define NFS4_ACE_WRITE_RETENTION 0x00000200
+#define NFS4_ACE_WRITE_RETENTION_HOLD 0x00000400
#define NFS4_ACE_DELETE 0x00010000
#define NFS4_ACE_READ_ACL 0x00020000
#define NFS4_ACE_WRITE_ACL 0x00040000
If unsure, say Y.
-config SUNRPC_XPRT_RDMA_CLIENT
- tristate "RPC over RDMA Client Support"
+config SUNRPC_XPRT_RDMA
+ tristate "RPC-over-RDMA transport"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
- This option allows the NFS client to support an RDMA-enabled
- transport.
+ This option allows the NFS client and server to use RDMA
+ transports (InfiniBand, iWARP, or RoCE).
- To compile RPC client RDMA transport support as a module,
- choose M here: the module will be called xprtrdma.
+ To compile this support as a module, choose M. The module
+ will be called rpcrdma.ko.
- If unsure, say N.
-
-config SUNRPC_XPRT_RDMA_SERVER
- tristate "RPC over RDMA Server Support"
- depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
- default SUNRPC && INFINIBAND
- help
- This option allows the NFS server to support an RDMA-enabled
- transport.
-
- To compile RPC server RDMA transport support as a module,
- choose M here: the module will be called svcrdma.
-
- If unsure, say N.
+ If unsure, or you know there is no RDMA capability on your
+ hardware platform, say N.
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
-
-obj-y += xprtrdma/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
if (err)
goto out_err;
- sg_init_table(sg, 1);
- sg_set_buf(sg, &zeroconstant, 4);
-
+ sg_init_one(sg, &zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kseq);
if (err)
goto out_err;
if (err)
goto out_err;
- sg_init_table(sg, 1);
- sg_set_buf(sg, zeroconstant, 4);
-
+ sg_init_one(sg, zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
-EXPORT_SYMBOL_GPL(svc_process);
/*
* Process the RPC request.
svc_drop(rqstp);
return 0;
}
+EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-xprtrdma-y := transport.o rpc_rdma.o verbs.o \
- fmr_ops.o frwr_ops.o physical_ops.o
-
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
-
-svcrdma-y := svc_rdma.o svc_rdma_transport.o \
- svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
+rpcrdma-y := transport.o rpc_rdma.o verbs.o \
+ fmr_ops.o frwr_ops.o physical_ops.o \
+ svc_rdma.o svc_rdma_transport.o \
+ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
+ module.o
--- /dev/null
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ */
+
+/* rpcrdma.ko module initialization
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
+MODULE_DESCRIPTION("RPC/RDMA Transport");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("svcrdma");
+MODULE_ALIAS("xprtrdma");
+
+static void __exit rpc_rdma_cleanup(void)
+{
+ xprt_rdma_cleanup();
+ svc_rdma_cleanup();
+}
+
+static int __init rpc_rdma_init(void)
+{
+ int rc;
+
+ rc = svc_rdma_init();
+ if (rc)
+ goto out;
+
+ rc = xprt_rdma_init();
+ if (rc)
+ svc_rdma_cleanup();
+
+out:
+ return rc;
+}
+
+module_init(rpc_rdma_init);
+module_exit(rpc_rdma_cleanup);
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
-#include <linux/module.h>
-#include <linux/init.h>
+
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("SVC RDMA Transport");
-MODULE_LICENSE("Dual BSD/GPL");
-module_init(svc_rdma_init);
-module_exit(svc_rdma_cleanup);
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
- * position : u32 offset into XDR stream
- * handle : u32 RKEY
+ * position : __be32 offset into XDR stream
+ * handle : __be32 RKEY
* . . .
* end-of-list: xdr_zero
*/
-static u32 *decode_read_list(u32 *va, u32 *vaend)
+static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
}
ch++;
}
- return (u32 *)&ch->rc_position;
+ return &ch->rc_position;
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
- * handle : u32 RKEY ---+
- * length : u32 <len of segment> |
+ * handle : __be32 RKEY ---+
+ * length : __be32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
-static u32 *decode_write_list(u32 *va, u32 *vaend)
+static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
- return (u32 *)&ary->wc_nchunks;
+ return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
- nchunks = ntohl(ary->wc_nchunks);
+ nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
- return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
+ return &ary->wc_array[nchunks].wc_target.rs_length;
}
-static u32 *decode_reply_array(u32 *va, u32 *vaend)
+static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
- return (u32 *)&ary->wc_nchunks;
+ return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
- nchunks = ntohl(ary->wc_nchunks);
+ nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
ary, nchunks, vaend);
return NULL;
}
- return (u32 *)&ary->wc_array[nchunks];
+ return (__be32 *)&ary->wc_array[nchunks];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
- u32 *va;
- u32 *vaend;
+ __be32 *va, *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
return -EINVAL;
}
- /* Decode the header */
- rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
- rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
- rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
- rmsgp->rm_type = ntohl(rmsgp->rm_type);
-
- if (rmsgp->rm_vers != RPCRDMA_VERSION)
+ if (rmsgp->rm_vers != rpcrdma_version)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
- if (rmsgp->rm_type == RDMA_MSGP) {
+ if (rmsgp->rm_type == rdma_msgp) {
int hdrlen;
+
rmsgp->rm_body.rm_padded.rm_align =
- ntohl(rmsgp->rm_body.rm_padded.rm_align);
+ be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
- ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
+ be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
- vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
+ vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
return hdr_len;
}
-int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
-{
- struct rpcrdma_msg *rmsgp = NULL;
- struct rpcrdma_read_chunk *ch;
- struct rpcrdma_write_array *ary;
- u32 *va;
- u32 hdrlen;
-
- dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
- rqstp);
- rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-
- /* Pull in the extra for the padded case and bump our pointer */
- if (rmsgp->rm_type == RDMA_MSGP) {
- va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
- rqstp->rq_arg.head[0].iov_base = va;
- hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
- rqstp->rq_arg.head[0].iov_len -= hdrlen;
- return hdrlen;
- }
-
- /*
- * Skip all chunks to find RPC msg. These were previously processed
- */
- va = &rmsgp->rm_body.rm_chunks[0];
-
- /* Skip read-list */
- for (ch = (struct rpcrdma_read_chunk *)va;
- ch->rc_discrim != xdr_zero; ch++);
- va = (u32 *)&ch->rc_position;
-
- /* Skip write-list */
- ary = (struct rpcrdma_write_array *)va;
- if (ary->wc_discrim == xdr_zero)
- va = (u32 *)&ary->wc_nchunks;
- else
- /*
- * rs_length is the 2nd 4B field in wc_target and taking its
- * address skips the list terminator
- */
- va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
-
- /* Skip reply-array */
- ary = (struct rpcrdma_write_array *)va;
- if (ary->wc_discrim == xdr_zero)
- va = (u32 *)&ary->wc_nchunks;
- else
- va = (u32 *)&ary->wc_array[ary->wc_nchunks];
-
- rqstp->rq_arg.head[0].iov_base = va;
- hdrlen = (unsigned long)va - (unsigned long)rmsgp;
- rqstp->rq_arg.head[0].iov_len -= hdrlen;
-
- return hdrlen;
-}
-
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
- enum rpcrdma_errcode err, u32 *va)
+ enum rpcrdma_errcode err, __be32 *va)
{
- u32 *startp = va;
+ __be32 *startp = va;
- *va++ = htonl(rmsgp->rm_xid);
- *va++ = htonl(rmsgp->rm_vers);
- *va++ = htonl(xprt->sc_max_requests);
- *va++ = htonl(RDMA_ERROR);
- *va++ = htonl(err);
+ *va++ = rmsgp->rm_xid;
+ *va++ = rmsgp->rm_vers;
+ *va++ = cpu_to_be32(xprt->sc_max_requests);
+ *va++ = rdma_error;
+ *va++ = cpu_to_be32(err);
if (err == ERR_VERS) {
- *va++ = htonl(RPCRDMA_VERSION);
- *va++ = htonl(RPCRDMA_VERSION);
+ *va++ = rpcrdma_version;
+ *va++ = rpcrdma_version;
}
return (int)((unsigned long)va - (unsigned long)startp);
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
+ &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
+ &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
- ary->wc_nchunks = htonl(chunks);
+ ary->wc_nchunks = cpu_to_be32(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
int chunks)
{
ary->wc_discrim = xdr_one;
- ary->wc_nchunks = htonl(chunks);
+ ary->wc_nchunks = cpu_to_be32(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = rs_handle;
seg->rs_offset = rs_offset;
- seg->rs_length = htonl(write_len);
+ seg->rs_length = cpu_to_be32(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
- rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
- rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
- rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
- rdma_resp->rm_type = htonl(rdma_type);
+ rdma_resp->rm_xid = rdma_argp->rm_xid;
+ rdma_resp->rm_vers = rdma_argp->rm_vers;
+ rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
+ rdma_resp->rm_type = cpu_to_be32(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
- if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+ if (rmsgp->rm_type == rdma_nomsg)
rqstp->rq_arg.pages = &rqstp->rq_pages[0];
else
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
u32 xdr_off;
int chunk_off;
int chunk_no;
+ int nchunks;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
&rdma_resp->rm_body.rm_chunks[1];
/* Write chunks start at the pagelist */
+ nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
- xfer_len && chunk_no < arg_ary->wc_nchunks;
+ xfer_len && chunk_no < nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, ntohl(arg_ch->rs_length));
+ write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
/* Prepare the response chunk given the length actually
* written */
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
- ntohl(arg_ch->rs_handle),
+ be32_to_cpu(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
&rdma_resp->rm_body.rm_chunks[2];
/* xdr offset starts at RPC message */
- nchunks = ntohl(arg_ary->wc_nchunks);
+ nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, htonl(ch->rs_length));
+ write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
/* Prepare the reply chunk given the length actually
* written */
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
- ntohl(ch->rs_handle),
+ be32_to_cpu(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = svc_rdma_get_page();
+ res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
- .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
+ .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
{
struct svc_rdma_op_ctxt *ctxt;
- while (1) {
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
- if (ctxt)
- break;
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- }
+ ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
+ GFP_KERNEL | __GFP_NOFAIL);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
struct svc_rdma_req_map *svc_rdma_get_req_map(void)
{
struct svc_rdma_req_map *map;
- while (1) {
- map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
- if (map)
- break;
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- }
+ map = kmem_cache_alloc(svc_rdma_map_cachep,
+ GFP_KERNEL | __GFP_NOFAIL);
map->count = 0;
return map;
}
return cma_xprt;
}
-struct page *svc_rdma_get_page(void)
-{
- struct page *page;
-
- while ((page = alloc_page(GFP_KERNEL)) == NULL) {
- /* If we can't get memory, wait a bit and try again */
- printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
- schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
- }
- return page;
-}
-
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = svc_rdma_get_page();
+ page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
struct ib_send_wr err_wr;
struct page *p;
struct svc_rdma_op_ctxt *ctxt;
- u32 *va;
+ __be32 *va;
int length;
int ret;
- p = svc_rdma_get_page();
+ p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
va = page_address(p);
/* XDR encode error */
*/
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-MODULE_LICENSE("Dual BSD/GPL");
-
-MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
-MODULE_AUTHOR("Network Appliance, Inc.");
-
/*
* tunables
*/
.setup = xprt_setup_rdma,
};
-static void __exit xprt_rdma_cleanup(void)
+void xprt_rdma_cleanup(void)
{
int rc;
__func__, rc);
}
-static int __init xprt_rdma_init(void)
+int xprt_rdma_init(void)
{
int rc;
#endif
return 0;
}
-
-module_init(xprt_rdma_init);
-module_exit(xprt_rdma_cleanup);
*/
int rpcrdma_marshal_req(struct rpc_rqst *);
+/* RPC/RDMA module init - xprtrdma/transport.c
+ */
+int xprt_rdma_init(void);
+void xprt_rdma_cleanup(void);
+
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
-#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
-#else
-#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#endif
-
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */