#include <linux/nsproxy.h>
#include <linux/virtio_net.h>
#include <linux/rcupdate.h>
+ #include <net/ipv6.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
struct net_device *dev;
netdev_features_t set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- NETIF_F_TSO6|NETIF_F_UFO)
+ NETIF_F_TSO6)
int vnet_hdr_sz;
int sndbuf;
break;
}
+ skb_reset_network_header(skb);
+
if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
pr_debug("GSO!\n");
switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
break;
case VIRTIO_NET_HDR_GSO_UDP:
+ {
+ static bool warned;
+
+ if (!warned) {
+ warned = true;
+ netdev_warn(tun->dev,
+ "%s: using disabled UFO feature; please fix this program\n",
+ current->comm);
+ }
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ ipv6_proxy_select_ident(skb);
break;
+ }
default:
tun->dev->stats.rx_frame_errors++;
kfree_skb(skb);
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
}
- skb_reset_network_header(skb);
skb_probe_transport_header(skb, 0);
rxhash = skb_get_hash(skb);
struct tun_pi pi = { 0, skb->protocol };
ssize_t total = 0;
int vlan_offset = 0, copied;
+ int vlan_hlen = 0;
+ int vnet_hdr_sz = 0;
+
+ if (vlan_tx_tag_present(skb))
+ vlan_hlen = VLAN_HLEN;
+
+ if (tun->flags & TUN_VNET_HDR)
+ vnet_hdr_sz = tun->vnet_hdr_sz;
if (!(tun->flags & TUN_NO_PI)) {
if ((len -= sizeof(pi)) < 0)
return -EINVAL;
- if (len < skb->len) {
+ if (len < skb->len + vlan_hlen + vnet_hdr_sz) {
/* Packet will be striped */
pi.flags |= TUN_PKT_STRIP;
}
total += sizeof(pi);
}
- if (tun->flags & TUN_VNET_HDR) {
+ if (vnet_hdr_sz) {
struct virtio_net_hdr gso = { 0 }; /* no info leak */
- if ((len -= tun->vnet_hdr_sz) < 0)
+ if ((len -= vnet_hdr_sz) < 0)
return -EINVAL;
if (skb_is_gso(skb)) {
gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (sinfo->gso_type & SKB_GSO_TCPV6)
gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- else if (sinfo->gso_type & SKB_GSO_UDP)
- gso.gso_type = VIRTIO_NET_HDR_GSO_UDP;
else {
pr_err("unexpected GSO type: "
"0x%x, gso_size %d, hdr_len %d\n",
if (skb->ip_summed == CHECKSUM_PARTIAL) {
gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- gso.csum_start = skb_checksum_start_offset(skb);
+ gso.csum_start = skb_checksum_start_offset(skb) +
+ vlan_hlen;
gso.csum_offset = skb->csum_offset;
} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
gso.flags = VIRTIO_NET_HDR_F_DATA_VALID;
if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total,
sizeof(gso))))
return -EFAULT;
- total += tun->vnet_hdr_sz;
+ total += vnet_hdr_sz;
}
copied = total;
- total += skb->len;
- if (!vlan_tx_tag_present(skb)) {
- len = min_t(int, skb->len, len);
- } else {
+ len = min_t(int, skb->len + vlan_hlen, len);
+ total += skb->len + vlan_hlen;
+ if (vlan_hlen) {
int copy, ret;
struct {
__be16 h_vlan_proto;
veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb));
vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
- len = min_t(int, skb->len + VLAN_HLEN, len);
- total += VLAN_HLEN;
copy = min_t(int, vlan_offset, len);
ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy);
features |= NETIF_F_TSO6;
arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
}
-
- if (arg & TUN_F_UFO) {
- features |= NETIF_F_UFO;
- arg &= ~TUN_F_UFO;
- }
}
/* This gives the user a way to test for new features in future by
}
#ifdef CONFIG_PROC_FS
-static int tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
+static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
{
struct tun_struct *tun;
struct ifreq ifr;
if (tun)
tun_put(tun);
- return seq_printf(m, "iff:\t%s\n", ifr.ifr_name);
+ seq_printf(m, "iff:\t%s\n", ifr.ifr_name);
}
#endif
/*
* Usage:
* dcache->d_inode->i_lock protects:
- * - i_dentry, d_alias, d_inode of aliases
+ * - i_dentry, d_u.d_alias, d_inode of aliases
* dcache_hash_bucket lock protects:
* - the dcache hash table
* s_anon bl list spinlock protects:
* - d_unhashed()
* - d_parent and d_subdirs
* - childrens' d_child and d_parent
- * - d_alias, d_inode
+ * - d_u.d_alias, d_inode
*
* Ordering:
* dentry->d_inode->i_lock
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
- WARN_ON(!hlist_unhashed(&dentry->d_alias));
kmem_cache_free(dentry_cache, dentry);
}
static void __d_free_external(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
- WARN_ON(!hlist_unhashed(&dentry->d_alias));
kfree(external_name(dentry));
kmem_cache_free(dentry_cache, dentry);
}
static void dentry_free(struct dentry *dentry)
{
+ WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
if (unlikely(dname_external(dentry))) {
struct external_name *p = external_name(dentry);
if (likely(atomic_dec_and_test(&p->u.count))) {
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
struct inode *inode = dentry->d_inode;
__d_clear_type(dentry);
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
}
/* if it was on the hash then remove it */
__d_drop(dentry);
- list_del(&dentry->d_u.d_child);
+ __list_del_entry(&dentry->d_child);
/*
* Inform d_walk() that we are no longer attached to the
* dentry tree
again:
discon_alias = NULL;
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
struct dentry *dentry;
restart:
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
spin_lock(&dentry->d_lock);
if (!dentry->d_lockref.count) {
struct dentry *parent = lock_parent(dentry);
if (likely(!dentry->d_lockref.count)) {
__dentry_kill(dentry);
+ dput(parent);
goto restart;
}
if (parent)
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
/*
* All done at this level ... ascend and resume the search.
*/
+ rcu_read_lock();
+ascend:
if (this_parent != parent) {
struct dentry *child = this_parent;
this_parent = child->d_parent;
- rcu_read_lock();
spin_unlock(&child->d_lock);
spin_lock(&this_parent->d_lock);
- /*
- * might go back up the wrong parent if we have had a rename
- * or deletion
- */
- if (this_parent != child->d_parent ||
- (child->d_flags & DCACHE_DENTRY_KILLED) ||
- need_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ /* might go back up the wrong parent if we have had a rename. */
+ if (need_seqretry(&rename_lock, seq))
goto rename_retry;
+ next = child->d_child.next;
+ while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ next = next->next;
}
rcu_read_unlock();
- next = child->d_u.d_child.next;
goto resume;
}
- if (need_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
+ if (need_seqretry(&rename_lock, seq))
goto rename_retry;
- }
+ rcu_read_unlock();
if (finish)
finish(data);
return;
rename_retry:
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ BUG_ON(seq & 1);
if (!retry)
return;
seq = 1;
INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
- INIT_HLIST_NODE(&dentry->d_alias);
- INIT_LIST_HEAD(&dentry->d_u.d_child);
+ INIT_HLIST_NODE(&dentry->d_u.d_alias);
+ INIT_LIST_HEAD(&dentry->d_child);
d_set_d_op(dentry, dentry->d_sb->s_d_op);
this_cpu_inc(nr_dentry);
*/
__dget_dlock(parent);
dentry->d_parent = parent;
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+ list_add(&dentry->d_child, &parent->d_subdirs);
spin_unlock(&parent->d_lock);
return dentry;
spin_lock(&dentry->d_lock);
__d_set_type(dentry, add_flags);
if (inode)
- hlist_add_head(&dentry->d_alias, &inode->i_dentry);
+ hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
void d_instantiate(struct dentry *entry, struct inode * inode)
{
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode)
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
return NULL;
}
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
{
struct dentry *result;
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode)
spin_lock(&inode->i_lock);
*/
int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
{
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
if (hlist_empty(&inode->i_dentry))
return NULL;
- alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+ alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
__dget(alias);
return alias;
}
spin_lock(&tmp->d_lock);
tmp->d_inode = inode;
tmp->d_flags |= add_flags;
- hlist_add_head(&tmp->d_alias, &inode->i_dentry);
+ hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
hlist_bl_lock(&tmp->d_sb->s_anon);
hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
hlist_bl_unlock(&tmp->d_sb->s_anon);
* if not go ahead and create it now.
*/
found = d_hash_and_lookup(dentry->d_parent, name);
- if (unlikely(IS_ERR(found)))
- goto err_out;
if (!found) {
new = d_alloc(dentry->d_parent, name);
if (!new) {
found = ERR_PTR(-ENOMEM);
- goto err_out;
- }
-
- found = d_splice_alias(inode, new);
- if (found) {
- dput(new);
- return found;
- }
- return new;
- }
-
- /*
- * If a matching dentry exists, and it's not negative use it.
- *
- * Decrement the reference count to balance the iget() done
- * earlier on.
- */
- if (found->d_inode) {
- if (unlikely(found->d_inode != inode)) {
- /* This can't happen because bad inodes are unhashed. */
- BUG_ON(!is_bad_inode(inode));
- BUG_ON(!is_bad_inode(found->d_inode));
+ } else {
+ found = d_splice_alias(inode, new);
+ if (found) {
+ dput(new);
+ return found;
+ }
+ return new;
}
- iput(inode);
- return found;
}
-
- /*
- * Negative dentry: instantiate it unless the inode is a directory and
- * already has a dentry.
- */
- new = d_splice_alias(inode, found);
- if (new) {
- dput(found);
- found = new;
- }
- return found;
-
-err_out:
iput(inode);
return found;
}
struct dentry *child;
spin_lock(&dparent->d_lock);
- list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &dparent->d_subdirs, d_child) {
if (dentry == child) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
__dget_dlock(dentry);
*/
unsigned int i;
BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
+ kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN);
+ kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN);
for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
swap(((long *) &dentry->d_iname)[i],
((long *) &target->d_iname)[i]);
/* splicing a tree */
dentry->d_parent = target->d_parent;
target->d_parent = target;
- list_del_init(&target->d_u.d_child);
- list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ list_del_init(&target->d_child);
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
} else {
/* swapping two dentries */
swap(dentry->d_parent, target->d_parent);
- list_move(&target->d_u.d_child, &target->d_parent->d_subdirs);
- list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ list_move(&target->d_child, &target->d_parent->d_subdirs);
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
if (exchange)
fsnotify_d_move(target);
fsnotify_d_move(dentry);
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
*/
-static struct dentry *__d_unalias(struct inode *inode,
+static int __d_unalias(struct inode *inode,
struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
- struct dentry *ret = ERR_PTR(-EBUSY);
+ int ret = -EBUSY;
/* If alias and dentry share a parent, then no extra locks required */
if (alias->d_parent == dentry->d_parent)
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
__d_move(alias, dentry, false);
- ret = alias;
+ ret = 0;
out_err:
spin_unlock(&inode->i_lock);
if (m2)
*/
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
- struct dentry *new = NULL;
-
if (IS_ERR(inode))
return ERR_CAST(inode);
- if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&inode->i_lock);
- new = __d_find_any_alias(inode);
- if (new) {
- if (!IS_ROOT(new)) {
- spin_unlock(&inode->i_lock);
- dput(new);
- iput(inode);
- return ERR_PTR(-EIO);
- }
- if (d_ancestor(new, dentry)) {
- spin_unlock(&inode->i_lock);
- dput(new);
- iput(inode);
- return ERR_PTR(-EIO);
- }
- write_seqlock(&rename_lock);
- __d_move(new, dentry, false);
- write_sequnlock(&rename_lock);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(new, inode);
- iput(inode);
- } else {
- /* already taking inode->i_lock, so d_add() by hand */
- __d_instantiate(dentry, inode);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(dentry, inode);
- d_rehash(dentry);
- }
- } else {
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry))
- d_rehash(dentry);
- }
- return new;
-}
-EXPORT_SYMBOL(d_splice_alias);
-
-/**
- * d_materialise_unique - introduce an inode into the tree
- * @dentry: candidate dentry
- * @inode: inode to bind to the dentry, to which aliases may be attached
- *
- * Introduces an dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one. Caller must hold the
- * i_mutex of the parent directory.
- */
-struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
-{
- struct dentry *actual;
-
BUG_ON(!d_unhashed(dentry));
if (!inode) {
- actual = dentry;
__d_instantiate(dentry, NULL);
- d_rehash(actual);
- goto out_nolock;
+ goto out;
}
-
spin_lock(&inode->i_lock);
-
if (S_ISDIR(inode->i_mode)) {
- struct dentry *alias;
-
- /* Does an aliased dentry already exist? */
- alias = __d_find_alias(inode);
- if (alias) {
- actual = alias;
+ struct dentry *new = __d_find_any_alias(inode);
+ if (unlikely(new)) {
write_seqlock(&rename_lock);
-
- if (d_ancestor(alias, dentry)) {
- /* Check for loops */
- actual = ERR_PTR(-ELOOP);
+ if (unlikely(d_ancestor(new, dentry))) {
+ write_sequnlock(&rename_lock);
spin_unlock(&inode->i_lock);
- } else if (IS_ROOT(alias)) {
- /* Is this an anonymous mountpoint that we
- * could splice into our tree? */
- __d_move(alias, dentry, false);
+ dput(new);
+ new = ERR_PTR(-ELOOP);
+ pr_warn_ratelimited(
+ "VFS: Lookup of '%s' in %s %s"
+ " would have caused loop\n",
+ dentry->d_name.name,
+ inode->i_sb->s_type->name,
+ inode->i_sb->s_id);
+ } else if (!IS_ROOT(new)) {
+ int err = __d_unalias(inode, dentry, new);
write_sequnlock(&rename_lock);
- goto found;
+ if (err) {
+ dput(new);
+ new = ERR_PTR(err);
+ }
} else {
- /* Nope, but we must(!) avoid directory
- * aliasing. This drops inode->i_lock */
- actual = __d_unalias(inode, dentry, alias);
- }
- write_sequnlock(&rename_lock);
- if (IS_ERR(actual)) {
- if (PTR_ERR(actual) == -ELOOP)
- pr_warn_ratelimited(
- "VFS: Lookup of '%s' in %s %s"
- " would have caused loop\n",
- dentry->d_name.name,
- inode->i_sb->s_type->name,
- inode->i_sb->s_id);
- dput(alias);
+ __d_move(new, dentry, false);
+ write_sequnlock(&rename_lock);
+ spin_unlock(&inode->i_lock);
+ security_d_instantiate(new, inode);
}
- goto out_nolock;
+ iput(inode);
+ return new;
}
}
-
- /* Add a unique reference */
- actual = __d_instantiate_unique(dentry, inode);
- if (!actual)
- actual = dentry;
-
- d_rehash(actual);
-found:
+ /* already taking inode->i_lock, so d_add() by hand */
+ __d_instantiate(dentry, inode);
spin_unlock(&inode->i_lock);
-out_nolock:
- if (actual == dentry) {
- security_d_instantiate(dentry, inode);
- return NULL;
- }
-
- iput(inode);
- return actual;
+out:
+ security_d_instantiate(dentry, inode);
+ d_rehash(dentry);
+ return NULL;
}
-EXPORT_SYMBOL_GPL(d_materialise_unique);
+EXPORT_SYMBOL(d_splice_alias);
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
{
inode_dec_link_count(inode);
BUG_ON(dentry->d_name.name != dentry->d_iname ||
- !hlist_unhashed(&dentry->d_alias) ||
+ !hlist_unhashed(&dentry->d_u.d_alias) ||
!d_unlinked(dentry));
spin_lock(&dentry->d_parent->d_lock);
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
+ mutex_lock(&nn->bl_mutex);
bl_pipe_msg.bl_wq = &nn->bl_wq;
b->simple.len += 4; /* single volume */
if (b->simple.len > PAGE_SIZE)
- return -EIO;
+ goto out_unlock;
memset(msg, 0, sizeof(*msg));
msg->len = sizeof(*bl_msg) + b->simple.len;
msg->data = kzalloc(msg->len, gfp_mask);
if (!msg->data)
- goto out;
+ goto out_free_data;
bl_msg = msg->data;
bl_msg->type = BL_DEVICE_MOUNT,
rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
if (rc < 0) {
remove_wait_queue(&nn->bl_wq, &wq);
- goto out;
+ goto out_free_data;
}
set_current_state(TASK_UNINTERRUPTIBLE);
if (reply->status != BL_DEVICE_REQUEST_PROC) {
printk(KERN_WARNING "%s failed to decode device: %d\n",
__func__, reply->status);
- goto out;
+ goto out_free_data;
}
dev = MKDEV(reply->major, reply->minor);
- out:
+ out_free_data:
kfree(msg->data);
+ out_unlock:
+ mutex_unlock(&nn->bl_mutex);
return dev;
}
static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
size_t mlen)
{
- struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
+ struct nfs_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
nfs_net_id);
if (mlen != sizeof (struct bl_dev_msg))
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct dentry *dentry;
+ mutex_init(&nn->bl_mutex);
init_waitqueue_head(&nn->bl_wq);
nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
if (IS_ERR(nn->bl_device_pipe))
static int
nfs_closedir(struct inode *inode, struct file *filp)
{
- put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data);
+ put_nfs_open_dir_context(file_inode(filp), filp->private_data);
return 0;
}
if (IS_ERR(inode))
goto out;
- alias = d_materialise_unique(dentry, inode);
+ alias = d_splice_alias(inode, dentry);
if (IS_ERR(alias))
goto out;
else if (alias) {
nfs_advise_use_readdirplus(dir);
no_entry:
- res = d_materialise_unique(dentry, inode);
+ res = d_splice_alias(inode, dentry);
if (res != NULL) {
if (IS_ERR(res))
goto out_unblock_sillyrename;
case -ENOENT:
d_drop(dentry);
d_add(dentry, NULL);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case -EISDIR:
case -ENOTDIR:
spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
struct dentry *child;
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
spin_lock(&alias->d_lock);
- list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &alias->d_subdirs, d_child) {
if (!child->d_inode)
continue;
&fsnotify_mark_srcu);
}
+ /*
+ * We need to merge inode & vfsmount mark lists so that inode mark
+ * ignore masks are properly reflected for mount mark notifications.
+ * That's why this traversal is so complicated...
+ */
while (inode_node || vfsmount_node) {
- inode_group = vfsmount_group = NULL;
+ inode_group = NULL;
+ inode_mark = NULL;
+ vfsmount_group = NULL;
+ vfsmount_mark = NULL;
if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
vfsmount_group = vfsmount_mark->group;
}
- if (inode_group > vfsmount_group) {
- /* handle inode */
- ret = send_to_group(to_tell, inode_mark, NULL, mask,
- data, data_is, cookie, file_name);
- /* we didn't use the vfsmount_mark */
- vfsmount_group = NULL;
- } else if (vfsmount_group > inode_group) {
- ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
- data, data_is, cookie, file_name);
- inode_group = NULL;
- } else {
- ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
- mask, data, data_is, cookie,
- file_name);
+ if (inode_group && vfsmount_group) {
+ int cmp = fsnotify_compare_groups(inode_group,
+ vfsmount_group);
+ if (cmp > 0) {
+ inode_group = NULL;
+ inode_mark = NULL;
+ } else if (cmp < 0) {
+ vfsmount_group = NULL;
+ vfsmount_mark = NULL;
+ }
}
+ ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
+ data, data_is, cookie, file_name);
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out;
{
struct ovl_dir_cache *cache = od->cache;
- list_del(&od->cursor.l_node);
+ list_del_init(&od->cursor.l_node);
WARN_ON(cache->refcount <= 0);
cache->refcount--;
if (!cache->refcount) {
}
}
-static int ovl_fill_merge(void *buf, const char *name, int namelen,
- loff_t offset, u64 ino, unsigned int d_type)
+static int ovl_fill_merge(struct dir_context *ctx, const char *name,
+ int namelen, loff_t offset, u64 ino,
+ unsigned int d_type)
{
- struct ovl_readdir_data *rdd = buf;
+ struct ovl_readdir_data *rdd =
+ container_of(ctx, struct ovl_readdir_data, ctx);
rdd->count++;
if (!rdd->is_merge)
return 0;
}
- static inline int ovl_dir_read_merged(struct path *upperpath,
- struct path *lowerpath,
- struct list_head *list)
+ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
{
int err;
+ struct path lowerpath;
+ struct path upperpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
.list = list,
.is_merge = false,
};
- if (upperpath->dentry) {
- err = ovl_dir_read(upperpath, &rdd);
+ ovl_path_lower(dentry, &lowerpath);
+ ovl_path_upper(dentry, &upperpath);
+
+ if (upperpath.dentry) {
+ err = ovl_dir_read(&upperpath, &rdd);
if (err)
goto out;
- if (lowerpath->dentry) {
- err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
+ if (lowerpath.dentry) {
+ err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd);
if (err)
goto out;
}
}
- if (lowerpath->dentry) {
+ if (lowerpath.dentry) {
/*
* Insert lowerpath entries before upperpath ones, this allows
* offsets to be reasonably constant
*/
list_add(&rdd.middle, rdd.list);
rdd.is_merge = true;
- err = ovl_dir_read(lowerpath, &rdd);
+ err = ovl_dir_read(&lowerpath, &rdd);
list_del(&rdd.middle);
}
out:
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
int res;
- struct path lowerpath;
- struct path upperpath;
struct ovl_dir_cache *cache;
cache = ovl_dir_cache(dentry);
cache->refcount = 1;
INIT_LIST_HEAD(&cache->entries);
- ovl_path_lower(dentry, &lowerpath);
- ovl_path_upper(dentry, &upperpath);
-
- res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
+ res = ovl_dir_read_merged(dentry, &cache->entries);
if (res) {
ovl_cache_free(&cache->entries);
kfree(cache);
/*
* Need to check if we started out being a lower dir, but got copied up
*/
- if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) {
+ if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) {
struct inode *inode = file_inode(file);
- realfile =lockless_dereference(od->upperfile);
+ realfile = lockless_dereference(od->upperfile);
if (!realfile) {
struct path upperpath;
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
int err;
- struct path lowerpath;
- struct path upperpath;
struct ovl_cache_entry *p;
- ovl_path_upper(dentry, &upperpath);
- ovl_path_lower(dentry, &lowerpath);
-
- err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
+ err = ovl_dir_read_merged(dentry, list);
if (err)
return err;
if (!f.file)
return -EBADF;
- css = css_tryget_online_from_dir(f.file->f_dentry,
+ css = css_tryget_online_from_dir(f.file->f_path.dentry,
&perf_event_cgrp_subsys);
if (IS_ERR(css)) {
ret = PTR_ERR(css);
if (!task) {
/*
- * Per cpu events are removed via an smp call and
- * the removal is always successful.
+ * Per cpu events are removed via an smp call. The removal can
+ * fail if the CPU is currently offline, but in that case we
+ * already called __perf_remove_from_context from
+ * perf_event_exit_cpu.
*/
cpu_function_call(event->cpu, __perf_remove_from_context, &re);
return;
return 0;
}
- static int perf_swevent_event_idx(struct perf_event *event)
- {
- return 0;
- }
-
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
-
- .event_idx = perf_swevent_event_idx,
};
#ifdef CONFIG_EVENT_TRACING
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
-
- .event_idx = perf_swevent_event_idx,
};
static inline void perf_tp_register(void)
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
-
- .event_idx = perf_swevent_event_idx,
};
/*
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
-
- .event_idx = perf_swevent_event_idx,
};
static void perf_pmu_nop_void(struct pmu *pmu)
static int perf_event_idx_default(struct perf_event *event)
{
- return event->hw.idx + 1;
+ return 0;
}
/*
static void __perf_event_exit_context(void *__info)
{
- struct remove_event re = { .detach_group = false };
+ struct remove_event re = { .detach_group = true };
struct perf_event_context *ctx = __info;
perf_pmu_rotate_stop(ctx->pmu);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
- static int wait_on_pipe(struct trace_iterator *iter)
+ static int wait_on_pipe(struct trace_iterator *iter, bool full)
{
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
return 0;
- return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+ return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
+ full);
}
#ifdef CONFIG_FTRACE_STARTUP_TEST
mutex_unlock(&iter->mutex);
- ret = wait_on_pipe(iter);
+ ret = wait_on_pipe(iter, false);
mutex_lock(&iter->mutex);
if (ret)
return ret;
-
- if (signal_pending(current))
- return -EINTR;
}
return 1;
goto out_unlock;
}
mutex_unlock(&trace_types_lock);
- ret = wait_on_pipe(iter);
+ ret = wait_on_pipe(iter, false);
mutex_lock(&trace_types_lock);
if (ret) {
size = ret;
goto out_unlock;
}
- if (signal_pending(current)) {
- size = -EINTR;
- goto out_unlock;
- }
goto again;
}
size = 0;
};
struct buffer_ref *ref;
int entries, size, i;
- ssize_t ret;
+ ssize_t ret = 0;
mutex_lock(&trace_types_lock);
int r;
ref = kzalloc(sizeof(*ref), GFP_KERNEL);
- if (!ref)
+ if (!ref) {
+ ret = -ENOMEM;
break;
+ }
ref->ref = 1;
ref->buffer = iter->trace_buffer->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
if (!ref->page) {
+ ret = -ENOMEM;
kfree(ref);
break;
}
/* did we read anything? */
if (!spd.nr_pages) {
+ if (ret)
+ goto out;
+
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
ret = -EAGAIN;
goto out;
}
mutex_unlock(&trace_types_lock);
- ret = wait_on_pipe(iter);
+ ret = wait_on_pipe(iter, true);
mutex_lock(&trace_types_lock);
if (ret)
goto out;
- if (signal_pending(current)) {
- ret = -EINTR;
- goto out;
- }
+
goto again;
}
int ret;
/* Paranoid: Make sure the parent is the "instances" directory */
- parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+ parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
if (WARN_ON_ONCE(parent != trace_instance_dir))
return -ENOENT;
int ret;
/* Paranoid: Make sure the parent is the "instances" directory */
- parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+ parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
if (WARN_ON_ONCE(parent != trace_instance_dir))
return -ENOENT;
* start move here.
*/
- /* for quick checking without looking up memcg */
- atomic_t memcg_moving __read_mostly;
-
static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
- atomic_inc(&memcg_moving);
atomic_inc(&memcg->moving_account);
synchronize_rcu();
}
* Now, mem_cgroup_clear_mc() may call this function with NULL.
* We check NULL in callee rather than caller.
*/
- if (memcg) {
- atomic_dec(&memcg_moving);
+ if (memcg)
atomic_dec(&memcg->moving_account);
- }
}
/*
return true;
}
- /*
- * Used to update mapped file or writeback or other statistics.
+ /**
+ * mem_cgroup_begin_page_stat - begin a page state statistics transaction
+ * @page: page that is going to change accounted state
+ * @locked: &memcg->move_lock slowpath was taken
+ * @flags: IRQ-state flags for &memcg->move_lock
*
- * Notes: Race condition
+ * This function must mark the beginning of an accounted page state
+ * change to prevent double accounting when the page is concurrently
+ * being moved to another memcg:
*
- * Charging occurs during page instantiation, while the page is
- * unmapped and locked in page migration, or while the page table is
- * locked in THP migration. No race is possible.
+ * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
+ * if (TestClearPageState(page))
+ * mem_cgroup_update_page_stat(memcg, state, -1);
+ * mem_cgroup_end_page_stat(memcg, locked, flags);
*
- * Uncharge happens to pages with zero references, no race possible.
+ * The RCU lock is held throughout the transaction. The fast path can
+ * get away without acquiring the memcg->move_lock (@locked is false)
+ * because page moving starts with an RCU grace period.
*
- * Charge moving between groups is protected by checking mm->moving
- * account and taking the move_lock in the slowpath.
+ * The RCU lock also protects the memcg from being freed when the page
+ * state that is going to change is the only thing preventing the page
+ * from being uncharged. E.g. end-writeback clearing PageWriteback(),
+ * which allows migration to go ahead and uncharge the page before the
+ * account transaction might be complete.
*/
-
- void __mem_cgroup_begin_update_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
+ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
+ bool *locked,
+ unsigned long *flags)
{
struct mem_cgroup *memcg;
struct page_cgroup *pc;
+ rcu_read_lock();
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
pc = lookup_page_cgroup(page);
again:
memcg = pc->mem_cgroup;
if (unlikely(!memcg || !PageCgroupUsed(pc)))
- return;
- /*
- * If this memory cgroup is not under account moving, we don't
- * need to take move_lock_mem_cgroup(). Because we already hold
- * rcu_read_lock(), any calls to move_account will be delayed until
- * rcu_read_unlock().
- */
- VM_BUG_ON(!rcu_read_lock_held());
+ return NULL;
+
+ *locked = false;
if (atomic_read(&memcg->moving_account) <= 0)
- return;
+ return memcg;
move_lock_mem_cgroup(memcg, flags);
if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
goto again;
}
*locked = true;
+
+ return memcg;
}
- void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+ /**
+ * mem_cgroup_end_page_stat - finish a page state statistics transaction
+ * @memcg: the memcg that was accounted against
+ * @locked: value received from mem_cgroup_begin_page_stat()
+ * @flags: value received from mem_cgroup_begin_page_stat()
+ */
+ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
+ unsigned long flags)
{
- struct page_cgroup *pc = lookup_page_cgroup(page);
+ if (memcg && locked)
+ move_unlock_mem_cgroup(memcg, &flags);
- /*
- * It's guaranteed that pc->mem_cgroup never changes while
- * lock is held because a routine modifies pc->mem_cgroup
- * should take move_lock_mem_cgroup().
- */
- move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+ rcu_read_unlock();
}
- void mem_cgroup_update_page_stat(struct page *page,
+ /**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @memcg: memcg to account against
+ * @idx: page state item to account
+ * @val: number of pages (positive or negative)
+ *
+ * See mem_cgroup_begin_page_stat() for locking requirements.
+ */
+ void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val)
{
- struct mem_cgroup *memcg;
- struct page_cgroup *pc = lookup_page_cgroup(page);
- unsigned long uninitialized_var(flags);
-
- if (mem_cgroup_disabled())
- return;
-
VM_BUG_ON(!rcu_read_lock_held());
- memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
- return;
- this_cpu_add(memcg->stat->count[idx], val);
+ if (memcg)
+ this_cpu_add(memcg->stat->count[idx], val);
}
/*
*
* DO NOT ADD NEW FILES.
*/
- name = cfile.file->f_dentry->d_name.name;
+ name = cfile.file->f_path.dentry->d_name.name;
if (!strcmp(name, "memory.usage_in_bytes")) {
event->register_event = mem_cgroup_usage_register_event;
* automatically removed on cgroup destruction but the removal is
* asynchronous, so take an extra ref on @css.
*/
- cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+ cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
&memory_cgrp_subsys);
ret = -EINVAL;
if (IS_ERR(cfile_css))
{
/* REPLY */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
- /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
+ /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
* sNO -> sIV Never reached.
* sSS -> sS2 Simultaneous open
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
- * sTW -> sIV Reopened connection, but server may not do it.
+ * sTW -> sSS Reopened connection, but server may have switched role
* sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
}
/* Print out the per-protocol part of the tuple. */
-static int tcp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
+static void tcp_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
}
/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
enum tcp_conntrack state;
state = ct->proto.tcp.state;
spin_unlock_bh(&ct->lock);
- return seq_printf(s, "%s ", tcp_conntrack_names[state]);
+ seq_printf(s, "%s ", tcp_conntrack_names[state]);
}
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
* returned by nf_queue. For instance, callers rely on -ECANCELED to
* mean 'ignore this hook'.
*/
- if (IS_ERR(segs))
+ if (IS_ERR_OR_NULL(segs))
goto out_err;
queued = 0;
err = 0;
{
const struct nfqnl_instance *inst = v;
- return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
- inst->queue_num,
- inst->peer_portid, inst->queue_total,
- inst->copy_mode, inst->copy_range,
- inst->queue_dropped, inst->queue_user_dropped,
- inst->id_sequence, 1);
+ seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+ inst->queue_num,
+ inst->peer_portid, inst->queue_total,
+ inst->copy_mode, inst->copy_range,
+ inst->queue_dropped, inst->queue_user_dropped,
+ inst->id_sequence, 1);
+ return seq_has_overflowed(s);
}
static const struct seq_operations nfqnl_seq_ops = {
{
static const char op[] = "appraise_data";
char *cause = "unknown";
- struct dentry *dentry = file->f_dentry;
+ struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
enum integrity_status status = INTEGRITY_UNKNOWN;
int rc = xattr_len, hash_start = 0;
*/
void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
{
- struct dentry *dentry = file->f_dentry;
+ struct dentry *dentry = file->f_path.dentry;
int rc = 0;
/* do not collect and update hash for digital signatures */
result = ima_protect_xattr(dentry, xattr_name, xattr_value,
xattr_value_len);
if (result == 1) {
+ if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
+ return -EINVAL;
ima_reset_appraise_flags(dentry->d_inode,
(xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0);
result = 0;