OSDN Git Service

net: tun: track dropped skb via kfree_skb_reason()
author Dongli Zhang <dongli.zhang@oracle.com>
Fri, 4 Mar 2022 14:55:07 +0000 (06:55 -0800)
committer David S. Miller <davem@davemloft.net>
Sun, 6 Mar 2022 11:04:01 +0000 (11:04 +0000)
TUN can be used as a vhost-net backend. E.g., tun_net_xmit() is the
interface used to forward an skb from TUN to vhost-net/virtio-net.

However, there are many "goto drop" paths in the TUN driver. Therefore,
kfree_skb_reason() is called at each "goto drop" so that userspace
ftrace/eBPF tooling can track the reason for packet loss.

The following reasons are introduced:

- SKB_DROP_REASON_DEV_READY
- SKB_DROP_REASON_NOMEM
- SKB_DROP_REASON_HDR_TRUNC
- SKB_DROP_REASON_TAP_FILTER
- SKB_DROP_REASON_TAP_TXFILTER

Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/tun.c
include/linux/skbuff.h
include/trace/events/skb.h

index 6e06c84..bab92e4 100644 (file)
@@ -1058,6 +1058,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct tun_struct *tun = netdev_priv(dev);
+       enum skb_drop_reason drop_reason;
        int txq = skb->queue_mapping;
        struct netdev_queue *queue;
        struct tun_file *tfile;
@@ -1067,8 +1068,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        tfile = rcu_dereference(tun->tfiles[txq]);
 
        /* Drop packet if interface is not attached */
-       if (!tfile)
+       if (!tfile) {
+               drop_reason = SKB_DROP_REASON_DEV_READY;
                goto drop;
+       }
 
        if (!rcu_dereference(tun->steering_prog))
                tun_automq_xmit(tun, skb);
@@ -1078,22 +1081,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        /* Drop if the filter does not like it.
         * This is a noop if the filter is disabled.
         * Filter can be enabled only for the TAP devices. */
-       if (!check_filter(&tun->txflt, skb))
+       if (!check_filter(&tun->txflt, skb)) {
+               drop_reason = SKB_DROP_REASON_TAP_TXFILTER;
                goto drop;
+       }
 
        if (tfile->socket.sk->sk_filter &&
-           sk_filter(tfile->socket.sk, skb))
+           sk_filter(tfile->socket.sk, skb)) {
+               drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
                goto drop;
+       }
 
        len = run_ebpf_filter(tun, skb, len);
-       if (len == 0)
+       if (len == 0) {
+               drop_reason = SKB_DROP_REASON_TAP_FILTER;
                goto drop;
+       }
 
-       if (pskb_trim(skb, len))
+       if (pskb_trim(skb, len)) {
+               drop_reason = SKB_DROP_REASON_NOMEM;
                goto drop;
+       }
 
-       if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+       if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+               drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
                goto drop;
+       }
 
        skb_tx_timestamp(skb);
 
@@ -1104,8 +1117,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
        nf_reset_ct(skb);
 
-       if (ptr_ring_produce(&tfile->tx_ring, skb))
+       if (ptr_ring_produce(&tfile->tx_ring, skb)) {
+               drop_reason = SKB_DROP_REASON_FULL_RING;
                goto drop;
+       }
 
        /* NETIF_F_LLTX requires to do our own update of trans_start */
        queue = netdev_get_tx_queue(dev, txq);
@@ -1122,7 +1137,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 drop:
        atomic_long_inc(&dev->tx_dropped);
        skb_tx_error(skb);
-       kfree_skb(skb);
+       kfree_skb_reason(skb, drop_reason);
        rcu_read_unlock();
        return NET_XMIT_DROP;
 }
@@ -1720,6 +1735,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
        u32 rxhash = 0;
        int skb_xdp = 1;
        bool frags = tun_napi_frags_enabled(tfile);
+       enum skb_drop_reason drop_reason;
 
        if (!(tun->flags & IFF_NO_PI)) {
                if (len < sizeof(pi))
@@ -1823,9 +1839,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
                if (err) {
                        err = -EFAULT;
+                       drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
 drop:
                        atomic_long_inc(&tun->dev->rx_dropped);
-                       kfree_skb(skb);
+                       kfree_skb_reason(skb, drop_reason);
                        if (frags) {
                                tfile->napi.skb = NULL;
                                mutex_unlock(&tfile->napi_mutex);
@@ -1872,6 +1889,7 @@ drop:
        case IFF_TAP:
                if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
                        err = -ENOMEM;
+                       drop_reason = SKB_DROP_REASON_HDR_TRUNC;
                        goto drop;
                }
                skb->protocol = eth_type_trans(skb, tun->dev);
@@ -1925,6 +1943,7 @@ drop:
        if (unlikely(!(tun->dev->flags & IFF_UP))) {
                err = -EIO;
                rcu_read_unlock();
+               drop_reason = SKB_DROP_REASON_DEV_READY;
                goto drop;
        }
 
index 67cfff4..34f5722 100644 (file)
@@ -424,7 +424,25 @@ enum skb_drop_reason {
        SKB_DROP_REASON_DEV_HDR,        /* device driver specific
                                         * header/metadata is invalid
                                         */
+       /* the device is not ready to xmit/recv due to any of its data
+        * structure that is not up/ready/initialized, e.g., the IFF_UP is
+        * not set, or driver specific tun->tfiles[txq] is not initialized
+        */
+       SKB_DROP_REASON_DEV_READY,
        SKB_DROP_REASON_FULL_RING,      /* ring buffer is full */
+       SKB_DROP_REASON_NOMEM,          /* error due to OOM */
+       SKB_DROP_REASON_HDR_TRUNC,      /* failed to trunc/extract the header
+                                        * from networking data, e.g., failed
+                                        * to pull the protocol header from
+                                        * frags via pskb_may_pull()
+                                        */
+       SKB_DROP_REASON_TAP_FILTER,     /* dropped by (ebpf) filter directly
+                                        * attached to tun/tap, e.g., via
+                                        * TUNSETFILTEREBPF
+                                        */
+       SKB_DROP_REASON_TAP_TXFILTER,   /* dropped by tx filter implemented
+                                        * at tun/tap, e.g., check_filter()
+                                        */
        SKB_DROP_REASON_MAX,
 };
 
index 240e7e7..e1670e1 100644 (file)
        EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG)            \
        EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT)    \
        EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR)                    \
+       EM(SKB_DROP_REASON_DEV_READY, DEV_READY)                \
        EM(SKB_DROP_REASON_FULL_RING, FULL_RING)                \
+       EM(SKB_DROP_REASON_NOMEM, NOMEM)                        \
+       EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC)                \
+       EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER)              \
+       EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER)          \
        EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM