OSDN Git Service

IB/hfi1: Add functions to receive accelerated ipoib packets
authorKaike Wan <kaike.wan@intel.com>
Mon, 11 May 2020 16:06:31 +0000 (12:06 -0400)
committerJason Gunthorpe <jgg@mellanox.com>
Thu, 21 May 2020 14:23:56 +0000 (11:23 -0300)
Ipoib netdev will share receive contexts with existing VNIC netdev.
To achieve that, a dummy netdev is allocated with hfi1_devdata to
own the receive contexts, and ipoib and VNIC netdevs will be put
on top of it. Each receive context is associated with a single
NAPI object.

This patch adds the functions to receive incoming packets for
accelerated ipoib.

Link: https://lore.kernel.org/r/20200511160631.173205.54184.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hfi1/Makefile
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/ipoib.h
drivers/infiniband/hw/hfi1/ipoib_rx.c [new file with mode: 0644]
drivers/infiniband/hw/hfi1/netdev.h [new file with mode: 0644]
drivers/infiniband/hw/hfi1/netdev_rx.c [new file with mode: 0644]

index 0b25713..2e89ec1 100644 (file)
@@ -23,10 +23,12 @@ hfi1-y := \
        intr.o \
        iowait.o \
        ipoib_main.o \
+       ipoib_rx.o \
        ipoib_tx.o \
        mad.o \
        mmu_rb.o \
        msix.o \
+       netdev_rx.o \
        opfn.o \
        pcie.o \
        pio.o \
index 049d15b..c5ed6ed 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2015-2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
 #include <linux/module.h>
 #include <linux/prefetch.h>
 #include <rdma/ib_verbs.h>
+#include <linux/etherdevice.h>
 
 #include "hfi.h"
 #include "trace.h"
@@ -63,6 +64,9 @@
 #include "vnic.h"
 #include "fault.h"
 
+#include "ipoib.h"
+#include "netdev.h"
+
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
 
@@ -1550,6 +1554,81 @@ void handle_eflags(struct hfi1_packet *packet)
                show_eflags_errs(packet);
 }
 
+static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
+{
+       struct hfi1_ibport *ibp;
+       struct net_device *netdev;
+       struct hfi1_ctxtdata *rcd = packet->rcd;
+       struct napi_struct *napi = rcd->napi;
+       struct sk_buff *skb;
+       struct hfi1_netdev_rxq *rxq = container_of(napi,
+                       struct hfi1_netdev_rxq, napi);
+       u32 extra_bytes;
+       u32 tlen, qpnum;
+       bool do_work, do_cnp;
+       struct hfi1_ipoib_dev_priv *priv;
+
+       trace_hfi1_rcvhdr(packet);
+
+       hfi1_setup_ib_header(packet);
+
+       packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
+       packet->grh = NULL;
+
+       if (unlikely(rhf_err_flags(packet->rhf))) {
+               handle_eflags(packet);
+               return;
+       }
+
+       qpnum = ib_bth_get_qpn(packet->ohdr);
+       netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
+       if (!netdev)
+               goto drop_no_nd;
+
+       trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+
+       /* handle congestion notifications */
+       do_work = hfi1_may_ecn(packet);
+       if (unlikely(do_work)) {
+               do_cnp = (packet->opcode != IB_OPCODE_CNP);
+               (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
+                                                packet, do_cnp);
+       }
+
+       /*
+        * The split point is after the last byte of the DETH, so
+        * strip the padding, CRC and ICRC from the payload length.
+        * tlen is the whole packet length, so the header size
+        * must be subtracted as well.
+        */
+       tlen = packet->tlen;
+       extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
+                       packet->hlen;
+       if (unlikely(tlen < extra_bytes))
+               goto drop;
+
+       tlen -= extra_bytes;
+
+       skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
+       if (unlikely(!skb))
+               goto drop;
+
+       priv = hfi1_ipoib_priv(netdev);
+       hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);
+
+       skb->dev = netdev;
+       skb->pkt_type = PACKET_HOST;
+       netif_receive_skb(skb);
+
+       return;
+
+drop:
+       ++netdev->stats.rx_dropped;
+drop_no_nd:
+       ibp = rcd_to_iport(packet->rcd);
+       ++ibp->rvp.n_pkt_drops;
+}
+
 /*
  * The following functions are called by the interrupt handler. They are type
  * specific handlers for each packet type.
@@ -1757,3 +1836,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
        [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
        [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
 };
+
+const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
+       [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
+       [RHF_RCV_TYPE_EAGER] = process_receive_invalid,
+       [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
+       [RHF_RCV_TYPE_ERROR] = process_receive_error,
+       [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
+       [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+       [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+       [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
index 5a9276c..c7d0aad 100644 (file)
@@ -233,6 +233,8 @@ struct hfi1_ctxtdata {
        intr_handler fast_handler;
        /** slow handler */
        intr_handler slow_handler;
+       /* napi pointer associated with netdev */
+       struct napi_struct *napi;
        /* verbs rx_stats per rcd */
        struct hfi1_opcode_stats_perctx *opstats;
        /* clear interrupt mask */
@@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
                              struct hfi1_pkt_state *ps,
                              struct rvt_swqe *wqe);
 extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
-
+extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
 
 /* return values for the RHF receive functions */
 #define RHF_RCV_CONTINUE  0    /* keep going */
@@ -1417,6 +1419,7 @@ struct hfi1_devdata {
        struct hfi1_vnic_data vnic;
        /* Lock to protect IRQ SRC register access */
        spinlock_t irq_src_lock;
+       struct net_device *dummy_netdev;
 
        /* Keeps track of IPoIB RSM rule users */
        atomic_t ipoib_rsm_usr_num;
index c2e63ca..ca00f6c 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "hfi.h"
 #include "iowait.h"
+#include "netdev.h"
 
 #include <rdma/ib_verbs.h>
 
@@ -29,6 +30,7 @@
 
 #define HFI1_IPOIB_TXREQ_NAME_LEN   32
 
+#define HFI1_IPOIB_PSEUDO_LEN 20
 #define HFI1_IPOIB_ENCAP_LEN 4
 
 struct hfi1_ipoib_dev_priv;
@@ -119,6 +121,19 @@ hfi1_ipoib_priv(const struct net_device *dev)
 }
 
 static inline void
+hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
+                             u64 packets,
+                             u64 bytes)
+{
+       struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+       u64_stats_update_begin(&netstats->syncp);
+       netstats->rx_packets += packets;
+       netstats->rx_bytes += bytes;
+       u64_stats_update_end(&netstats->syncp);
+}
+
+static inline void
 hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
                              u64 packets,
                              u64 bytes)
@@ -142,6 +157,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
 void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
 void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
 
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+                                      int size, void *data);
+
 int hfi1_ipoib_rn_get_params(struct ib_device *device,
                             u8 port_num,
                             enum rdma_netdev_t type,
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
new file mode 100644 (file)
index 0000000..2485663
--- /dev/null
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#include "netdev.h"
+#include "ipoib.h"
+
+#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
+
+static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
+{
+       void *dst_data;
+
+       skb_checksum_none_assert(skb);
+       skb->protocol = *((__be16 *)data);
+
+       dst_data = skb_put(skb, size);
+       memcpy(dst_data, data, size);
+       skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
+       skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
+}
+
+static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
+{
+       struct sk_buff *skb;
+       int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
+       void *frag;
+
+       skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       skb_size = SKB_DATA_ALIGN(skb_size);
+       frag = napi_alloc_frag(skb_size);
+
+       if (unlikely(!frag))
+               return napi_alloc_skb(napi, size);
+
+       skb = build_skb(frag, skb_size);
+
+       if (unlikely(!skb)) {
+               skb_free_frag(frag);
+               return NULL;
+       }
+
+       skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
+       return skb;
+}
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+                                      int size, void *data)
+{
+       struct napi_struct *napi = &rxq->napi;
+       int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
+       struct sk_buff *skb;
+
+       /*
+        * Sizes that fit in one page (less the skb overhead) are served
+        * by napi_alloc_skb(). Larger ones try the napi frag cache.
+        */
+       if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
+               skb = napi_alloc_skb(napi, skb_size);
+       else
+               skb = prepare_frag_skb(napi, skb_size);
+
+       if (unlikely(!skb))
+               return NULL;
+
+       copy_ipoib_buf(skb, data, size);
+
+       return skb;
+}
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
new file mode 100644 (file)
index 0000000..8992dfe
--- /dev/null
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_NETDEV_H
+#define HFI1_NETDEV_H
+
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+
+/**
+ * struct hfi1_netdev_rxq - Receive Queue for HFI
+ * dummy netdev. Both IPoIB and VNIC netdevices will be working on
+ * top of this device.
+ * @napi: napi object
+ * @priv: ptr to netdev_priv
+ * @rcd:  ptr to receive context data
+ */
+struct hfi1_netdev_rxq {
+       struct napi_struct napi;
+       struct hfi1_netdev_priv *priv;
+       struct hfi1_ctxtdata *rcd;
+};
+
+/*
+ * Number of netdev contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_MAX_NETDEV_CTXTS   8
+
+/* Number of NETDEV RSM entries */
+#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
+
+/**
+ * struct hfi1_netdev_priv: data required to setup and run HFI netdev.
+ * @dd:                hfi1_devdata
+ * @rxq:       pointer to dummy netdev receive queues.
+ * @num_rx_q:  number of receive queues
+ * @rmt_start: first free index in RMT Array
+ * @msix_start: first free MSI-X interrupt vector.
+ * @dev_tbl:   netdev table for unique identifier VNIC and IPoIB VLANs.
+ * @enabled:   atomic counter of netdevs enabling receive queues.
+ *             When 0 NAPI will be disabled.
+ * @netdevs:   atomic counter of netdevs using dummy netdev.
+ *             When 0 receive queues will be freed.
+ */
+struct hfi1_netdev_priv {
+       struct hfi1_devdata *dd;
+       struct hfi1_netdev_rxq *rxq;
+       int num_rx_q;
+       int rmt_start;
+       struct xarray dev_tbl;
+       /* count of enabled napi polls */
+       atomic_t enabled;
+       /* count of netdevs on top */
+       atomic_t netdevs;
+};
+
+static inline
+struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
+{
+       return (struct hfi1_netdev_priv *)&dev[1];
+}
+
+static inline
+int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
+{
+       struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+       return priv->num_rx_q;
+}
+
+static inline
+struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
+{
+       struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+       return priv->rxq[ctxt].rcd;
+}
+
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
+
+#endif /* HFI1_NETDEV_H */
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
new file mode 100644 (file)
index 0000000..3e286cb
--- /dev/null
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for netdev RX functionality
+ */
+
+#include "sdma.h"
+#include "verbs.h"
+#include "netdev.h"
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <rdma/ib_verbs.h>
+
+/**
+ * hfi1_netdev_add_data - Registers data with unique identifier
+ * to be requested later; this is needed for the VNIC and IPoIB VLAN
+ * implementations.
+ * This call is protected by mutex idr_lock.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ * @data: data to be associated with index
+ */
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
+{
+       struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+       return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT);
+}
+
+/**
+ * hfi1_netdev_remove_data - Removes data with previously given id.
+ * Returns the reference to removed entry.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
+{
+       struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+       return xa_erase(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_data - Gets data with given id
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
+{
+       struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+       return xa_load(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_first_data - Gets first entry with greater or equal id.
+ *
+ * @dd: hfi1 dev data
+ * @start_id: in: first id to search from; out: index set by xa_find()
+ */
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
+{
+       struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+       unsigned long index = *start_id;
+       void *ret;
+
+       ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT);
+       *start_id = (int)index;
+       return ret;
+}