OSDN Git Service

net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE
author: Tariq Toukan <tariqt@mellanox.com>
Wed, 21 Nov 2018 12:08:06 +0000 (14:08 +0200)
committer: Saeed Mahameed <saeedm@mellanox.com>
Fri, 21 Dec 2018 06:54:19 +0000 (22:54 -0800)
Add support for the HW feature of multi-packet WQE in XDP
xmit flow.

The conventional TX descriptor (WQE, Work Queue Element) serves
a single packet. Our HW has support for multi-packet WQE (MPWQE)
in which a single descriptor serves multiple TX packets.

This reduces both the PCI overhead and the CPU cycles spent on
writing descriptors.

In this patch we add support for the HW feature, which is supported
starting from ConnectX-5.

Performance:
Tested packet rate for UDP 64Byte multi-stream over ConnectX-5 NICs.
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

XDP_TX:
We see a huge gain on single port ConnectX-5, and reach the 100 Mpps
milestone.
* Single-port HCA:
Before:   70 Mpps
After:   100 Mpps (+42.8%)

* Dual-port HCA:
Before: 51.7 Mpps
After:  57.3 Mpps (+10.8%)

* In both cases we tested traffic on one port. For now, on dual-port HCAs
  we see only a small gain. We are working to overcome this bottleneck; for
  the moment, only with experimental firmware can dual-port HCAs reach the
  numbers seen on single-port HCAs.

XDP_REDIRECT:
Redirect from (A) ConnectX-5 to (B) ConnectX-5.
Due to a setup limitation, (A) and (B) are on different NUMA nodes,
so absolute performance numbers are not optimal.
Note:
  Below is the transmit rate of (B), not the redirect rate of (A),
  which is in some cases higher.

* (B) is single-port:
Before:   77 Mpps
After:    90 Mpps (+16.8%)

* (B) is dual-port:
Before:  61 Mpps
After:   72 Mpps (+18%)

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
include/linux/mlx5/device.h

index 20b3432..8f5545d 100644 (file)
@@ -416,6 +416,16 @@ struct mlx5e_xdp_wqe_info {
        u8 num_ds;
 };
 
+struct mlx5e_xdp_mpwqe {
+       /* Current MPWQE session */
+       struct mlx5e_tx_wqe *wqe;
+       u8                   ds_count;
+       u8                   max_ds_count;
+};
+
+struct mlx5e_xdpsq;
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
+                                       struct mlx5e_xdp_info*);
 struct mlx5e_xdpsq {
        /* data path */
 
@@ -428,12 +438,14 @@ struct mlx5e_xdpsq {
        u32                        xdpi_fifo_pc ____cacheline_aligned_in_smp;
        u16                        pc;
        struct mlx5_wqe_ctrl_seg   *doorbell_cseg;
+       struct mlx5e_xdp_mpwqe     mpwqe;
 
        struct mlx5e_cq            cq;
 
        /* read only */
        struct mlx5_wq_cyc         wq;
        struct mlx5e_xdpsq_stats  *stats;
+       mlx5e_fp_xmit_xdp_frame    xmit_xdp_frame;
        struct {
                struct mlx5e_xdp_wqe_info *wqe_info;
                struct mlx5e_xdp_info_fifo xdpi_fifo;
index 5e5e43e..3740177 100644 (file)
@@ -47,7 +47,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
                                   xdpi.xdpf->len, PCI_DMA_TODEVICE);
        xdpi.di = *di;
 
-       return mlx5e_xmit_xdp_frame(sq, &xdpi);
+       return sq->xmit_xdp_frame(sq, &xdpi);
 }
 
 /* returns true if packet was consumed by xdp */
@@ -102,7 +102,98 @@ xdp_abort:
        }
 }
 
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+       struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       u8  wqebbs;
+       u16 pi;
+
+       mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
+
+       prefetchw(session->wqe->data);
+       session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+       pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+ * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+       wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
+                      MLX5E_XDP_MPW_MAX_WQEBBS);
+
+       session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
+}
+
+static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+       struct mlx5_wq_cyc       *wq    = &sq->wq;
+       struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+       struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+       u16 ds_count = session->ds_count;
+       u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+       struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+       cseg->opmod_idx_opcode =
+               cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+       cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+       wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+       wi->num_ds     = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+       sq->pc += wi->num_wqebbs;
+
+       sq->doorbell_cseg = cseg;
+
+       session->wqe = NULL; /* Close session */
+}
+
+static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+                                      struct mlx5e_xdp_info *xdpi)
+{
+       struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+       struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+       dma_addr_t dma_addr    = xdpi->dma_addr;
+       struct xdp_frame *xdpf = xdpi->xdpf;
+       unsigned int dma_len   = xdpf->len;
+
+       if (unlikely(sq->hw_mtu < dma_len)) {
+               stats->err++;
+               return false;
+       }
+
+       if (unlikely(!session->wqe)) {
+               if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+                                                    MLX5_SEND_WQE_MAX_WQEBBS))) {
+                       /* SQ is full, ring doorbell */
+                       mlx5e_xmit_xdp_doorbell(sq);
+                       stats->full++;
+                       return false;
+               }
+
+               mlx5e_xdp_mpwqe_session_start(sq);
+       }
+
+       mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
+
+       if (unlikely(session->ds_count == session->max_ds_count))
+               mlx5e_xdp_mpwqe_complete(sq);
+
+       mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+       stats->xmit++;
+       return true;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
 {
        struct mlx5_wq_cyc       *wq   = &sq->wq;
        u16                       pi   = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -304,7 +395,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 
                xdpi.xdpf = xdpf;
 
-               if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+               if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
                        dma_unmap_single(sq->pdev, xdpi.dma_addr,
                                         xdpf->len, DMA_TO_DEVICE);
                        xdp_return_frame_rx_napi(xdpf);
@@ -312,8 +403,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
                }
        }
 
-       if (flags & XDP_XMIT_FLUSH)
+       if (flags & XDP_XMIT_FLUSH) {
+               if (sq->mpwqe.wqe)
+                       mlx5e_xdp_mpwqe_complete(sq);
                mlx5e_xmit_xdp_doorbell(sq);
+       }
 
        return n - drops;
 }
@@ -322,6 +416,9 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
 {
        struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
 
+       if (xdpsq->mpwqe.wqe)
+               mlx5e_xdp_mpwqe_complete(xdpsq);
+
        mlx5e_xmit_xdp_doorbell(xdpsq);
 
        if (xdpsq->redirect_flush) {
@@ -329,3 +426,10 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
                xdpsq->redirect_flush = false;
        }
 }
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+       sq->xmit_xdp_frame = is_mpw ?
+               mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
+
index fd689ed..3a67cb3 100644 (file)
 #define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
                                 MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
-       ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
+       (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
 
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
                      void *va, u16 *rx_headroom, u32 *len);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
 void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
 int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
                   u32 flags);
 
@@ -58,6 +59,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
 }
 
 static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
+{
+       struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+       struct mlx5_wqe_data_seg *dseg =
+               (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
+
+       dseg->addr       = cpu_to_be64(dma_addr);
+       dseg->byte_count = cpu_to_be32(dma_len);
+       dseg->lkey       = sq->mkey_be;
+}
+
+static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
+                                        struct mlx5e_tx_wqe **wqe)
+{
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+       *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+       memset(*wqe, 0, sizeof(**wqe));
+}
+
+static inline void
 mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
                     struct mlx5e_xdp_info *xi)
 {
index 7086c73..07b16e5 100644 (file)
@@ -61,6 +61,7 @@ struct mlx5e_rq_param {
 struct mlx5e_sq_param {
        u32                        sqc[MLX5_ST_SZ_DW(sqc)];
        struct mlx5_wq_param       wq;
+       bool                       is_mpw;
 };
 
 struct mlx5e_cq_param {
@@ -1586,11 +1587,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
                            struct mlx5e_xdpsq *sq,
                            bool is_redirect)
 {
-       unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
        struct mlx5e_create_sq_param csp = {};
-       unsigned int inline_hdr_sz = 0;
        int err;
-       int i;
 
        err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
        if (err)
@@ -1606,27 +1604,35 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
        if (err)
                goto err_free_xdpsq;
 
-       if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
-               inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
-               ds_cnt++;
-       }
+       mlx5e_set_xmit_fp(sq, param->is_mpw);
+
+       if (!param->is_mpw) {
+               unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+               unsigned int inline_hdr_sz = 0;
+               int i;
 
-       /* Pre initialize fixed WQE fields */
-       for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
-               struct mlx5e_xdp_wqe_info *wi  = &sq->db.wqe_info[i];
-               struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
-               struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-               struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
-               struct mlx5_wqe_data_seg *dseg;
+               if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+                       inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+                       ds_cnt++;
+               }
+
+               /* Pre initialize fixed WQE fields */
+               for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+                       struct mlx5e_xdp_wqe_info *wi  = &sq->db.wqe_info[i];
+                       struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+                       struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+                       struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+                       struct mlx5_wqe_data_seg *dseg;
 
-               cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
-               eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+                       cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+                       eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
 
-               dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
-               dseg->lkey = sq->mkey_be;
+                       dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+                       dseg->lkey = sq->mkey_be;
 
-               wi->num_wqebbs = 1;
-               wi->num_ds     = 1;
+                       wi->num_wqebbs = 1;
+                       wi->num_ds     = 1;
+               }
        }
 
        return 0;
@@ -2335,6 +2341,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
 
        mlx5e_build_sq_param_common(priv, param);
        MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+       param->is_mpw = MLX5_CAP_ETH(priv->mdev, enhanced_multi_pkt_send_wqe);
 }
 
 static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
index 4674b9e..8c4a820 100644 (file)
@@ -421,6 +421,7 @@ enum {
        MLX5_OPCODE_ATOMIC_MASKED_FA    = 0x15,
        MLX5_OPCODE_BIND_MW             = 0x18,
        MLX5_OPCODE_CONFIG_CMD          = 0x1f,
+       MLX5_OPCODE_ENHANCED_MPSW       = 0x29,
 
        MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
        MLX5_RECV_OPCODE_SEND           = 0x01,