From 0dcf7f500b0a56c0c58deeeac7692fe213aa08b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 21 Mar 2022 11:42:05 +0100 Subject: [PATCH] nfp: use TX ring pointer write back Newer versions of the PCIe microcode support writing back the position of the TX pointer back into host memory. This speeds up TX completions, because we avoid a read from device memory (replacing PCIe read with DMA coherent read). Signed-off-by: Jakub Kicinski Signed-off-by: Fei Qin Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 5 ++-- drivers/net/ethernet/netronome/nfp/nfp_net.h | 7 ++++++ .../net/ethernet/netronome/nfp/nfp_net_common.c | 9 ++++++- .../net/ethernet/netronome/nfp/nfp_net_debugfs.c | 5 +++- drivers/net/ethernet/netronome/nfp/nfp_net_dp.c | 29 +++++++++++++++++++--- drivers/net/ethernet/netronome/nfp/nfp_net_dp.h | 8 ++++++ 6 files changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index 619f4d09e4e0..7db56abaa582 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -392,7 +392,7 @@ void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) return; /* Work out how many descriptors have been transmitted */ - qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); if (qcp_rd_p == tx_ring->qcp_rd_p) return; @@ -467,13 +467,14 @@ void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; u32 done_pkts = 0, done_bytes = 0; bool done_all; int idx, todo; u32 qcp_rd_p; /* Work out how many descriptors have been transmitted */ - qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); if (qcp_rd_p == tx_ring->qcp_rd_p) return true; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 4e288b8f3510..3c386972f69a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -126,6 +126,7 @@ struct nfp_nfd3_tx_buf; * @r_vec: Back pointer to ring vector structure * @idx: Ring index from Linux's perspective * @qcp_q: Pointer to base of the QCP TX queue + * @txrwb: TX pointer write back area * @cnt: Size of the queue in number of descriptors * @wr_p: TX ring write pointer (free running) * @rd_p: TX ring read pointer (free running) @@ -145,6 +146,7 @@ struct nfp_net_tx_ring { u32 idx; u8 __iomem *qcp_q; + u64 *txrwb; u32 cnt; u32 wr_p; @@ -444,6 +446,8 @@ struct nfp_stat_pair { * @ctrl_bar: Pointer to mapped control BAR * * @ops: Callbacks and parameters for this vNIC's NFD version + * @txrwb: TX pointer write back area (indexed by queue id) + * @txrwb_dma: TX pointer write back area DMA address * @txd_cnt: Size of the TX ring in number of min size packets * @rxd_cnt: Size of the RX ring in number of min size packets * @num_r_vecs: Number of used ring vectors @@ -480,6 +484,9 @@ struct nfp_net_dp { const struct nfp_dp_ops *ops; + u64 *txrwb; + dma_addr_t txrwb_dma; + unsigned int txd_cnt; unsigned int rxd_cnt; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index dd234f5228f1..5cac5563028c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1427,6 +1427,8 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) new->rx_rings = NULL; new->num_r_vecs = 0; new->num_stack_tx_rings = 0; + new->txrwb = NULL; + new->txrwb_dma = 0; return new; } @@ -1963,7 +1965,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -1981,6 +1983,7 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "", nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", + nn->cap & NFP_NET_CFG_CTRL_TXRWB ? "TXRWB " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ? @@ -2352,6 +2355,10 @@ int nfp_net_init(struct nfp_net *nn) nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD; } + /* Enable TX pointer writeback, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_TXRWB) + nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB; + /* Stash the re-configuration queue away. First odd queue in TX Bar */ nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index 791203d07ac7..d8b735ccf899 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -99,11 +99,14 @@ static int nfp_tx_q_show(struct seq_file *file, void *data) d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); - seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u\n", + seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u", tx_ring->idx, tx_ring->qcidx, tx_ring == r_vec->tx_ring ? "" : "xdp", tx_ring->cnt, &tx_ring->dma, tx_ring->txds, tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); + if (tx_ring->txrwb) + seq_printf(file, " TXRWB=%llu", *tx_ring->txrwb); + seq_putc(file, '\n'); nfp_net_debugfs_print_tx_descs(file, &nn->dp, r_vec, tx_ring, d_rd_p, d_wr_p); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c index 431bd2c13221..34dd94811df3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c @@ -44,12 +44,13 @@ void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) /** * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring * @tx_ring: TX ring structure + * @dp: NFP Net data path struct * @r_vec: IRQ vector servicing this ring * @idx: Ring index * @is_xdp: Is this an XDP TX ring? */ static void -nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, +nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, unsigned int idx, bool is_xdp) { @@ -61,6 +62,7 @@ nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, u64_stats_init(&tx_ring->r_vec->tx_sync); tx_ring->qcidx = tx_ring->idx * nn->stride_tx; + tx_ring->txrwb = dp->txrwb ? &dp->txrwb[idx] : NULL; tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); } @@ -187,14 +189,22 @@ int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) if (!dp->tx_rings) return -ENOMEM; + if (dp->ctrl & NFP_NET_CFG_CTRL_TXRWB) { + dp->txrwb = dma_alloc_coherent(dp->dev, + dp->num_tx_rings * sizeof(u64), + &dp->txrwb_dma, GFP_KERNEL); + if (!dp->txrwb) + goto err_free_rings; + } + for (r = 0; r < dp->num_tx_rings; r++) { int bias = 0; if (r >= dp->num_stack_tx_rings) bias = dp->num_stack_tx_rings; - nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias], - r, bias); + nfp_net_tx_ring_init(&dp->tx_rings[r], dp, + &nn->r_vecs[r - bias], r, bias); if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r])) goto err_free_prev; @@ -211,6 +221,10 @@ err_free_prev: err_free_ring: nfp_net_tx_ring_free(dp, &dp->tx_rings[r]); } + if (dp->txrwb) + dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64), + dp->txrwb, dp->txrwb_dma); +err_free_rings: kfree(dp->tx_rings); return -ENOMEM; } @@ -224,6 +238,9 @@ void nfp_net_tx_rings_free(struct nfp_net_dp *dp) nfp_net_tx_ring_free(dp, &dp->tx_rings[r]); } + if (dp->txrwb) + dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64), + dp->txrwb, dp->txrwb_dma); kfree(dp->tx_rings); } @@ -377,6 +394,11 @@ nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring, unsigned int idx) { nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma); + if (tx_ring->txrwb) { + *tx_ring->txrwb = 0; + nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), + nn->dp.txrwb_dma + idx * sizeof(u64)); + } nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt)); nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry); } @@ -388,6 +410,7 @@ void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0); nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0); + nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), 0); nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0); nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h index 81be8d17fa93..99579722aacf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h @@ -60,6 +60,14 @@ static inline void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) tx_ring->wr_ptr_add = 0; } +static inline u32 +nfp_net_read_tx_cmpl(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp) +{ + if (tx_ring->txrwb) + return *tx_ring->txrwb; + return nfp_qcp_rd_ptr_read(tx_ring->qcp_q); +} + static inline void nfp_net_free_frag(void *frag, bool xdp) { if (!xdp) -- 2.11.0