OSDN Git Service

ixgbe: add AF_XDP zero-copy Rx support
author Björn Töpel <bjorn.topel@intel.com>
Tue, 2 Oct 2018 08:00:32 +0000 (10:00 +0200)
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Wed, 3 Oct 2018 19:51:14 +0000 (12:51 -0700)
This patch adds zero-copy Rx support for AF_XDP sockets. Instead of
allocating buffers of type MEM_TYPE_PAGE_SHARED, the Rx frames are
allocated as MEM_TYPE_ZERO_COPY when AF_XDP is enabled for a certain
queue.

All AF_XDP specific functions are added to a new file, ixgbe_xsk.c.

Note that when AF_XDP zero-copy is enabled, the XDP action XDP_PASS
will allocate a new buffer and copy the zero-copy frame prior to
passing it to the kernel stack.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: William Tu <u9012063@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
drivers/net/ethernet/intel/ixgbe/Makefile
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c [new file with mode: 0644]

index 5414685..ca6b0c4 100644 (file)
@@ -8,7 +8,8 @@ obj-$(CONFIG_IXGBE) += ixgbe.o
 
 ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
               ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \
-              ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o
+              ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o \
+              ixgbe_xsk.o
 
 ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
                               ixgbe_dcb_82599.o ixgbe_dcb_nl.o
index 265db17..7a7679e 100644 (file)
@@ -228,13 +228,17 @@ struct ixgbe_tx_buffer {
 struct ixgbe_rx_buffer {
        struct sk_buff *skb;
        dma_addr_t dma;
-       struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-       __u32 page_offset;
-#else
-       __u16 page_offset;
-#endif
-       __u16 pagecnt_bias;
+       union {
+               struct {
+                       struct page *page;
+                       __u32 page_offset;
+                       __u16 pagecnt_bias;
+               };
+               struct {
+                       void *addr;
+                       u64 handle;
+               };
+       };
 };
 
 struct ixgbe_queue_stats {
@@ -348,6 +352,10 @@ struct ixgbe_ring {
                struct ixgbe_rx_queue_stats rx_stats;
        };
        struct xdp_rxq_info xdp_rxq;
+       struct xdp_umem *xsk_umem;
+       struct zero_copy_allocator zca; /* ZC allocator anchor */
+       u16 ring_idx;           /* {rx,tx,xdp}_ring back reference idx */
+       u16 rx_buf_len;
 } ____cacheline_internodealigned_in_smp;
 
 enum ixgbe_ring_f_enum {
@@ -765,6 +773,11 @@ struct ixgbe_adapter {
 #ifdef CONFIG_XFRM_OFFLOAD
        struct ixgbe_ipsec *ipsec;
 #endif /* CONFIG_XFRM_OFFLOAD */
+
+       /* AF_XDP zero-copy */
+       struct xdp_umem **xsk_umems;
+       u16 num_xsk_umems_used;
+       u16 num_xsk_umems;
 };
 
 static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
index d361f57..62e6499 100644 (file)
@@ -1055,7 +1055,7 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
        int txr_remaining = adapter->num_tx_queues;
        int xdp_remaining = adapter->num_xdp_queues;
        int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
-       int err;
+       int err, i;
 
        /* only one q_vector if MSI-X is disabled. */
        if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
@@ -1097,6 +1097,21 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
                xdp_idx += xqpv;
        }
 
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               if (adapter->rx_ring[i])
+                       adapter->rx_ring[i]->ring_idx = i;
+       }
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               if (adapter->tx_ring[i])
+                       adapter->tx_ring[i]->ring_idx = i;
+       }
+
+       for (i = 0; i < adapter->num_xdp_queues; i++) {
+               if (adapter->xdp_ring[i])
+                       adapter->xdp_ring[i]->ring_idx = i;
+       }
+
        return 0;
 
 err_out:
index 681ed9f..cad4c12 100644 (file)
@@ -34,6 +34,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <net/vxlan.h>
 #include <net/mpls.h>
+#include <net/xdp_sock.h>
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
@@ -3176,7 +3177,10 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
                per_ring_budget = budget;
 
        ixgbe_for_each_ring(ring, q_vector->rx) {
-               int cleaned = ixgbe_clean_rx_irq(q_vector, ring,
+               int cleaned = ring->xsk_umem ?
+                             ixgbe_clean_rx_irq_zc(q_vector, ring,
+                                                   per_ring_budget) :
+                             ixgbe_clean_rx_irq(q_vector, ring,
                                                 per_ring_budget);
 
                work_done += cleaned;
@@ -3704,10 +3708,27 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
        srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
 
        /* configure the packet buffer length */
-       if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state))
+       if (rx_ring->xsk_umem) {
+               u32 xsk_buf_len = rx_ring->xsk_umem->chunk_size_nohr -
+                                 XDP_PACKET_HEADROOM;
+
+               /* If the MAC supports setting RXDCTL.RLPML, the
+                * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
+                * RXDCTL.RLPML is set to the actual UMEM buffer
+                * size. If not, then we are stuck with a 1k buffer
+                * size resolution. In this case frames larger than
+                * the UMEM buffer size viewed in a 1k resolution will
+                * be dropped.
+                */
+               if (hw->mac.type != ixgbe_mac_82599EB)
+                       srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+               else
+                       srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       } else if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) {
                srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-       else
+       } else {
                srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       }
 
        /* configure descriptor type */
        srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
@@ -4030,6 +4051,19 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
        u32 rxdctl;
        u8 reg_idx = ring->reg_idx;
 
+       xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+       ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
+       if (ring->xsk_umem) {
+               ring->zca.free = ixgbe_zca_free;
+               WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+                                                  MEM_TYPE_ZERO_COPY,
+                                                  &ring->zca));
+
+       } else {
+               WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+                                                  MEM_TYPE_PAGE_SHARED, NULL));
+       }
+
        /* disable queue to avoid use of these values while updating state */
        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
        rxdctl &= ~IXGBE_RXDCTL_ENABLE;
@@ -4079,6 +4113,17 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
 #endif
        }
 
+       if (ring->xsk_umem && hw->mac.type != ixgbe_mac_82599EB) {
+               u32 xsk_buf_len = ring->xsk_umem->chunk_size_nohr -
+                                 XDP_PACKET_HEADROOM;
+
+               rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+                           IXGBE_RXDCTL_RLPML_EN);
+               rxdctl |= xsk_buf_len | IXGBE_RXDCTL_RLPML_EN;
+
+               ring->rx_buf_len = xsk_buf_len;
+       }
+
        /* initialize rx_buffer_info */
        memset(ring->rx_buffer_info, 0,
               sizeof(struct ixgbe_rx_buffer) * ring->count);
@@ -4092,7 +4137,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
 
        ixgbe_rx_desc_queue_enable(adapter, ring);
-       ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
+       if (ring->xsk_umem)
+               ixgbe_alloc_rx_buffers_zc(ring, ixgbe_desc_unused(ring));
+       else
+               ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
 }
 
 static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
@@ -5206,6 +5254,11 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
        u16 i = rx_ring->next_to_clean;
        struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
 
+       if (rx_ring->xsk_umem) {
+               ixgbe_xsk_clean_rx_ring(rx_ring);
+               goto skip_free;
+       }
+
        /* Free all the Rx ring sk_buffs */
        while (i != rx_ring->next_to_alloc) {
                if (rx_buffer->skb) {
@@ -5244,6 +5297,7 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
                }
        }
 
+skip_free:
        rx_ring->next_to_alloc = 0;
        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
@@ -6439,7 +6493,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
        struct device *dev = rx_ring->dev;
        int orig_node = dev_to_node(dev);
        int ring_node = -1;
-       int size, err;
+       int size;
 
        size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 
@@ -6476,13 +6530,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
                             rx_ring->queue_index) < 0)
                goto err;
 
-       err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
-                                        MEM_TYPE_PAGE_SHARED, NULL);
-       if (err) {
-               xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
-               goto err;
-       }
-
        rx_ring->xdp_prog = adapter->xdp_prog;
 
        return 0;
@@ -10198,6 +10245,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                xdp->prog_id = adapter->xdp_prog ?
                        adapter->xdp_prog->aux->id : 0;
                return 0;
+       case XDP_QUERY_XSK_UMEM:
+               return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem,
+                                           xdp->xsk.queue_id);
+       case XDP_SETUP_XSK_UMEM:
+               return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
+                                           xdp->xsk.queue_id);
+
        default:
                return -EINVAL;
        }
index 3780d31..cf219f4 100644 (file)
@@ -23,4 +23,19 @@ void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
 void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring);
 void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
 
+struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
+                               struct ixgbe_ring *ring);
+int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
+                        u16 qid);
+int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
+                        u16 qid);
+
+void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
+
+void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
+int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+                         struct ixgbe_ring *rx_ring,
+                         const int budget);
+void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
+
 #endif /* #define _IXGBE_TXRX_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
new file mode 100644 (file)
index 0000000..e876ff1
--- /dev/null
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+
+#include "ixgbe.h"
+#include "ixgbe_txrx_common.h"
+
+/**
+ * ixgbe_xsk_umem - look up the AF_XDP UMEM bound to a ring
+ * @adapter: board private structure
+ * @ring: ring whose ring_idx selects the UMEM slot
+ *
+ * Returns the UMEM stored for the ring's queue index, or NULL when no XDP
+ * program is attached, no UMEM table exists, the index lies outside the
+ * table, or the slot is empty.
+ **/
+struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
+                               struct ixgbe_ring *ring)
+{
+       bool xdp_on = READ_ONCE(adapter->xdp_prog);
+       int qid = ring->ring_idx;
+
+       if (!xdp_on)
+               return NULL;
+
+       /* qid must be range-checked before it is used to index the table */
+       if (!adapter->xsk_umems || qid >= adapter->num_xsk_umems)
+               return NULL;
+
+       /* an empty slot holds NULL, which is what the caller expects */
+       return adapter->xsk_umems[qid];
+}
+
+/* Allocate the per-queue UMEM pointer table on first use, with one slot per
+ * Rx queue. Returns 0 if the table already exists or was allocated, and
+ * -ENOMEM if the allocation failed.
+ */
+static int ixgbe_alloc_xsk_umems(struct ixgbe_adapter *adapter)
+{
+       struct xdp_umem **table;
+
+       if (adapter->xsk_umems)
+               return 0;
+
+       adapter->num_xsk_umems_used = 0;
+       adapter->num_xsk_umems = adapter->num_rx_queues;
+
+       table = kcalloc(adapter->num_xsk_umems, sizeof(*table), GFP_KERNEL);
+       adapter->xsk_umems = table;
+       if (table)
+               return 0;
+
+       /* no table: report a size of zero so nothing indexes into it */
+       adapter->num_xsk_umems = 0;
+       return -ENOMEM;
+}
+
+/* Store @umem in slot @qid of the UMEM table, creating the table if needed.
+ * Returns 0 on success or the error reported by ixgbe_alloc_xsk_umems().
+ */
+static int ixgbe_add_xsk_umem(struct ixgbe_adapter *adapter,
+                             struct xdp_umem *umem,
+                             u16 qid)
+{
+       int err = ixgbe_alloc_xsk_umems(adapter);
+
+       if (!err) {
+               adapter->xsk_umems[qid] = umem;
+               adapter->num_xsk_umems_used++;
+       }
+
+       return err;
+}
+
+/* Clear slot @qid of the UMEM table and drop the usage count. Once the last
+ * used slot is gone the table itself is freed, so that a later
+ * ixgbe_alloc_xsk_umems() starts from scratch.
+ */
+static void ixgbe_remove_xsk_umem(struct ixgbe_adapter *adapter, u16 qid)
+{
+       adapter->xsk_umems[qid] = NULL;
+       adapter->num_xsk_umems_used--;
+
+       /* test the number of slots in use, not the table size; the table
+        * size only becomes zero inside this branch
+        */
+       if (adapter->num_xsk_umems_used == 0) {
+               kfree(adapter->xsk_umems);
+               adapter->xsk_umems = NULL;
+               adapter->num_xsk_umems = 0;
+       }
+}
+
+/* DMA-map every page of @umem for the adapter's PCI device and record the
+ * bus address in umem->pages[].dma.
+ *
+ * Returns 0 on success. If any page fails to map, all pages mapped so far
+ * are unmapped again and -ENOMEM is returned.
+ */
+static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter,
+                                 struct xdp_umem *umem)
+{
+       struct device *dev = &adapter->pdev->dev;
+       unsigned int i, j;
+       dma_addr_t dma;
+
+       for (i = 0; i < umem->npgs; i++) {
+               dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
+                                        DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
+               if (dma_mapping_error(dev, dma))
+                       goto out_unmap;
+
+               umem->pages[i].dma = dma;
+       }
+
+       return 0;
+
+out_unmap:
+       /* unwind pages [0, i): index with j, slot i was never mapped */
+       for (j = 0; j < i; j++) {
+               dma_unmap_page_attrs(dev, umem->pages[j].dma, PAGE_SIZE,
+                                    DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
+               umem->pages[j].dma = 0;
+       }
+
+       return -ENOMEM;
+}
+
+/* Undo ixgbe_xsk_umem_dma_map(): unmap every page of @umem and reset the
+ * stored bus addresses to 0.
+ */
+static void ixgbe_xsk_umem_dma_unmap(struct ixgbe_adapter *adapter,
+                                    struct xdp_umem *umem)
+{
+       struct device *dev = &adapter->pdev->dev;
+       unsigned int idx = 0;
+
+       while (idx < umem->npgs) {
+               dma_unmap_page_attrs(dev, umem->pages[idx].dma, PAGE_SIZE,
+                                    DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
+               umem->pages[idx].dma = 0;
+               idx++;
+       }
+}
+
+/* Bind @umem to Rx queue @qid.
+ *
+ * Installs a fill-queue reuse queue sized from rx_ring[0]->count, DMA-maps
+ * the UMEM pages and stores the UMEM in the adapter's table. If the
+ * interface is running with an XDP program attached, the queue pair is
+ * disabled around the table update and re-enabled afterwards.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range @qid, -EBUSY if the
+ * queue already has a UMEM, -ENOMEM if the reuse queue cannot be allocated,
+ * or the error from the DMA-mapping / table-insert helpers.
+ */
+static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
+                                struct xdp_umem *umem,
+                                u16 qid)
+{
+       struct xdp_umem_fq_reuse *reuseq;
+       bool if_running;
+       int err;
+
+       if (qid >= adapter->num_rx_queues)
+               return -EINVAL;
+
+       if (adapter->xsk_umems) {
+               if (qid >= adapter->num_xsk_umems)
+                       return -EINVAL;
+               if (adapter->xsk_umems[qid])
+                       return -EBUSY;
+       }
+
+       reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count);
+       if (!reuseq)
+               return -ENOMEM;
+
+       /* install the new reuse queue and free whatever it replaced */
+       xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+       err = ixgbe_xsk_umem_dma_map(adapter, umem);
+       if (err)
+               return err;
+
+       if_running = netif_running(adapter->netdev) &&
+                    READ_ONCE(adapter->xdp_prog);
+
+       if (if_running)
+               ixgbe_txrx_ring_disable(adapter, qid);
+
+       err = ixgbe_add_xsk_umem(adapter, umem, qid);
+
+       if (if_running)
+               ixgbe_txrx_ring_enable(adapter, qid);
+
+       /* The UMEM never made it into the table, so the disable path will
+        * not find it; release the DMA mappings here to avoid leaking them.
+        */
+       if (err)
+               ixgbe_xsk_umem_dma_unmap(adapter, umem);
+
+       return err;
+}
+
+/* Unbind the UMEM from Rx queue @qid: unmap its pages and clear its table
+ * slot, disabling and re-enabling the queue pair around the change when the
+ * interface is running with an XDP program attached.
+ *
+ * Returns 0 on success, or -EINVAL if no UMEM is bound to @qid.
+ */
+static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
+{
+       bool restart;
+
+       if (!adapter->xsk_umems)
+               return -EINVAL;
+       if (qid >= adapter->num_xsk_umems)
+               return -EINVAL;
+       if (!adapter->xsk_umems[qid])
+               return -EINVAL;
+
+       restart = netif_running(adapter->netdev) &&
+                 READ_ONCE(adapter->xdp_prog);
+
+       if (restart)
+               ixgbe_txrx_ring_disable(adapter, qid);
+
+       ixgbe_xsk_umem_dma_unmap(adapter, adapter->xsk_umems[qid]);
+       ixgbe_remove_xsk_umem(adapter, qid);
+
+       if (restart)
+               ixgbe_txrx_ring_enable(adapter, qid);
+
+       return 0;
+}
+
+/* Report through @umem which UMEM, if any, is bound to queue @qid.
+ *
+ * Returns 0 and stores the UMEM pointer (NULL when none is bound), or
+ * -EINVAL when @qid is out of range.
+ */
+int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
+                        u16 qid)
+{
+       if (qid >= adapter->num_rx_queues)
+               return -EINVAL;
+
+       /* no table allocated yet means nothing is bound anywhere */
+       if (!adapter->xsk_umems) {
+               *umem = NULL;
+               return 0;
+       }
+
+       if (qid >= adapter->num_xsk_umems)
+               return -EINVAL;
+
+       *umem = adapter->xsk_umems[qid];
+       return 0;
+}
+
+/* Entry point for XDP_SETUP_XSK_UMEM: a non-NULL @umem binds it to queue
+ * @qid, a NULL @umem unbinds whatever is bound there. Returns the result of
+ * the enable or disable helper.
+ */
+int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
+                        u16 qid)
+{
+       if (!umem)
+               return ixgbe_xsk_umem_disable(adapter, qid);
+
+       return ixgbe_xsk_umem_enable(adapter, umem, qid);
+}
+
+/* Run the ring's XDP program on a zero-copy frame and act on its verdict.
+ *
+ * Returns IXGBE_XDP_PASS when the frame should go up the stack, the result
+ * of ixgbe_xmit_xdp_ring() for XDP_TX, IXGBE_XDP_REDIR for a successful
+ * redirect, and IXGBE_XDP_CONSUMED when the frame is dropped (XDP_DROP,
+ * XDP_ABORTED, unknown action, or a failed TX conversion / redirect).
+ */
+static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
+                           struct ixgbe_ring *rx_ring,
+                           struct xdp_buff *xdp)
+{
+       int err, result = IXGBE_XDP_PASS;
+       struct bpf_prog *xdp_prog;
+       struct xdp_frame *xdpf;
+       u32 act;
+
+       /* the program pointer is read and run under the RCU read lock */
+       rcu_read_lock();
+       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       /* move the handle forward by the distance from the start of the
+        * headroom to the (possibly program-adjusted) packet data
+        */
+       xdp->handle += xdp->data - xdp->data_hard_start;
+       switch (act) {
+       case XDP_PASS:
+               break;
+       case XDP_TX:
+               xdpf = convert_to_xdp_frame(xdp);
+               if (unlikely(!xdpf)) {
+                       result = IXGBE_XDP_CONSUMED;
+                       break;
+               }
+               result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+               break;
+       case XDP_REDIRECT:
+               err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+               result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               /* fallthrough */
+       case XDP_ABORTED:
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+               /* fallthrough -- handle aborts by dropping packet */
+       case XDP_DROP:
+               result = IXGBE_XDP_CONSUMED;
+               break;
+       }
+       rcu_read_unlock();
+       return result;
+}
+
+/* Return the buffer at next_to_clean after syncing its first @size bytes
+ * for CPU access.
+ */
+static struct
+ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
+                                       unsigned int size)
+{
+       u16 ntc = rx_ring->next_to_clean;
+       struct ixgbe_rx_buffer *bi = rx_ring->rx_buffer_info + ntc;
+
+       /* the device has written this buffer; make it visible to the CPU */
+       dma_sync_single_range_for_cpu(rx_ring->dev, bi->dma, 0, size,
+                                     DMA_BIDIRECTIONAL);
+
+       return bi;
+}
+
+/* Recycle the UMEM chunk held by @obi into the slot at next_to_alloc.
+ *
+ * The chunk's dma/addr/handle values are rounded down with the UMEM chunk
+ * mask and the headroom is re-applied, undoing any offset adjustment made
+ * while the frame was processed. @obi is then marked empty.
+ */
+static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
+                                    struct ixgbe_rx_buffer *obi)
+{
+       struct xdp_umem *umem = rx_ring->xsk_umem;
+       unsigned long mask = (unsigned long)umem->chunk_mask;
+       u64 hr = umem->headroom + XDP_PACKET_HEADROOM;
+       u16 slot = rx_ring->next_to_alloc;
+       struct ixgbe_rx_buffer *nbi = &rx_ring->rx_buffer_info[slot];
+
+       /* advance next_to_alloc, wrapping at the end of the ring */
+       if (++slot >= rx_ring->count)
+               slot = 0;
+       rx_ring->next_to_alloc = slot;
+
+       /* hand the chunk over to the new slot, reset to its start + headroom */
+       nbi->dma = (obi->dma & mask) + hr;
+
+       nbi->addr = (void *)((unsigned long)obi->addr & mask);
+       nbi->addr += hr;
+
+       nbi->handle = (obi->handle & mask) + umem->headroom;
+
+       /* the old slot no longer owns a buffer */
+       obi->addr = NULL;
+       obi->skb = NULL;
+}
+
+/**
+ * ixgbe_zca_free - zero-copy allocator free callback
+ * @alloc: allocator anchor embedded in the Rx ring (ring->zca)
+ * @handle: UMEM handle of the chunk being returned
+ *
+ * Places the returned chunk into the Rx ring slot at next_to_alloc and
+ * advances next_to_alloc, in the same way ixgbe_reuse_rx_buffer_zc()
+ * recycles a buffer.
+ **/
+void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
+{
+       struct ixgbe_rx_buffer *bi;
+       struct ixgbe_ring *rx_ring;
+       u64 hr, mask;
+       u16 nta;
+
+       rx_ring = container_of(alloc, struct ixgbe_ring, zca);
+       hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+       mask = rx_ring->xsk_umem->chunk_mask;
+
+       /* fill the slot at next_to_alloc; using the ring base here would
+        * overwrite slot 0 on every call while the index still advanced
+        */
+       nta = rx_ring->next_to_alloc;
+       bi = &rx_ring->rx_buffer_info[nta];
+
+       nta++;
+       rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+       /* round the handle down to the start of its chunk */
+       handle &= mask;
+
+       bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
+       bi->dma += hr;
+
+       bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
+       bi->addr += hr;
+
+       bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+}
+
+/* Make sure @bi holds a UMEM chunk, taking one via xsk_umem_peek_addr() if
+ * the slot is empty.
+ *
+ * Returns true if the slot holds a buffer on exit. Returns false, and bumps
+ * the alloc_rx_page_failed counter, if no address was available.
+ */
+static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
+                                 struct ixgbe_rx_buffer *bi)
+{
+       struct xdp_umem *umem = rx_ring->xsk_umem;
+       u64 handle, hr;
+
+       /* slot already has a buffer attached; nothing to fetch */
+       if (bi->addr)
+               return true;
+
+       if (!xsk_umem_peek_addr(umem, &handle)) {
+               rx_ring->rx_stats.alloc_rx_page_failed++;
+               return false;
+       }
+
+       hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+       bi->dma = xdp_umem_get_dma(umem, handle) + hr;
+
+       bi->addr = xdp_umem_get_data(umem, handle);
+       bi->addr += hr;
+
+       bi->handle = handle + umem->headroom;
+
+       /* consume the address only once the slot is fully set up */
+       xsk_umem_discard_addr(umem);
+       return true;
+}
+
+/* Attach a UMEM chunk to @bi using the xsk_umem_peek_addr_rq() /
+ * xsk_umem_discard_addr_rq() pair. Unlike ixgbe_alloc_buffer_zc(), this
+ * always fetches a new address and masks it down to the chunk start.
+ *
+ * Returns true on success. Returns false, and bumps the
+ * alloc_rx_page_failed counter, if no address was available.
+ */
+static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
+                                      struct ixgbe_rx_buffer *bi)
+{
+       struct xdp_umem *umem = rx_ring->xsk_umem;
+       u64 handle, hr;
+
+       if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+               rx_ring->rx_stats.alloc_rx_page_failed++;
+               return false;
+       }
+
+       handle &= umem->chunk_mask;
+       hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+       bi->dma = xdp_umem_get_dma(umem, handle) + hr;
+
+       bi->addr = xdp_umem_get_data(umem, handle);
+       bi->addr += hr;
+
+       bi->handle = handle + umem->headroom;
+
+       xsk_umem_discard_addr_rq(umem);
+       return true;
+}
+
+/* Refill up to @cleaned_count Rx descriptors starting at next_to_use.
+ *
+ * @alloc is called for each slot to make sure it holds a buffer; the loop
+ * stops at the first slot for which it fails. Each filled buffer is synced
+ * for the device and its DMA address is written into the descriptor. If any
+ * descriptor was filled, next_to_use and next_to_alloc are updated and the
+ * tail register is written.
+ *
+ * Returns true if every requested descriptor was refilled (or none was
+ * requested), false if @alloc failed.
+ */
+static __always_inline bool
+__ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
+                           bool alloc(struct ixgbe_ring *rx_ring,
+                                      struct ixgbe_rx_buffer *bi))
+{
+       union ixgbe_adv_rx_desc *rx_desc;
+       struct ixgbe_rx_buffer *bi;
+       u16 i = rx_ring->next_to_use;
+       bool ok = true;
+
+       /* nothing to do */
+       if (!cleaned_count)
+               return true;
+
+       rx_desc = IXGBE_RX_DESC(rx_ring, i);
+       bi = &rx_ring->rx_buffer_info[i];
+       /* bias i by -count so that reaching the ring end shows up as i == 0 */
+       i -= rx_ring->count;
+
+       do {
+               if (!alloc(rx_ring, bi)) {
+                       ok = false;
+                       break;
+               }
+
+               /* sync the buffer for use by the device; bi->dma already
+                * points at the packet area, and page_offset must not be
+                * read here because it shares storage with the zero-copy
+                * members of the rx_buffer union
+                */
+               dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 0,
+                                                rx_ring->rx_buf_len,
+                                                DMA_BIDIRECTIONAL);
+
+               /* Refresh the desc even if buffer_addrs didn't change
+                * because each write-back erases this info.
+                */
+               rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+
+               rx_desc++;
+               bi++;
+               i++;
+               if (unlikely(!i)) {
+                       rx_desc = IXGBE_RX_DESC(rx_ring, 0);
+                       bi = rx_ring->rx_buffer_info;
+                       i -= rx_ring->count;
+               }
+
+               /* clear the length for the next_to_use descriptor */
+               rx_desc->wb.upper.length = 0;
+
+               cleaned_count--;
+       } while (cleaned_count);
+
+       /* remove the bias to get back a real ring index */
+       i += rx_ring->count;
+
+       if (rx_ring->next_to_use != i) {
+               rx_ring->next_to_use = i;
+
+               /* update next to alloc since we have filled the ring */
+               rx_ring->next_to_alloc = i;
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.  (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64).
+                */
+               wmb();
+               writel(i, rx_ring->tail);
+       }
+
+       return ok;
+}
+
+/**
+ * ixgbe_alloc_rx_buffers_zc - refill Rx descriptors with zero-copy buffers
+ * @rx_ring: ring to place buffers on
+ * @count: number of descriptors to refill
+ *
+ * Uses ixgbe_alloc_buffer_slow_zc() as the per-slot allocator. The
+ * success/failure result of the refill is not reported to the caller.
+ **/
+void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
+{
+       __ixgbe_alloc_rx_buffers_zc(rx_ring,
+                                   count,
+                                   ixgbe_alloc_buffer_slow_zc);
+}
+
+/* Refill @count Rx descriptors using ixgbe_alloc_buffer_zc() as the per-slot
+ * allocator. Returns true if all of them were refilled.
+ */
+static bool ixgbe_alloc_rx_buffers_fast_zc(struct ixgbe_ring *rx_ring,
+                                          u16 count)
+{
+       bool ok;
+
+       ok = __ixgbe_alloc_rx_buffers_zc(rx_ring, count,
+                                        ixgbe_alloc_buffer_zc);
+       return ok;
+}
+
+/* Build an skb for the XDP_PASS path by copying the frame out of the
+ * zero-copy buffer, then recycle the buffer back onto the ring.
+ *
+ * Returns the new skb, or NULL if the skb allocation fails (in which case
+ * the buffer is not recycled).
+ */
+static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
+                                             struct ixgbe_rx_buffer *bi,
+                                             struct xdp_buff *xdp)
+{
+       unsigned int meta_len = xdp->data - xdp->data_meta;
+       unsigned int pkt_len = xdp->data_end - xdp->data;
+       unsigned int headroom = xdp->data - xdp->data_hard_start;
+       struct sk_buff *skb;
+
+       /* room for the headroom in front of the packet plus the packet */
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+                              headroom + pkt_len,
+                              GFP_ATOMIC | __GFP_NOWARN);
+       if (unlikely(!skb))
+               return NULL;
+
+       skb_reserve(skb, headroom);
+       memcpy(__skb_put(skb, pkt_len), xdp->data, pkt_len);
+
+       if (meta_len)
+               skb_metadata_set(skb, meta_len);
+
+       /* the data has been copied, so the UMEM chunk can go back in use */
+       ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+       return skb;
+}
+
+/* Advance next_to_clean by one, wrapping at the ring size, and prefetch the
+ * descriptor it now points to.
+ */
+static void ixgbe_inc_ntc(struct ixgbe_ring *rx_ring)
+{
+       u32 next = rx_ring->next_to_clean + 1;
+
+       if (next >= rx_ring->count)
+               next = 0;
+
+       rx_ring->next_to_clean = next;
+       prefetch(IXGBE_RX_DESC(rx_ring, next));
+}
+
+/**
+ * ixgbe_clean_rx_irq_zc - process received frames on an AF_XDP zero-copy ring
+ * @q_vector: interrupt vector the ring belongs to
+ * @rx_ring: Rx ring to clean
+ * @budget: maximum number of packets to process
+ *
+ * Walks written-back descriptors from next_to_clean, runs the XDP program
+ * on each complete single-descriptor frame and either hands the buffer to
+ * XDP_TX / redirect, recycles it, or copies the frame into an skb for the
+ * stack. Buffers are given back to hardware in batches along the way.
+ *
+ * Returns the number of packets processed, or @budget if a buffer refill
+ * failed.
+ **/
+int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+                         struct ixgbe_ring *rx_ring,
+                         const int budget)
+{
+       unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+       struct ixgbe_adapter *adapter = q_vector->adapter;
+       u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+       unsigned int xdp_res, xdp_xmit = 0;
+       bool failure = false;
+       struct sk_buff *skb;
+       struct xdp_buff xdp;
+
+       xdp.rxq = &rx_ring->xdp_rxq;
+
+       while (likely(total_rx_packets < budget)) {
+               union ixgbe_adv_rx_desc *rx_desc;
+               struct ixgbe_rx_buffer *bi;
+               unsigned int size;
+
+               /* return some buffers to hardware, one at a time is too slow */
+               if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
+                       /* once a refill has failed, the short-circuit below
+                        * skips further refill attempts in this poll
+                        */
+                       failure = failure ||
+                                 !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
+                                                                cleaned_count);
+                       cleaned_count = 0;
+               }
+
+               rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+               size = le16_to_cpu(rx_desc->wb.upper.length);
+               /* a zero length means this descriptor has no frame yet */
+               if (!size)
+                       break;
+
+               /* This memory barrier is needed to keep us from reading
+                * any other fields out of the rx_desc until we know the
+                * descriptor has been written back
+                */
+               dma_rmb();
+
+               bi = ixgbe_get_rx_buffer_zc(rx_ring, size);
+
+               /* Frame spans more than one descriptor: recycle this buffer
+                * and tag the next slot with an error pointer so that the
+                * rest of the frame is discarded by the bi->skb check below.
+                * NOTE(review): cleaned_count is not incremented on this
+                * path or the next one -- confirm that is intended.
+                */
+               if (unlikely(!ixgbe_test_staterr(rx_desc,
+                                                IXGBE_RXD_STAT_EOP))) {
+                       struct ixgbe_rx_buffer *next_bi;
+
+                       ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+                       ixgbe_inc_ntc(rx_ring);
+                       next_bi =
+                              &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+                       next_bi->skb = ERR_PTR(-EINVAL);
+                       continue;
+               }
+
+               /* slot was tagged as the tail of a multi-descriptor frame */
+               if (unlikely(bi->skb)) {
+                       ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+                       ixgbe_inc_ntc(rx_ring);
+                       continue;
+               }
+
+               /* describe the frame to XDP: no metadata, full headroom */
+               xdp.data = bi->addr;
+               xdp.data_meta = xdp.data;
+               xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+               xdp.data_end = xdp.data + size;
+               xdp.handle = bi->handle;
+
+               xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);
+
+               /* non-zero result: the frame does not go up the stack */
+               if (xdp_res) {
+                       if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+                               /* buffer was handed off; empty the slot */
+                               xdp_xmit |= xdp_res;
+                               bi->addr = NULL;
+                               bi->skb = NULL;
+                       } else {
+                               ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+                       }
+                       total_rx_packets++;
+                       total_rx_bytes += size;
+
+                       cleaned_count++;
+                       ixgbe_inc_ntc(rx_ring);
+                       continue;
+               }
+
+               /* XDP_PASS path */
+               skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
+               if (!skb) {
+                       rx_ring->rx_stats.alloc_rx_buff_failed++;
+                       break;
+               }
+
+               cleaned_count++;
+               ixgbe_inc_ntc(rx_ring);
+
+               /* padding failed; presumably the skb was already freed by
+                * eth_skb_pad() -- TODO confirm
+                */
+               if (eth_skb_pad(skb))
+                       continue;
+
+               total_rx_bytes += skb->len;
+               total_rx_packets++;
+
+               ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
+               ixgbe_rx_skb(q_vector, skb);
+       }
+
+       /* flush redirects queued during this poll */
+       if (xdp_xmit & IXGBE_XDP_REDIR)
+               xdp_do_flush_map();
+
+       /* kick the XDP Tx ring selected by the current CPU id */
+       if (xdp_xmit & IXGBE_XDP_TX) {
+               struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.
+                */
+               wmb();
+               writel(ring->next_to_use, ring->tail);
+       }
+
+       u64_stats_update_begin(&rx_ring->syncp);
+       rx_ring->stats.packets += total_rx_packets;
+       rx_ring->stats.bytes += total_rx_bytes;
+       u64_stats_update_end(&rx_ring->syncp);
+       q_vector->rx.total_packets += total_rx_packets;
+       q_vector->rx.total_bytes += total_rx_bytes;
+
+       /* reporting the full budget after a refill failure tells the caller
+        * that work remains
+        */
+       return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ixgbe_xsk_clean_rx_ring - hand the ring's zero-copy buffers back to the UMEM
+ * @rx_ring: ring being torn down
+ *
+ * Passes the handle of every slot from next_to_clean up to (but not
+ * including) next_to_alloc to xsk_umem_fq_reuse().
+ **/
+void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
+{
+       u16 idx = rx_ring->next_to_clean;
+
+       while (idx != rx_ring->next_to_alloc) {
+               struct ixgbe_rx_buffer *bi = &rx_ring->rx_buffer_info[idx];
+
+               xsk_umem_fq_reuse(rx_ring->xsk_umem, bi->handle);
+
+               /* step to the next slot, wrapping at the end of the ring */
+               idx++;
+               if (idx == rx_ring->count)
+                       idx = 0;
+       }
+}