OSDN Git Service

eth: bnxt: handle invalid Tx completions more gracefully
authorJakub Kicinski <kuba@kernel.org>
Thu, 20 Jul 2023 01:04:40 +0000 (18:04 -0700)
committerJakub Kicinski <kuba@kernel.org>
Fri, 21 Jul 2023 03:09:13 +0000 (20:09 -0700)
Invalid Tx completions should never happen (tm) but when they do
they crash the host, because driver blindly trusts that there is
a valid skb pointer on the ring.

The completions I've seen appear to be some form of FW / HW
miscalculation or staleness, they have typical (small) values
(<100), but they are most often higher than number of queued
descriptors. They usually happen after boot.

Instead of crashing, print a warning and schedule a reset.

Link: https://lore.kernel.org/r/20230720010440.1967136-4-kuba@kernel.org
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c

index 7b545d2..a3bbd13 100644 (file)
@@ -331,6 +331,22 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
        rxr->rx_next_cons = 0xffff;
 }
 
+void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+                         int idx)
+{
+       struct bnxt_napi *bnapi = txr->bnapi;
+
+       if (bnapi->tx_fault)
+               return;
+
+       netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
+                  txr->txq_index, bnapi->tx_pkts,
+                  txr->tx_cons, txr->tx_prod, idx);
+       WARN_ON_ONCE(1);
+       bnapi->tx_fault = 1;
+       bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
+}
+
 const u16 bnxt_lhint_arr[] = {
        TX_BD_FLAGS_LHINT_512_AND_SMALLER,
        TX_BD_FLAGS_LHINT_512_TO_1023,
@@ -690,6 +706,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                skb = tx_buf->skb;
                tx_buf->skb = NULL;
 
+               if (unlikely(!skb)) {
+                       bnxt_sched_reset_txr(bp, txr, i);
+                       return;
+               }
+
                tx_bytes += skb->len;
 
                if (tx_buf->is_push) {
@@ -2576,7 +2597,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 
 static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
 {
-       if (bnapi->tx_pkts) {
+       if (bnapi->tx_pkts && !bnapi->tx_fault) {
                bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
                bnapi->tx_pkts = 0;
        }
@@ -9429,6 +9450,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
                struct bnxt_napi *bnapi = bp->bnapi[i];
                struct bnxt_cp_ring_info *cpr;
 
+               bnapi->tx_fault = 0;
+
                cpr = &bnapi->cp_ring;
                if (bnapi->in_reset)
                        cpr->sw_stats.rx.rx_resets++;
index 080e734..9d16757 100644 (file)
@@ -1008,6 +1008,7 @@ struct bnxt_napi {
                                          int);
        int                     tx_pkts;
        u8                      events;
+       u8                      tx_fault:1;
 
        u32                     flags;
 #define BNXT_NAPI_FLAG_XDP     0x1
@@ -2329,6 +2330,8 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num);
 int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
 void bnxt_tx_disable(struct bnxt *bp);
 void bnxt_tx_enable(struct bnxt *bp);
+void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+                         int idx);
 void bnxt_report_link(struct bnxt *bp);
 int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
 int bnxt_hwrm_set_pause(struct bnxt *);
index 4efa5fe..5b6fbdc 100644 (file)
@@ -149,6 +149,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                        tx_buf->action = 0;
                        tx_buf->xdpf = NULL;
                } else if (tx_buf->action == XDP_TX) {
+                       tx_buf->action = 0;
                        rx_doorbell_needed = true;
                        last_tx_cons = tx_cons;
 
@@ -158,6 +159,9 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                                tx_buf = &txr->tx_buf_ring[tx_cons];
                                page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
                        }
+               } else {
+                       bnxt_sched_reset_txr(bp, txr, i);
+                       return;
                }
                tx_cons = NEXT_TX(tx_cons);
        }