OSDN Git Service

pds_core: add devlink health facilities
author: Shannon Nelson <shannon.nelson@amd.com>
Wed, 19 Apr 2023 17:04:17 +0000 (10:04 -0700)
committer: David S. Miller <davem@davemloft.net>
Fri, 21 Apr 2023 07:29:12 +0000 (08:29 +0100)
Add devlink health reporting on top of our fw watchdog.

Example:
  # devlink health show pci/0000:2b:00.0 reporter fw
  pci/0000:2b:00.0:
    reporter fw
      state healthy error 0 recover 0
  # devlink health diagnose pci/0000:2b:00.0 reporter fw
   Status: healthy State: 1 Generation: 0 Recoveries: 0

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
drivers/net/ethernet/amd/pds_core/Makefile
drivers/net/ethernet/amd/pds_core/core.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/devlink.c [new file with mode: 0644]
drivers/net/ethernet/amd/pds_core/main.c

index 99a7002..5b88173 100644 (file)
@@ -26,6 +26,18 @@ messages such as these::
   pds_core 0000:b6:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
   pds_core 0000:b6:00.0: FW: 1.60.0-73
 
+Health Reporters
+================
+
+The driver supports a devlink health reporter for FW status::
+
+  # devlink health show pci/0000:2b:00.0 reporter fw
+  pci/0000:2b:00.0:
+    reporter fw
+      state healthy error 0 recover 0
+  # devlink health diagnose pci/0000:2b:00.0 reporter fw
+   Status: healthy State: 1 Generation: 0 Recoveries: 0
+
 Support
 =======
 
index 95a6c31..eaca855 100644 (file)
@@ -4,6 +4,7 @@
 obj-$(CONFIG_PDS_CORE) := pds_core.o
 
 pds_core-y := main.o \
+             devlink.o \
              dev.o \
              core.o
 
index 701d274..ab85313 100644 (file)
@@ -5,7 +5,7 @@
 
 int pdsc_setup(struct pdsc *pdsc, bool init)
 {
-       int err = 0;
+       int err;
 
        if (init)
                err = pdsc_dev_init(pdsc);
@@ -42,6 +42,8 @@ static void pdsc_fw_down(struct pdsc *pdsc)
                return;
        }
 
+       devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
+
        pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
@@ -58,6 +60,10 @@ static void pdsc_fw_up(struct pdsc *pdsc)
        if (err)
                goto err_out;
 
+       pdsc->fw_recoveries++;
+       devlink_health_reporter_state_update(pdsc->fw_reporter,
+                                            DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
        return;
 
 err_out:
index 83c528a..32aa38c 100644 (file)
@@ -68,6 +68,8 @@ struct pdsc {
        struct timer_list wdtimer;
        unsigned int wdtimer_period;
        struct work_struct health_work;
+       struct devlink_health_reporter *fw_reporter;
+       u32 fw_recoveries;
 
        struct pdsc_devinfo dev_info;
        struct pds_core_dev_identity dev_ident;
@@ -88,6 +90,10 @@ struct pdsc {
        u64 __iomem *kern_dbpage;
 };
 
+int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+                             struct devlink_fmsg *fmsg,
+                             struct netlink_ext_ack *extack);
+
 void pdsc_debugfs_create(void);
 void pdsc_debugfs_destroy(void);
 void pdsc_debugfs_add_dev(struct pdsc *pdsc);
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
new file mode 100644 (file)
index 0000000..3b05b1a
--- /dev/null
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include "core.h"
+
+int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+                             struct devlink_fmsg *fmsg,
+                             struct netlink_ext_ack *extack)
+{
+       struct pdsc *pdsc = devlink_health_reporter_priv(reporter);
+       int err;
+
+       mutex_lock(&pdsc->config_lock);
+
+       if (test_bit(PDSC_S_FW_DEAD, &pdsc->state))
+               err = devlink_fmsg_string_pair_put(fmsg, "Status", "dead");
+       else if (!pdsc_is_fw_good(pdsc))
+               err = devlink_fmsg_string_pair_put(fmsg, "Status", "unhealthy");
+       else
+               err = devlink_fmsg_string_pair_put(fmsg, "Status", "healthy");
+
+       mutex_unlock(&pdsc->config_lock);
+
+       if (err)
+               return err;
+
+       err = devlink_fmsg_u32_pair_put(fmsg, "State",
+                                       pdsc->fw_status &
+                                               ~PDS_CORE_FW_STS_F_GENERATION);
+       if (err)
+               return err;
+
+       err = devlink_fmsg_u32_pair_put(fmsg, "Generation",
+                                       pdsc->fw_generation >> 4);
+       if (err)
+               return err;
+
+       return devlink_fmsg_u32_pair_put(fmsg, "Recoveries",
+                                        pdsc->fw_recoveries);
+}
index c9fbf1d..54f3aed 100644 (file)
@@ -130,10 +130,16 @@ static int pdsc_init_vf(struct pdsc *vf)
        return -1;
 }
 
+static const struct devlink_health_reporter_ops pdsc_fw_reporter_ops = {
+       .name = "fw",
+       .diagnose = pdsc_fw_reporter_diagnose,
+};
+
 #define PDSC_WQ_NAME_LEN 24
 
 static int pdsc_init_pf(struct pdsc *pdsc)
 {
+       struct devlink_health_reporter *hr;
        char wq_name[PDSC_WQ_NAME_LEN];
        struct devlink *dl;
        int err;
@@ -172,6 +178,16 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
        dl = priv_to_devlink(pdsc);
        devl_lock(dl);
+
+       hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
+       if (IS_ERR(hr)) {
+               dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
+               err = PTR_ERR(hr);
+               devl_unlock(dl);
+               goto err_out_teardown;
+       }
+       pdsc->fw_reporter = hr;
+
        devl_register(dl);
        devl_unlock(dl);
 
@@ -180,6 +196,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
        return 0;
 
+err_out_teardown:
+       pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
 err_out_unmap_bars:
        mutex_unlock(&pdsc->config_lock);
        del_timer_sync(&pdsc->wdtimer);
@@ -283,6 +301,10 @@ static void pdsc_remove(struct pci_dev *pdev)
        dl = priv_to_devlink(pdsc);
        devl_lock(dl);
        devl_unregister(dl);
+       if (pdsc->fw_reporter) {
+               devl_health_reporter_destroy(pdsc->fw_reporter);
+               pdsc->fw_reporter = NULL;
+       }
        devl_unlock(dl);
 
        if (!pdev->is_virtfn) {