OSDN Git Service

habanalabs: Add a printout with the name of a busy engine
author: Tomer Tayar <ttayar@habana.ai>
Thu, 7 Mar 2019 12:26:02 +0000 (14:26 +0200)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Thu, 7 Mar 2019 12:26:02 +0000 (14:26 +0200)
Print the name of a busy engine when checking if a device is idle.
The change is made mainly to help users pinpoint problems in their
topology's recipe.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/habanalabs_ioctl.c

index 3b34627..6309794 100644 (file)
@@ -2783,6 +2783,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
        dma_addr_t fence_dma_addr;
        struct hl_cb *cb;
        u32 tmp, timeout;
+       char buf[16] = {};
        int rc;
 
        if (hdev->pldm)
@@ -2790,9 +2791,10 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
        else
                timeout = HL_DEVICE_TIMEOUT_USEC;
 
-       if (!hdev->asic_funcs->is_device_idle(hdev)) {
+       if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
                dev_err_ratelimited(hdev->dev,
-                       "Can't send KMD job on QMAN0 if device is not idle\n");
+                       "Can't send KMD job on QMAN0 because %s is busy\n",
+                       buf);
                return -EBUSY;
        }
 
@@ -4691,7 +4693,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
 
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev)
+static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
 {
        u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
        int i;
@@ -4703,7 +4705,7 @@ static bool goya_is_device_idle(struct hl_device *hdev)
 
                if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
                                DMA_QM_IDLE_MASK)
-                       return false;
+                       return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
        }
 
        offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
@@ -4715,31 +4717,31 @@ static bool goya_is_device_idle(struct hl_device *hdev)
 
                if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
                                TPC_QM_IDLE_MASK)
-                       return false;
+                       return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
 
                if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
                                TPC_CMDQ_IDLE_MASK)
-                       return false;
+                       return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
 
                if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
                                TPC_CFG_IDLE_MASK)
-                       return false;
+                       return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
        }
 
        if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
                        MME_QM_IDLE_MASK)
-               return false;
+               return HL_ENG_BUSY(buf, size, "MME_QM");
 
        if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
                        MME_CMDQ_IDLE_MASK)
-               return false;
+               return HL_ENG_BUSY(buf, size, "MME_CMDQ");
 
        if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
                        MME_ARCH_IDLE_MASK)
-               return false;
+               return HL_ENG_BUSY(buf, size, "MME_ARCH");
 
        if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
-               return false;
+               return HL_ENG_BUSY(buf, size, "MME");
 
        return true;
 }
index ed3649d..9cc8417 100644 (file)
@@ -555,7 +555,7 @@ struct hl_asic_funcs {
        int (*send_heartbeat)(struct hl_device *hdev);
        void (*enable_clock_gating)(struct hl_device *hdev);
        void (*disable_clock_gating)(struct hl_device *hdev);
-       bool (*is_device_idle)(struct hl_device *hdev);
+       bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
        int (*soft_reset_late_init)(struct hl_device *hdev);
        void (*hw_queues_lock)(struct hl_device *hdev);
        void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -1010,6 +1010,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
        WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
                        (val) << REG_FIELD_SHIFT(reg, field))
 
+#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
+               if (buf) \
+                       snprintf(buf, size, fmt, ##__VA_ARGS__); \
+               false; \
+       })
+
 struct hwmon_chip_info;
 
 /**
index 19b96af..37f9de8 100644 (file)
@@ -93,7 +93,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
        if ((!max_size) || (!out))
                return -EINVAL;
 
-       hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev);
+       hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
 
        return copy_to_user(out, &hw_idle,
                min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;