OSDN Git Service

habanalabs: halt the engines before hard-reset
author Oded Gabbay <oded.gabbay@gmail.com>
Mon, 23 Dec 2019 15:51:48 +0000 (17:51 +0200)
committer Oded Gabbay <oded.gabbay@gmail.com>
Tue, 11 Feb 2020 09:12:47 +0000 (11:12 +0200)
The driver must halt the engines before doing a hard-reset; otherwise the
device can go into an undefined state. There was a place where the driver
didn't do that, and this patch fixes it.

Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/device.c
drivers/misc/habanalabs/goya/goya.c

index b155e95..166883b 100644 (file)
@@ -1189,6 +1189,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
        if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_info(hdev->dev,
                        "H/W state is dirty, must reset before initializing\n");
+               hdev->asic_funcs->halt_engines(hdev, true);
                hdev->asic_funcs->hw_fini(hdev, true);
        }
 
index 7344e8a..f24fe90 100644 (file)
@@ -895,6 +895,11 @@ void goya_init_dma_qmans(struct hl_device *hdev)
  */
 static void goya_disable_external_queues(struct hl_device *hdev)
 {
+       struct goya_device *goya = hdev->asic_specific;
+
+       if (!(goya->hw_cap_initialized & HW_CAP_DMA))
+               return;
+
        WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
@@ -956,6 +961,11 @@ static int goya_stop_external_queues(struct hl_device *hdev)
 {
        int rc, retval = 0;
 
+       struct goya_device *goya = hdev->asic_specific;
+
+       if (!(goya->hw_cap_initialized & HW_CAP_DMA))
+               return retval;
+
        rc = goya_stop_queue(hdev,
                        mmDMA_QM_0_GLBL_CFG1,
                        mmDMA_QM_0_CP_STS,
@@ -1744,9 +1754,18 @@ void goya_init_tpc_qmans(struct hl_device *hdev)
  */
 static void goya_disable_internal_queues(struct hl_device *hdev)
 {
+       struct goya_device *goya = hdev->asic_specific;
+
+       if (!(goya->hw_cap_initialized & HW_CAP_MME))
+               goto disable_tpc;
+
        WREG32(mmMME_QM_GLBL_CFG0, 0);
        WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
 
+disable_tpc:
+       if (!(goya->hw_cap_initialized & HW_CAP_TPC))
+               return;
+
        WREG32(mmTPC0_QM_GLBL_CFG0, 0);
        WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
 
@@ -1782,8 +1801,12 @@ static void goya_disable_internal_queues(struct hl_device *hdev)
  */
 static int goya_stop_internal_queues(struct hl_device *hdev)
 {
+       struct goya_device *goya = hdev->asic_specific;
        int rc, retval = 0;
 
+       if (!(goya->hw_cap_initialized & HW_CAP_MME))
+               goto stop_tpc;
+
        /*
         * Each queue (QMAN) is a separate H/W logic. That means that each
         * QMAN can be stopped independently and failure to stop one does NOT
@@ -1810,6 +1833,10 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
                retval = -EIO;
        }
 
+stop_tpc:
+       if (!(goya->hw_cap_initialized & HW_CAP_TPC))
+               return retval;
+
        rc = goya_stop_queue(hdev,
                        mmTPC0_QM_GLBL_CFG1,
                        mmTPC0_QM_CP_STS,
@@ -1975,6 +2002,11 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
 
 static void goya_dma_stall(struct hl_device *hdev)
 {
+       struct goya_device *goya = hdev->asic_specific;
+
+       if (!(goya->hw_cap_initialized & HW_CAP_DMA))
+               return;
+
        WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
        WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
        WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
@@ -1984,6 +2016,11 @@ static void goya_dma_stall(struct hl_device *hdev)
 
 static void goya_tpc_stall(struct hl_device *hdev)
 {
+       struct goya_device *goya = hdev->asic_specific;
+
+       if (!(goya->hw_cap_initialized & HW_CAP_TPC))
+               return;
+
        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
@@ -1996,6 +2033,11 @@ static void goya_tpc_stall(struct hl_device *hdev)
 
 static void goya_mme_stall(struct hl_device *hdev)
 {
+       struct goya_device *goya = hdev->asic_specific;
+
+       if (!(goya->hw_cap_initialized & HW_CAP_MME))
+               return;
+
        WREG32(mmMME_STALL, 0xFFFFFFFF);
 }