OSDN Git Service

habanalabs: force user to set device debug mode
authorOded Gabbay <oded.gabbay@gmail.com>
Sat, 4 May 2019 14:36:06 +0000 (17:36 +0300)
committerOded Gabbay <oded.gabbay@gmail.com>
Sat, 4 May 2019 14:36:06 +0000 (17:36 +0300)
This patch adds the implementation of the HL_DEBUG_OP_SET_MODE opcode in
the DEBUG IOCTL.

It forces the user who wants to debug the device to set the device into
debug mode before he can configure the debug engines. The patch also makes
sure to disable debug mode upon user releasing FD, in case the user forgot
to disable debug mode.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/context.c
drivers/misc/habanalabs/device.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/habanalabs_drv.c
drivers/misc/habanalabs/habanalabs_ioctl.c

index f4c92f1..280f462 100644 (file)
@@ -31,7 +31,9 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
                 * Coresight might be still working by accessing addresses
                 * related to the stopped engines. Hence stop it explicitly.
                 */
-               hdev->asic_funcs->halt_coresight(hdev);
+               if (hdev->in_debug)
+                       hl_device_set_debug_mode(hdev, false);
+
                hl_vm_ctx_fini(ctx);
                hl_asid_free(hdev, ctx->asid);
        }
index 0b19d3e..640d24f 100644 (file)
@@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev)
 
        mutex_init(&hdev->fd_open_cnt_lock);
        mutex_init(&hdev->send_cpu_message_lock);
+       mutex_init(&hdev->debug_lock);
        mutex_init(&hdev->mmu_cache_lock);
        INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
        spin_lock_init(&hdev->hw_queues_mirror_lock);
@@ -262,6 +263,7 @@ early_fini:
 static void device_early_fini(struct hl_device *hdev)
 {
        mutex_destroy(&hdev->mmu_cache_lock);
+       mutex_destroy(&hdev->debug_lock);
        mutex_destroy(&hdev->send_cpu_message_lock);
 
        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
@@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
        return 1;
 }
 
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+{
+       int rc = 0;
+
+       mutex_lock(&hdev->debug_lock);
+
+       if (!enable) {
+               if (!hdev->in_debug) {
+                       dev_err(hdev->dev,
+                               "Failed to disable debug mode because device was not in debug mode\n");
+                       rc = -EFAULT;
+                       goto out;
+               }
+
+               hdev->asic_funcs->halt_coresight(hdev);
+               hdev->in_debug = 0;
+
+               goto out;
+       }
+
+       if (hdev->in_debug) {
+               dev_err(hdev->dev,
+                       "Failed to enable debug mode because device is already in debug mode\n");
+               rc = -EFAULT;
+               goto out;
+       }
+
+       mutex_lock(&hdev->fd_open_cnt_lock);
+
+       if (atomic_read(&hdev->fd_open_cnt) > 1) {
+               dev_err(hdev->dev,
+                       "Failed to enable debug mode. More then a single user is using the device\n");
+               rc = -EPERM;
+               goto unlock_fd_open_lock;
+       }
+
+       hdev->in_debug = 1;
+
+unlock_fd_open_lock:
+       mutex_unlock(&hdev->fd_open_cnt_lock);
+out:
+       mutex_unlock(&hdev->debug_lock);
+
+       return rc;
+}
+
 /*
  * hl_device_suspend - initiate device suspend
  *
index 7ca97df..f090293 100644 (file)
@@ -1117,6 +1117,7 @@ struct hl_device_reset_work {
  *                    lock here so we can flush user processes which are opening
  *                    the device while we are trying to hard reset it
  * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
+ * @debug_lock: protects critical section of setting debug mode for device
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
@@ -1159,6 +1160,8 @@ struct hl_device_reset_work {
  * @mmu_enable: is MMU enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
  * @dma_mask: the dma mask that was set for this device
+ * @in_debug: is device under debug. This, together with fd_open_cnt, enforces
+ *            that only a single user is configuring the debug infrastructure.
  */
 struct hl_device {
        struct pci_dev                  *pdev;
@@ -1188,6 +1191,7 @@ struct hl_device {
        /* TODO: remove fd_open_cnt_lock for multiple process support */
        struct mutex                    fd_open_cnt_lock;
        struct mutex                    send_cpu_message_lock;
+       struct mutex                    debug_lock;
        struct asic_fixed_properties    asic_prop;
        const struct hl_asic_funcs      *asic_funcs;
        void                            *asic_specific;
@@ -1230,6 +1234,7 @@ struct hl_device {
        u8                              init_done;
        u8                              device_cpu_disabled;
        u8                              dma_mask;
+       u8                              in_debug;
 
        /* Parameters for bring-up */
        u8                              mmu_enable;
@@ -1325,6 +1330,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 int hl_device_open(struct inode *inode, struct file *filp);
 bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
 enum hl_device_status hl_device_status(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
                enum hl_asic_type asic_type, int minor);
 void destroy_hdev(struct hl_device *hdev);
index d8b47bb..42a8c0b 100644 (file)
@@ -105,6 +105,14 @@ int hl_device_open(struct inode *inode, struct file *filp)
                return -EPERM;
        }
 
+       if (hdev->in_debug) {
+               dev_err_ratelimited(hdev->dev,
+                       "Can't open %s because it is being debugged by another user\n",
+                       dev_name(hdev->dev));
+               mutex_unlock(&hdev->fd_open_cnt_lock);
+               return -EPERM;
+       }
+
        if (atomic_read(&hdev->fd_open_cnt)) {
                dev_info_ratelimited(hdev->dev,
                        "Can't open %s because another user is working on it\n",
index b7a0eec..6783751 100644 (file)
@@ -254,10 +254,18 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
        case HL_DEBUG_OP_BMON:
        case HL_DEBUG_OP_SPMU:
        case HL_DEBUG_OP_TIMESTAMP:
+               if (!hdev->in_debug) {
+                       dev_err(hdev->dev,
+                               "Rejecting debug configuration request because device not in debug mode\n");
+                       return -EFAULT;
+               }
                args->input_size =
                        min(args->input_size, hl_debug_struct_size[args->op]);
                rc = debug_coresight(hdev, args);
                break;
+       case HL_DEBUG_OP_SET_MODE:
+               rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+               break;
        default:
                dev_err(hdev->dev, "Invalid request %d\n", args->op);
                rc = -ENOTTY;