}
static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u64 cb_handle, u32 *usage_cnt)
+ u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
+ struct hl_vm_va_block *va_block;
struct hl_cb *cb;
u32 handle;
int rc = 0;
goto out;
}
- *usage_cnt = atomic_read(&cb->cs_cnt);
+ if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
+ va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
+ if (va_block) {
+ *device_va = va_block->start;
+ } else {
+ dev_err(hdev->dev, "CB is not mapped to the device's MMU\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ *usage_cnt = atomic_read(&cb->cs_cnt);
+ }
out:
spin_unlock(&mgr->cb_lock);
union hl_cb_args *args = data;
struct hl_device *hdev = hpriv->hdev;
enum hl_device_status status;
- u64 handle = 0;
+ u64 handle = 0, device_va;
u32 usage_cnt = 0;
int rc;
case HL_CB_OP_INFO:
rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle,
- &usage_cnt);
- memset(args, 0, sizeof(*args));
- args->out.usage_cnt = usage_cnt;
+ args->in.flags,
+ &usage_cnt,
+ &device_va);
+
+ memset(&args->out, 0, sizeof(args->out));
+
+ if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
+ args->out.device_va = device_va;
+ else
+ args->out.usage_cnt = usage_cnt;
break;
default:
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+ struct hl_cb_mgr *cb_mgr, u64 timeout_us,
+ u64 cq_counters_handle, u64 cq_counters_offset,
+ u64 target_value, struct hl_user_interrupt *interrupt,
+ u32 *status,
+ u64 *timestamp)
+{
+ struct hl_user_pending_interrupt *pend;
+ unsigned long timeout, flags;
+ long completion_rc;
+ struct hl_cb *cb;
+ int rc = 0;
+ u32 handle;
+
+ timeout = hl_usecs64_to_jiffies(timeout_us);
+
+ hl_ctx_get(hdev, ctx);
+
+ cq_counters_handle >>= PAGE_SHIFT;
+ handle = (u32) cq_counters_handle;
+
+ cb = hl_cb_get(hdev, cb_mgr, handle);
+ if (!cb) {
+ hl_ctx_put(ctx);
+ return -EINVAL;
+ }
+
+ pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+ if (!pend) {
+ hl_cb_put(cb);
+ hl_ctx_put(ctx);
+ return -ENOMEM;
+ }
+
+ hl_fence_init(&pend->fence, ULONG_MAX);
+
+ pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
+ pend->cq_target_value = target_value;
+
+ /* We check for completion value as interrupt could have been received
+ * before we added the node to the wait list
+ */
+ if (*pend->cq_kernel_addr >= target_value) {
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ /* There was no interrupt, we assume the completion is now. */
+ pend->fence.timestamp = ktime_get();
+ }
+
+ if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
+ goto set_timestamp;
+
+ /* Add pending user interrupt to relevant list for the interrupt
+ * handler to monitor
+ */
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+ /* Wait for interrupt handler to signal completion */
+ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
+ timeout);
+ if (completion_rc > 0) {
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ } else {
+ if (completion_rc == -ERESTARTSYS) {
+ dev_err_ratelimited(hdev->dev,
+ "user process got signal while waiting for interrupt ID %d\n",
+ interrupt->interrupt_id);
+ rc = -EINTR;
+ *status = HL_WAIT_CS_STATUS_ABORTED;
+ } else {
+ if (pend->fence.error == -EIO) {
+ dev_err_ratelimited(hdev->dev,
+ "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+ pend->fence.error);
+ rc = -EIO;
+ *status = HL_WAIT_CS_STATUS_ABORTED;
+ } else {
+ dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
+ interrupt->interrupt_id);
+ rc = -ETIMEDOUT;
+ }
+ *status = HL_WAIT_CS_STATUS_BUSY;
+ }
+ }
+
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_del(&pend->wait_list_node);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+set_timestamp:
+ *timestamp = ktime_to_ns(pend->fence.timestamp);
+
+ kfree(pend);
+ hl_cb_put(cb);
+ hl_ctx_put(ctx);
+
+ return rc;
+}
+
+static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
u64 timeout_us, u64 user_address,
u64 target_value, struct hl_user_interrupt *interrupt,
hl_ctx_get(hdev, ctx);
- pend = kmalloc(sizeof(*pend), GFP_KERNEL);
+ pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
hl_ctx_put(ctx);
return -ENOMEM;
else
interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
- rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
+ if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
+ rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
+ args->in.interrupt_timeout_us, args->in.cq_counters_handle,
+ args->in.cq_counters_offset,
+ args->in.target, interrupt, &status,
+ ×tamp);
+ else
+ rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt, &status,
×tamp);
* pending on an interrupt
* @wait_list_node: node in the list of user threads pending on an interrupt
* @fence: hl fence object for interrupt completion
+ * @cq_target_value: CQ target value
+ * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
+ * handler for taget value comparison
*/
struct hl_user_pending_interrupt {
struct list_head wait_list_node;
struct hl_fence fence;
+ u64 cq_target_value;
+ u64 *cq_kernel_addr;
};
/**
spin_lock(&user_cq->wait_list_lock);
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
- pend->fence.timestamp = now;
- complete_all(&pend->fence.completion);
+ if ((pend->cq_kernel_addr &&
+ *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
+ !pend->cq_kernel_addr) {
+ pend->fence.timestamp = now;
+ complete_all(&pend->fence.completion);
+ }
}
spin_unlock(&user_cq->wait_list_lock);
}
#define HL_MAX_CB_SIZE (0x200000 - 32)
/* Indicates whether the command buffer should be mapped to the device's MMU */
-#define HL_CB_FLAGS_MAP 0x1
+#define HL_CB_FLAGS_MAP 0x1
+
+/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */
+#define HL_CB_FLAGS_GET_DEVICE_VA 0x2
struct hl_cb_in {
/* Handle of CB or 0 if we want to create one */
/* Handle of CB */
__u64 cb_handle;
- /* Information about CB */
- struct {
- /* Usage count of CB */
- __u32 usage_cnt;
- __u32 pad;
+ union {
+ /* Information about CB */
+ struct {
+ /* Usage count of CB */
+ __u32 usage_cnt;
+ __u32 pad;
+ };
+
+ /* CB mapped address to device MMU */
+ __u64 device_va;
};
};
};
struct hl_cs_out out;
};
-#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
-#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
-#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
+#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
};
struct {
- /* User address for completion comparison.
- * upon interrupt, driver will compare the value pointed
- * by this address with the supplied target value.
- * in order not to perform any comparison, set address
- * to all 1s.
- * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
- */
- __u64 addr;
+ union {
+ /* User address for completion comparison.
+ * upon interrupt, driver will compare the value pointed
+ * by this address with the supplied target value.
+ * in order not to perform any comparison, set address
+ * to all 1s.
+ * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+ */
+ __u64 addr;
+
+ /* cq_counters_handle to a kernel mapped cb which contains
+ * cq counters.
+ * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
+ */
+ __u64 cq_counters_handle;
+ };
+
/* Target value for completion comparison */
__u64 target;
};
*/
__u64 interrupt_timeout_us;
};
+
+ /*
+ * cq counter offset inside the counters cb pointed by cq_counters_handle above.
+ * upon interrupt, driver will compare the value pointed
+ * by this address (cq_counters_handle + cq_counters_offset)
+ * with the supplied target value.
+ * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
+ */
+ __u64 cq_counters_offset;
};
#define HL_WAIT_CS_STATUS_COMPLETED 0