From b02220536cb66ce1e357d78c944d6be07f1e1051 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Wed, 1 Dec 2021 10:52:27 +0200 Subject: [PATCH] habanalabs: wait again for multi-CS if no CS completed The original multi-CS design assumption that stream masters are used exclusively (i.e. multi-CS with set of stream master QIDs will not get completed by CS not from the multi-CS set) is inaccurate. Thus multi-CS behavior is now modified not to treat such case as an error. Instead, if we have multi-CS completion but we detect that no CS from the list is actually completed we will do another multi-CS wait (with modified timeout). Signed-off-by: Ohad Sharabi Reviewed-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 97 +++++++++++----------- drivers/misc/habanalabs/common/habanalabs.h | 4 +- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index a63ebbc04787..f58fff3671d6 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) * mcs fences. */ fence->mcs_handling_done = true; - /* - * Since CS (and its related fence) can be associated with only one - * multi CS context, once it triggered multi CS completion no need to - * continue checking other multi CS contexts. - */ - spin_unlock(&mcs_compl->lock); - break; } spin_unlock(&mcs_compl->lock); @@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, return rc; } +static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) +{ + if (usecs <= U32_MAX) + return usecs_to_jiffies(usecs); + + /* + * If the value in nanoseconds is larger than 64 bit, use the largest + * 64 bit value. + */ + if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) + return nsecs_to_jiffies(U64_MAX); + + return nsecs_to_jiffies(usecs * NSEC_PER_USEC); +} + /* * hl_wait_multi_cs_completion_init - init completion structure * @@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init( } if (i == MULTI_CS_MAX_USER_CTX) { - dev_err(hdev->dev, - "no available multi-CS completion structure\n"); + dev_err(hdev->dev, "no available multi-CS completion structure\n"); return ERR_PTR(-ENOMEM); } return mcs_compl; @@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini( * * @return 0 on success, otherwise non 0 error code */ -static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data) +static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data, + struct multi_cs_completion *mcs_compl) { - struct hl_device *hdev = mcs_data->ctx->hdev; - struct multi_cs_completion *mcs_compl; long completion_rc; - mcs_compl = hl_wait_multi_cs_completion_init(hdev, - mcs_data->stream_master_qid_map); - if (IS_ERR(mcs_compl)) - return PTR_ERR(mcs_compl); - - completion_rc = wait_for_completion_interruptible_timeout( - &mcs_compl->completion, - usecs_to_jiffies(mcs_data->timeout_us)); + completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, + mcs_data->timeout_jiffies); /* update timestamp */ if (completion_rc > 0) mcs_data->timestamp = mcs_compl->timestamp; - hl_wait_multi_cs_completion_fini(mcs_compl); - mcs_data->wait_status = completion_rc; return 0; @@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev) */ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { + struct multi_cs_completion *mcs_compl; struct hl_device *hdev = hpriv->hdev; struct multi_cs_data mcs_data = {0}; union hl_wait_cs_args *args = data; @@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) goto put_ctx; /* wait (with timeout) for the first CS to be completed */ - mcs_data.timeout_us = args->in.timeout_us; - rc = hl_wait_multi_cs_completion(&mcs_data); - if (rc) + mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); + + mcs_compl = hl_wait_multi_cs_completion_init(hdev, mcs_data.stream_master_qid_map); + if (IS_ERR(mcs_compl)) { + rc = PTR_ERR(mcs_compl); goto put_ctx; + } + + while (true) { + rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl); + if (rc || (mcs_data.wait_status == 0)) + break; - if (mcs_data.wait_status > 0) { /* * poll fences once again to update the CS map. * no timestamp should be updated this time. @@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) mcs_data.update_ts = false; rc = hl_cs_poll_fences(&mcs_data); + if (mcs_data.completion_bitmap) + break; + /* * if hl_wait_multi_cs_completion returned before timeout (i.e. - * it got a completion) we expect to see at least one CS - * completed after the poll function. + * it got a completion) it either got completed by CS in the multi CS list + * (in which case the indication will be non empty completion_bitmap) or it + * got completed by CS submitted to one of the shared stream master but + * not in the multi CS list (in which case we should wait again but reinit + * the completion, modify the timeout and set timestamp as zero to let a CS + * related to the current multi-CS set a new, relevant, timestamp) */ - if (!mcs_data.completion_bitmap) { - dev_warn_ratelimited(hdev->dev, - "Multi-CS got completion on wait but no CS completed\n"); - rc = -EFAULT; - } + /* wait again with modified timeout */ + mcs_data.timeout_jiffies = mcs_data.wait_status; + reinit_completion(&mcs_compl->completion); + mcs_compl->timestamp = 0; } + hl_wait_multi_cs_completion_fini(mcs_compl); + put_ctx: hl_ctx_put(ctx); kfree(fence_arr); @@ -2741,7 +2755,7 @@ free_seq_arr: } /* update if some CS was gone */ - if (mcs_data.timestamp) + if (!mcs_data.timestamp) args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; } else { args->out.status = HL_WAIT_CS_STATUS_BUSY; @@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } -static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) -{ - if (usecs <= U32_MAX) - return usecs_to_jiffies(usecs); - - /* - * If the value in nanoseconds is larger than 64 bit, use the largest - * 64 bit value. - */ - if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) - return nsecs_to_jiffies(U64_MAX); - - return nsecs_to_jiffies(usecs * NSEC_PER_USEC); -} - static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 user_address, u64 target_value, struct hl_user_interrupt *interrupt, diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index df1935952c28..eda1c70f6966 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2362,7 +2362,7 @@ struct multi_cs_completion { * @ctx: pointer to the context structure * @fence_arr: array of fences of all CSs * @seq_arr: array of CS sequence numbers - * @timeout_us: timeout in usec for waiting for CS to complete + * @timeout_jiffies: timeout in jiffies for waiting for CS to complete * @timestamp: timestamp of first completed CS * @wait_status: wait for CS status * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0) @@ -2376,7 +2376,7 @@ struct multi_cs_data { struct hl_ctx *ctx; struct hl_fence **fence_arr; u64 *seq_arr; - s64 timeout_us; + s64 timeout_jiffies; s64 timestamp; long wait_status; u32 completion_bitmap; -- 2.11.0