From 20cd88a775381e3ea4c2e88447a79f8715db1af7 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 4 Jul 2022 08:33:57 +0300 Subject: [PATCH] habanalabs: fixes to the poll-timeout macros - use conventional internal macro variables (double underscore prefix) - adjust address casting - on register poll using ELBI use ELBI read rather than BAR read on error condition - remove unused macro Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 119 +++++++++++++++++++++------- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 72cb12f2068a..3c51eaca521c 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); /* Timeout should be longer when working with simulator but cap the * increased timeout to some maximum */ -#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ +#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \ ({ \ ktime_t __timeout; \ + u32 __elbi_read; \ + int __rc = 0; \ if (hdev->pdev) \ __timeout = ktime_add_us(ktime_get(), timeout_us); \ else \ @@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); (u64) HL_SIM_MAX_TIMEOUT_US)); \ might_sleep_if(sleep_us); \ for (;;) { \ - (val) = RREG32(addr); \ + if (elbi) { \ + __rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \ + if (__rc) \ + break; \ + (val) = __elbi_read; \ + } else {\ + (val) = RREG32((u32)addr); \ + } \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = RREG32(addr); \ + if (elbi) { \ + __rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \ + if (__rc) \ + break; \ + (val) = __elbi_read; \ + } else {\ + (val) = RREG32((u32)addr); \ + } \ break; \ } \ if (sleep_us) \ 
usleep_range((sleep_us >> 2) + 1, sleep_us); \ } \ - (cond) ? 0 : -ETIMEDOUT; \ + __rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \ }) +#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ + hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, false) + +#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \ + hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, true) + +/* + * poll array of register addresses. + * condition is satisfied if all register values match the expected value. + * once some register in the array satisfies the condition it will not be polled again, + * this is done both for efficiency and because some registers are "clear on read". + * TODO: use read from PCI bar in other places in the code (SW-91406) + */ +#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \ + timeout_us, elbi) \ +({ \ + ktime_t __timeout; \ + u64 __elem_bitmask; \ + u32 __read_val; \ + u8 __arr_idx; \ + int __rc = 0; \ + \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min(((u64)timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + \ + might_sleep_if(sleep_us); \ + if (arr_size >= 64) \ + __rc = -EINVAL; \ + else \ + __elem_bitmask = BIT_ULL(arr_size) - 1; \ + for (;;) { \ + if (__rc) \ + break; \ + for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) { \ + if (!(__elem_bitmask & BIT_ULL(__arr_idx))) \ + continue; \ + if (elbi) { \ + __rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], &__read_val); \ + if (__rc) \ + break; \ + } else { \ + __read_val = RREG32((u32)(addr_arr)[__arr_idx]); \ + } \ + if (__read_val == (expected_val)) \ + __elem_bitmask &= ~BIT_ULL(__arr_idx); \ + } \ + if (__rc || (__elem_bitmask == 0)) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \ + break; \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + __rc ? 
__rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \ +}) + +#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \ + timeout_us) \ + hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \ + timeout_us, false) + +#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \ + timeout_us) \ + hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \ + timeout_us, true) + /* * address in this macro points always to a memory location in the * host's (server's) memory. That location is updated asynchronously @@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); (cond) ? 0 : -ETIMEDOUT; \ }) -#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \ - timeout_us) \ -({ \ - ktime_t __timeout; \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min((u64)(timeout_us * 10), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ - might_sleep_if(sleep_us); \ - for (;;) { \ - (val) = readl(addr); \ - if (cond) \ - break; \ - if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = readl(addr); \ - break; \ - } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ - } \ - (cond) ? 0 : -ETIMEDOUT; \ -}) - #define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \ ({ \ struct user_mapped_block *p = blk; \ -- 2.11.0