From 0f45d4dc2b15e137346e1e3f064a24302e1c9048 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 2 Jan 2020 18:41:26 -0500 Subject: [PATCH] ac: add ac_build_readlane without optimization barrier Acked-by: Pierre-Eric Pelloux-Prayer --- src/amd/llvm/ac_llvm_build.c | 18 ++++++++++++++---- src/amd/llvm/ac_llvm_build.h | 3 +++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index f789ff5a368..3640c7c22fa 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -3595,8 +3595,6 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l LLVMTypeRef type = LLVMTypeOf(src); LLVMValueRef result; - ac_build_optimization_barrier(ctx, &src); - src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); if (lane) lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, ""); @@ -3613,13 +3611,17 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l /** * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic. + * + * The optimization barrier is not needed if the value is the same in all lanes + * or if this is called in the outermost block. + * * @param ctx * @param src * @param lane - id of the lane or NULL for the first active lane * @return value of the lane */ -LLVMValueRef -ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) +LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, + LLVMValueRef src, LLVMValueRef lane) { LLVMTypeRef src_type = LLVMTypeOf(src); src = ac_to_integer(ctx, src); @@ -3649,6 +3651,14 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la } LLVMValueRef +ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) +{ + ac_build_optimization_barrier(ctx, &src); + + return ac_build_readlane_no_opt_barrier(ctx, src, lane); +} + +LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane) { return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32, diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 56ec7613009..4be06a9c104 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -655,6 +655,9 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask); +LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, + LLVMValueRef src, LLVMValueRef lane); + LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane); -- 2.11.0