From 7fdd553b6622b759e8bbdda3c55cd5e4246fe65c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 15 Oct 2014 17:58:34 +0000 Subject: [PATCH] R600: Fix miscompiles when BFE has multiple uses SimplifyDemandedBits would break the other uses of the operand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219819 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 17 ++++++++++------- test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index bac4ca03b33..fa384db230b 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -2170,13 +2170,16 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, BitsFrom, ShiftVal); } - APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), - !DCI.isBeforeLegalizeOps()); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || - TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { - DCI.CommitTargetLoweringOpt(TLO); + if (BitsFrom.hasOneUse()) { + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + TLI.SimplifyDemandedBits(BitsFrom, Demanded, + KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + } } break; diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll index d065c2384f1..1d239af7aa4 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll @@ -552,3 +552,25 @@ define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind { store i32 %bfe_u32, i32 addrspace(1)* 
%out, align 4 ret void } + +; Make sure that SimplifyDemandedBits doesn't cause the multi-use 'and' to be +; reduced to only the bits demanded by the bfe. + +; XXX: The operand to v_bfe_u32 could also just directly be the load register. +; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg: +; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]] +; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]] +; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2 +; SI-DAG: BUFFER_STORE_DWORD [[AND]] +; SI-DAG: BUFFER_STORE_DWORD [[BFE]] +; SI: S_ENDPGM +define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, + i32 addrspace(1)* %out1, + i32 addrspace(1)* %in) nounwind { + %src = load i32 addrspace(1)* %in, align 4 + %and = and i32 %src, 63 + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4 + store i32 %and, i32 addrspace(1)* %out1, align 4 + ret void +} -- 2.11.0