From: Matt Arsenault Date: Fri, 12 Dec 2014 02:30:37 +0000 (+0000) Subject: R600: Fix min/max matching problems with unordered compares X-Git-Tag: android-x86-7.1-r4~54490 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=002ca4ca3f0f0e7d4c1a20f0f15843674e6051af;p=android-x86%2Fexternal-llvm.git R600: Fix min/max matching problems with unordered compares The returned operand needs to be permuted for the unordered compares. Also fix incorrectly producing fmin_legacy / fmax_legacy for f64, which don't exist. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224094 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 0e34b4625c8..5783d4398ed 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1038,17 +1038,21 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + DAGCombinerInfo &DCI) const { + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return SDValue(); + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); + SelectionDAG &DAG = DCI.DAG; ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); switch (CCOpcode) { case ISD::SETOEQ: @@ -1065,33 +1069,51 @@ SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL, case ISD::SETO: break; case ISD::SETULE: - case ISD::SETULT: + case ISD::SETULT: { + // Unordered. + // + // We will allow this before legalization since we expand unordered compares + // ordinarily. 
+ if (LHS == True) + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); + } case ISD::SETOLE: case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - break; + // Ordered. Assume ordered for undefined. + + // Only do this after legalization to avoid interfering with other combines + // which might occur. + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && + !DCI.isCalledByLegalizer()) + return SDValue(); // We need to permute the operands to get the correct NaN behavior. The // selected operand is the second one based on the failing compare with NaN, // so permute it based on the compare type the hardware uses. if (LHS == True) - return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); - return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); + } + case ISD::SETUGE: + case ISD::SETUGT: { + if (LHS == True) + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: - case ISD::SETUGE: case ISD::SETOGE: - case ISD::SETUGT: case ISD::SETOGT: { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - break; + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && + !DCI.isCalledByLegalizer()) + return SDValue(); if (LHS == True) - return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); - return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); } case ISD::SETCC_INVALID: llvm_unreachable("Invalid setcc condcode!"); @@ -2276,24 +2298,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - 
case ISD::SELECT_CC: { - SDLoc DL(N); - EVT VT = N->getValueType(0); - - if (VT == MVT::f32 || - (VT == MVT::f64 && - Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue True = N->getOperand(2); - SDValue False = N->getOperand(3); - SDValue CC = N->getOperand(4); - - return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); - } - - break; - } case ISD::SELECT: { SDValue Cond = N->getOperand(0); if (Cond.getOpcode() == ISD::SETCC) { @@ -2306,11 +2310,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, SDValue True = N->getOperand(1); SDValue False = N->getOperand(2); - if (VT == MVT::f32 || - (VT == MVT::f64 && - Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) { - return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); - } + if (VT == MVT::f32) + return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); // TODO: Implement min / max Evergreen instructions. 
if (VT == MVT::i32 && diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 7386eaea73d..64ec0245f98 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -145,14 +145,14 @@ public: SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; - SDValue CombineFMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const; + SDValue CombineFMinMaxLegacy(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + DAGCombinerInfo &DCI) const; SDValue CombineIMinMax(SDLoc DL, EVT VT, SDValue LHS, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 1a84546fa5a..fb7514e26da 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1118,6 +1118,13 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const SDValue CC = Op.getOperand(4); SDValue Temp; + if (VT == MVT::f32) { + DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); + SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); + if (MinMax) + return MinMax; + } + // LHS and RHS are guaranteed to be the same value type EVT CompareVT = LHS.getValueType(); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index aeba3518f7b..8c3f1403ca2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1514,6 +1514,7 @@ let isCommutable = 1 in { defm V_MAC_LEGACY_F32 : VOP2Inst , "v_mac_legacy_f32", VOP_F32_F32_F32 >; +} // End isCommutable = 1 defm V_MIN_LEGACY_F32 : VOP2Inst , "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy @@ -1522,6 +1523,7 @@ defm V_MAX_LEGACY_F32 : VOP2Inst , "v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy >; +let isCommutable = 1 in 
{ defm V_LSHR_B32 : VOP2Inst , "v_lshr_b32", VOP_I32_I32_I32, srl>; defm V_ASHR_I32 : VOP2Inst , "v_ashr_i32", VOP_I32_I32_I32, sra diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/R600/fmax_legacy.f64.ll new file mode 100644 index 00000000000..a615825a45d --- /dev/null +++ b/test/CodeGen/R600/fmax_legacy.f64.ll @@ -0,0 +1,67 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; Make sure we don't try to form FMAX_LEGACY nodes with f64 + +declare i32 @llvm.r600.read.tidig.x() #1 + +; FUNC-LABEL: @test_fmax_legacy_uge_f64 +define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp uge double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_oge_f64 +define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp oge double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_ugt_f64 +define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* 
%gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ugt double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_ogt_f64 +define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ogt double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/R600/fmax_legacy.ll index 8415f34d04c..43e9de10880 100644 --- a/test/CodeGen/R600/fmax_legacy.ll +++ b/test/CodeGen/R600/fmax_legacy.ll @@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: @test_fmax_legacy_uge_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] ; EG: MAX define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 @@ -44,7 +44,7 @@ define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmax_legacy_ugt_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] 
+; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] ; EG: MAX define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/R600/fmin_legacy.f64.ll new file mode 100644 index 00000000000..51dcd06f939 --- /dev/null +++ b/test/CodeGen/R600/fmin_legacy.f64.ll @@ -0,0 +1,77 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() #1 + +; FUNC-LABEL: @test_fmin_legacy_f64 +define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 { + %r0 = extractelement <4 x double> %reg0, i32 0 + %r1 = extractelement <4 x double> %reg0, i32 1 + %r2 = fcmp uge double %r0, %r1 + %r3 = select i1 %r2, double %r1, double %r0 + %vec = insertelement <4 x double> undef, double %r3, i32 0 + store <4 x double> %vec, <4 x double> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ule_f64 +define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ule double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ole_f64 +define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ole 
double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_olt_f64 +define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp olt double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ult_f64 +define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ult double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/R600/fmin_legacy.ll index 3f60b609b40..229d008bada 100644 --- a/test/CodeGen/R600/fmin_legacy.ll +++ b/test/CodeGen/R600/fmin_legacy.ll @@ -19,7 +19,7 @@ define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> in ; FUNC-LABEL: @test_fmin_legacy_ule_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] define void 
@test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid @@ -73,7 +73,7 @@ define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmin_legacy_ult_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid