From adb194b4580e25946c7d540b670eaa417d639046 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 14 Jul 2015 14:15:03 +0000 Subject: [PATCH] AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11061 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242146 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIShrinkInstructions.cpp | 33 ++++++-- test/CodeGen/AMDGPU/llvm.round.ll | 4 +- test/CodeGen/AMDGPU/select-vectors.ll | 116 ++++++++++++++--------------- test/CodeGen/AMDGPU/select64.ll | 4 +- test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 8 +- test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 10 +-- test/CodeGen/AMDGPU/vselect.ll | 34 +++++---- test/CodeGen/AMDGPU/xor.ll | 4 +- 8 files changed, 118 insertions(+), 95 deletions(-) diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index e7511e6a457..5d00bdd6a9b 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -95,13 +95,18 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, // a register allocation hint pre-regalloc and then do the shrining // post-regalloc. if (Src2) { - if (MI.getOpcode() != AMDGPU::V_MAC_F32_e64) - return false; + switch (MI.getOpcode()) { + default: return false; - const MachineOperand *Src2Mod = - TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers); - if (!isVGPR(Src2, TRI, MRI) || (Src2Mod && Src2Mod->getImm() != 0)) - return false; + case AMDGPU::V_MAC_F32_e64: + if (!isVGPR(Src2, TRI, MRI) || + TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) + return false; + break; + + case AMDGPU::V_CNDMASK_B32_e64: + break; + } } const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); @@ -250,6 +255,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { continue; } + if (Op32 == AMDGPU::V_CNDMASK_B32_e32) { + // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC + // instructions. + const MachineOperand *Src2 = + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (!Src2->isReg()) + continue; + unsigned SReg = Src2->getReg(); + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC); + continue; + } + if (SReg != AMDGPU::VCC) + continue; + } + // We can shrink this instruction DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';); diff --git a/test/CodeGen/AMDGPU/llvm.round.ll b/test/CodeGen/AMDGPU/llvm.round.ll index f5f124d915a..d0e49243ffa 100644 --- a/test/CodeGen/AMDGPU/llvm.round.ll +++ b/test/CodeGen/AMDGPU/llvm.round.ll @@ -9,8 +9,8 @@ ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]] ; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]] ; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]] -; SI: v_cmp_le_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0.5, |[[SUB]]| -; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]] +; SI: v_cmp_le_f32_e64 vcc, 0.5, |[[SUB]]| +; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]] ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]] ; SI: buffer_store_dword [[RESULT]] diff --git a/test/CodeGen/AMDGPU/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll index 59082c65cc8..94758ad84c1 100644 --- a/test/CodeGen/AMDGPU/select-vectors.ll +++ b/test/CodeGen/AMDGPU/select-vectors.ll @@ -6,10 +6,10 @@ ; FUNC-LABEL: {{^}}select_v4i8: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind { %cmp = icmp eq i8 %c, 0 %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b @@ -18,10 +18,10 @@ define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, } ; FUNC-LABEL: {{^}}select_v4i16: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b @@ -30,8 +30,8 @@ define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> } ; FUNC-LABEL: {{^}}select_v2i32: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 ; SI: buffer_store_dwordx2 define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 @@ -41,10 +41,10 @@ define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> } ; FUNC-LABEL: {{^}}select_v4i32: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 ; SI: buffer_store_dwordx4 define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 @@ -54,14 +54,14 @@ define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> } ; FUNC-LABEL: {{^}}select_v8i32: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b @@ -88,14 +88,14 @@ define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x f } ; FUNC-LABEL: {{^}}select_v8f32: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x float> %a, <8 x float> %b @@ -104,10 +104,10 @@ define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x f } ; FUNC-LABEL: {{^}}select_v2f64: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <2 x double> %a, <2 x double> %b @@ -116,14 +116,14 @@ define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x } ; FUNC-LABEL: {{^}}select_v4f64: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x double> %a, <4 x double> %b @@ -132,22 +132,22 @@ define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x } ; FUNC-LABEL: {{^}}select_v8f64: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 +; SI: v_cndmask_b32_e32 define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x double> %a, <8 x double> %b diff --git a/test/CodeGen/AMDGPU/select64.ll b/test/CodeGen/AMDGPU/select64.ll index 5cebb30dc72..13fb575b2b1 100644 --- a/test/CodeGen/AMDGPU/select64.ll +++ b/test/CodeGen/AMDGPU/select64.ll @@ -55,8 +55,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa ; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0 ; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]] ; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]] -; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} -; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} ; CHECK: s_endpgm define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %cmp = icmp ugt i32 %cond, 5 diff --git a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index da4e91db3a3..0db7cdc171b 100644 --- a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -12,11 +12,11 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { ; FIXME: select on 0, 0 ; SI-LABEL: {{^}}sint_to_fp_i1_f64: -; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; SI: v_cmp_eq_i32_e64 vcc, ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already -; uses an SGPR for [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]] +; uses an SGPR (implicit vcc). +; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { diff --git a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index dfec8eb15cb..6f608df5e9f 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -72,11 +72,11 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i ; FIXME: select on 0, 0 ; SI-LABEL: {{^}}uint_to_fp_i1_to_f64: -; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], -; We can't fold the SGPRs into v_cndmask_b32_e64, because it already -; uses an SGPR for [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]] +; SI: v_cmp_eq_i32_e64 vcc +; We can't fold the SGPRs into v_cndmask_b32_e32, because it already +; uses an SGPR (implicit vcc). +; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { diff --git a/test/CodeGen/AMDGPU/vselect.ll b/test/CodeGen/AMDGPU/vselect.ll index a3014b03d2b..dc1f1ea11b0 100644 --- a/test/CodeGen/AMDGPU/vselect.ll +++ b/test/CodeGen/AMDGPU/vselect.ll @@ -1,14 +1,14 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s -;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=VI --check-prefix=FUNC %s + +;FUNC-LABEL: {{^}}test_select_v2i32: -;EG: {{^}}test_select_v2i32: ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: {{^}}test_select_v2i32: -;SI: v_cndmask_b32_e64 ;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e32 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) { entry: @@ -20,13 +20,13 @@ entry: ret void } -;EG: {{^}}test_select_v2f32: +;FUNC-LABEL: {{^}}test_select_v2f32: + ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: {{^}}test_select_v2f32: -;SI: v_cndmask_b32_e64 ;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e32 define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) { entry: @@ -38,17 +38,19 @@ entry: ret void } -;EG: {{^}}test_select_v4i32: +;FUNC-LABEL: {{^}}test_select_v4i32: + ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: {{^}}test_select_v4i32: -;SI: v_cndmask_b32_e64 -;SI: v_cndmask_b32_e64 -;SI: v_cndmask_b32_e64 -;SI: v_cndmask_b32_e64 +; FIXME: The shrinking does not happen on tonga + +;SI: v_cndmask_b32 +;SI: v_cndmask_b32 +;SI: v_cndmask_b32 +;SI: v_cndmask_b32 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { entry: @@ -60,7 +62,7 @@ entry: ret void } -;EG: {{^}}test_select_v4f32: +;FUNC-LABEL: {{^}}test_select_v4f32: ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll index 089db59eabc..ddb920af29d 100644 --- a/test/CodeGen/AMDGPU/xor.ll +++ b/test/CodeGen/AMDGPU/xor.ll @@ -42,8 +42,8 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}} ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}} -; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]] +; SI: s_xor_b64 [[XOR:vcc]], [[CMP0]], [[CMP1]] +; SI: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { -- 2.11.0