const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
+ const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
E = MBB.rend();
if (A->getOpcode() == AndN2)
MaskValue = ~MaskValue;
- if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
- MI.killsRegister(CondReg, TRI))
+ if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC)) {
+ if (!MI.killsRegister(CondReg, TRI)) {
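+ // The branch does not kill CondReg, so keep it defined: replace the AND
+ // with a MOV of 0 when the mask is zero, otherwise a MOV of exec.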
+ if (MaskValue == 0) {
+ BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
+ .addImm(0);
+ } else {
+ BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
+ .addReg(ExecReg);
+ }
+ }
+ // Erase the AND whether or not a MOV was inserted in its place.
A->eraseFromParent();
+ }
bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
if (SReg == ExecReg) {
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
S_ENDPGM 0
...
+---
+# GCN-LABEL: name: and_0_mov
+# GCN: bb.2:
+# GCN-NOT: S_AND
+# GCN: $vcc = S_MOV_B64 0
+# GCN-NEXT: S_BRANCH %bb.1
+name: and_0_mov
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 0
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: andn2_m1_mov
+# GCN: bb.2:
+# GCN-NOT: S_ANDN2
+# GCN: $vcc = S_MOV_B64 0
+# GCN-NEXT: S_BRANCH %bb.1
+name: andn2_m1_mov
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: and_m1_mov
+# GCN: bb.2:
+# GCN-NOT: S_AND
+# GCN: $vcc = S_MOV_B64 $exec
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_m1_mov
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: andn2_0_mov
+# GCN: bb.2:
+# GCN-NOT: S_ANDN2
+# GCN: $vcc = S_MOV_B64 $exec
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: andn2_0_mov
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 0
+ $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: and_0_scc_req
+# GCN: bb.2:
+# GCN-NOT: S_MOV_
+# GCN: S_AND_
+# GCN-NEXT: S_BRANCH %bb.1
+name: and_0_scc_req
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 0
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ S_ENDPGM 0
+...
+---
+# GCN-LABEL: name: andn2_m1_scc_req
+# GCN: bb.2:
+# GCN-NOT: S_MOV_
+# GCN: S_ANDN2_
+# GCN-NEXT: S_BRANCH %bb.1
+name: andn2_m1_scc_req
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_ANDN2_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ S_ENDPGM 0
+...
}
; GCN-LABEL: {{^}}test_branch_true:
-; GFX1032: s_and_b32 vcc_lo, exec_lo, -1
-; GFX1064: s_and_b64 vcc, exec, -1
+; GFX1032: s_mov_b32 vcc_lo, exec_lo
+; GFX1064: s_mov_b64 vcc, exec
define amdgpu_kernel void @test_branch_true() #2 {
entry:
br i1 true, label %for.end, label %for.body.lr.ph