return AMDGPU::NoRegister;
}
+/// Check whether \p MI is one of the S_*_B64 logical opcodes listed below
+/// (AND, OR, XOR, ANDN2, ORN2, NAND, NOR, XNOR) and reads EXEC through either
+/// of its two source operands (operand 1 or operand 2).
+///
+/// \returns the register held by operand 0 of \p MI when both conditions
+/// hold, and AMDGPU::NoRegister otherwise.
+static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    // Not a logical opcode this helper recognises.
+    return AMDGPU::NoRegister;
+  case AMDGPU::S_AND_B64:
+  case AMDGPU::S_OR_B64:
+  case AMDGPU::S_XOR_B64:
+  case AMDGPU::S_ANDN2_B64:
+  case AMDGPU::S_ORN2_B64:
+  case AMDGPU::S_NAND_B64:
+  case AMDGPU::S_NOR_B64:
+  case AMDGPU::S_XNOR_B64:
+    break;
+  }
+
+  // EXEC may appear in either source slot; inspect operand 1 first, then
+  // operand 2, and stop at the first match.
+  for (unsigned SrcIdx = 1; SrcIdx <= 2; ++SrcIdx) {
+    const MachineOperand &Src = MI.getOperand(SrcIdx);
+    if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
+      return MI.getOperand(0).getReg();
+  }
+
+  return AMDGPU::NoRegister;
+}
+
static unsigned getSaveExecOp(unsigned Opc) {
switch (Opc) {
case AMDGPU::S_AND_B64:
// Scan backwards to find the def.
auto CopyToExecInst = &*I;
auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
- if (CopyFromExecInst == E)
+ if (CopyFromExecInst == E) {
+ auto PrepareExecInst = std::next(I);
+ if (PrepareExecInst == E)
+ continue;
+ // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
+ if (CopyToExecInst->getOperand(1).isKill() &&
+ isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
+ DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
+
+ PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
+
+ DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
+
+ CopyToExecInst->eraseFromParent();
+ }
+
continue;
+ }
if (isLiveOut(MBB, CopyToExec)) {
// The copied register is live out and has a second use in another block.
--- /dev/null
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+# GCN-LABEL: name: reduce_and_saveexec
+# GCN: %exec = S_AND_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_and_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_and_saveexec_commuted
+# GCN: %exec = S_AND_B64 killed %vcc, %exec
+# GCN-NEXT: S_ENDPGM
+name: reduce_and_saveexec_commuted
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_AND_B64 killed %vcc, %exec, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_and_saveexec_liveout
+# GCN: %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc
+# GCN-NEXT: %exec = COPY
+name: reduce_and_saveexec_liveout
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_saveexec
+# GCN: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc
+# GCN-NEXT: S_ENDPGM
+name: and_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = COPY %exec
+ %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
+ %exec = S_MOV_B64_term %sgpr2_sgpr3
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_or_saveexec
+# GCN: %exec = S_OR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_or_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_xor_saveexec
+# GCN: %exec = S_XOR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_xor_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_andn2_saveexec
+# GCN: %exec = S_ANDN2_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_andn2_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_ANDN2_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_orn2_saveexec
+# GCN: %exec = S_ORN2_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_orn2_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_ORN2_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_nand_saveexec
+# GCN: %exec = S_NAND_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_nand_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_NAND_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_nor_saveexec
+# GCN: %exec = S_NOR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_nor_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_NOR_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: reduce_xnor_saveexec
+# GCN: %exec = S_XNOR_B64 %exec, killed %vcc
+# GCN-NEXT: S_ENDPGM
+name: reduce_xnor_saveexec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %vcc = IMPLICIT_DEF
+ %sgpr0_sgpr1 = S_XNOR_B64 %exec, killed %vcc, implicit-def %scc
+ %exec = COPY killed %sgpr0_sgpr1
+ S_ENDPGM
+...
+---