From b726d071b4aa46004228fc38ee5bfd167f999bfe Mon Sep 17 00:00:00 2001
From: alex-t
Date: Fri, 22 May 2020 12:15:57 +0300
Subject: [PATCH] [AMDGPU] Reject moving PHI to VALU if the only VGPR input
 originated from move immediate

Summary:
A PHI's result register class is set to VGPR or SGPR depending on
cross-block value divergence. In some cases a uniform PHI needs to be
converted to return a VGPR to prevent an odd number of moves of values
from VGPR to SGPR and back. A PHI should certainly return a VGPR if it
has at least one VGPR input. This change adds an exception: we don't
want to convert a uniform PHI to VGPR when its only VGPR input is a
VGPR-to-SGPR COPY and the definition of the source VGPR in that COPY is
a move immediate.

  bb.0:
    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %2:sreg_32 = .....

  bb.1:
    %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
    S_BRANCH %bb.3

  bb.3:
    %1:sreg_32 = COPY %0
    S_BRANCH %bb.2

Reviewers: rampitec

Reviewed By: rampitec

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80434
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp         | 18 +++-
 .../test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir | 97 ++++++++++++++++++++++
 2 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 7283c33fe98..ef64c5674bd 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -835,8 +835,22 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
     }
     else if (Def->isCopy() &&
       TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
-      hasVGPRInput = true;
-      break;
+      Register SrcReg = Def->getOperand(1).getReg();
+      MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
+      unsigned SMovOp;
+      int64_t Imm;
+      if (!isSafeToFoldImmIntoCopy(Def, SrcDef, TII, SMovOp, Imm)) {
+        hasVGPRInput = true;
+        break;
+      } else {
+        // Formally, if we did not do this right away
+        // it would be done on the next iteration of the
+        // runOnMachineFunction main loop. But why not if we can?
+        MachineFunction *MF = MI.getParent()->getParent();
+        Def->getOperand(1).ChangeToImmediate(Imm);
+        Def->addImplicitDefUseOperands(*MF);
+        Def->setDesc(TII->get(SMovOp));
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
new file mode 100644
index 00000000000..ff061dec039
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
@@ -0,0 +1,97 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s | FileCheck -check-prefix=GCN %s
+---
+# GCN-LABEL: phi_moveimm_input
+# GCN-NOT: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+
+name: phi_moveimm_input
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0, $sgpr1
+
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    %4:sreg_32 = COPY $sgpr0
+    %5:sreg_32 = COPY $sgpr1
+
+  bb.1:
+    successors: %bb.2
+    %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.2
+    %1:sreg_32 = COPY %0
+    S_BRANCH %bb.2
+...
+
+---
+# GCN-LABEL: phi_moveimm_subreg_input
+# GCN-NOT: %{{[0-9]+}}:sreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:vreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+name: phi_moveimm_subreg_input
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0, $sgpr1
+
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    %4:sreg_32 = COPY $sgpr0
+    %5:sreg_32 = COPY $sgpr1
+
+  bb.1:
+    successors: %bb.2
+    undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    %3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.2
+    undef %1.sub0:sreg_64 = COPY %0
+    S_BRANCH %bb.2
+...
+
+
+---
+# GCN-LABEL: phi_moveimm_bad_opcode_input
+# GCN-NOT: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+name: phi_moveimm_bad_opcode_input
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0, $sgpr1, $vgpr0
+    %6:vgpr_32 = COPY $vgpr0
+    %0:vgpr_32 = V_MOV_B32_sdwa 0, %6:vgpr_32, 0, 5, 2, 4, implicit $exec, implicit %6:vgpr_32(tied-def 0)
+
+    %4:sreg_32 = COPY $sgpr0
+    %5:sreg_32 = COPY $sgpr1
+
+  bb.1:
+
+    successors: %bb.2
+    %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+    S_BRANCH %bb.2
+  bb.2:
+    successors: %bb.3
+    %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+    S_BRANCH %bb.3
+  bb.3:
+    successors: %bb.2
+    %1:sreg_32 = COPY %0
+    S_BRANCH %bb.2
+...
-- 
2.11.0
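
Note on isSafeToFoldImmIntoCopy(): the hunk above shows only the call site; the helper's definition lives elsewhere in SIFixSGPRCopies.cpp and is not part of this excerpt. The sketch below is a plausible reconstruction written against the signature visible at the call site, an assumption for illustration rather than the patch's actual implementation. To be safe to fold, the COPY must be a plain full-register copy and the source definition a plain vector move of an immediate, which is exactly what the subreg and bad-opcode tests exercise.

static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                    const MachineInstr *MoveImm,
                                    const SIInstrInfo *TII,
                                    unsigned &SMovOp,
                                    int64_t &Imm) {
  // Sketch only: reconstructed from the call site above, not the
  // actual helper from this patch.

  // Only a plain, full-register COPY can be rewritten into a scalar
  // move. A subregister COPY (see phi_moveimm_subreg_input) must be
  // rejected.
  if (!Copy->isCopy() || Copy->getOperand(0).getSubReg() ||
      Copy->getOperand(1).getSubReg())
    return false;

  // The source definition must be a recognized vector move-immediate.
  // Anything else, e.g. the V_MOV_B32_sdwa in
  // phi_moveimm_bad_opcode_input, keeps the old behavior of moving
  // the PHI to VALU.
  if (!MoveImm || MoveImm->getOpcode() != AMDGPU::V_MOV_B32_e32)
    return false;

  const MachineOperand *Src0 =
      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
  if (!Src0 || !Src0->isImm())
    return false;

  // Report the scalar opcode and immediate the caller rewrites the
  // COPY into.
  SMovOp = AMDGPU::S_MOV_B32;
  Imm = Src0->getImm();
  return true;
}

With that contract, the caller in processPHINode can rewrite the COPY in place via ChangeToImmediate() and setDesc() and keep the PHI scalar.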
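For the first test, the expected net effect (an illustrative sketch; the test itself only checks the PHI's register class) is that the VGPR-to-SGPR COPY in bb.3 becomes a scalar move of the immediate:

  bb.3:
    successors: %bb.2
    %1:sreg_32 = S_MOV_B32 0
    S_BRANCH %bb.2

The PHI %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1 is then left in sreg_32, avoiding the VGPR round trip that moving the PHI to VALU would otherwise introduce.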