From b726d071b4aa46004228fc38ee5bfd167f999bfe Mon Sep 17 00:00:00 2001
From: alex-t
Date: Fri, 22 May 2020 12:15:57 +0300
Subject: [PATCH] [AMDGPU] Reject moving PHI to VALU if the only VGPR input
 originated from move immediate

Summary:
A PHI's result register class is set to VGPR or SGPR depending on
cross-block value divergence. In some cases a uniform PHI needs to be
converted to return a VGPR to prevent an odd number of moves of values
from VGPR to SGPR and back. A PHI should certainly return a VGPR if it
has at least one VGPR input. This change adds an exception: we don't
want to convert a uniform PHI to VGPR when its only VGPR input is a
VGPR-to-SGPR COPY and the definition of the source VGPR in that COPY is
a move immediate.

  bb.0:
    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %2:sreg_32 = .....

  bb.1:
    %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
    S_BRANCH %bb.3

  bb.3:
    %1:sreg_32 = COPY %0
    S_BRANCH %bb.2

Reviewers: rampitec

Reviewed By: rampitec

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80434
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp         | 18 +++-
 .../test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir | 97 ++++++++++++++++++++++
 2 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 7283c33fe98..ef64c5674bd 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -835,8 +835,22 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
     }
     else if (Def->isCopy() &&
       TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
-      hasVGPRInput = true;
-      break;
+      Register SrcReg = Def->getOperand(1).getReg();
+      MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
+      unsigned SMovOp;
+      int64_t Imm;
+      if (!isSafeToFoldImmIntoCopy(Def, SrcDef, TII, SMovOp, Imm)) {
+        hasVGPRInput = true;
+        break;
+      } else {
+        // Formally, if we did not do this right away
+        // it would be done on the next iteration of the
+        // runOnMachineFunction main loop. But why not if we can?
+        MachineFunction *MF = MI.getParent()->getParent();
+        Def->getOperand(1).ChangeToImmediate(Imm);
+        Def->addImplicitDefUseOperands(*MF);
+        Def->setDesc(TII->get(SMovOp));
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
new file mode 100644
index 00000000000..ff061dec039
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
@@ -0,0 +1,97 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s | FileCheck -check-prefix=GCN %s
+---
+# GCN-LABEL: phi_moveimm_input
+# GCN-NOT: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+
+name: phi_moveimm_input
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0, $sgpr1
+
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    %4:sreg_32 = COPY $sgpr0
+    %5:sreg_32 = COPY $sgpr1
+
+  bb.1:
+    successors: %bb.2
+    %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.2
+    %1:sreg_32 = COPY %0
+    S_BRANCH %bb.2
+...
+
+---
+# GCN-LABEL: phi_moveimm_subreg_input
+# GCN-NOT: %{{[0-9]+}}:sreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:vreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+name: phi_moveimm_subreg_input
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0, $sgpr1
+
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    %4:sreg_32 = COPY $sgpr0
+    %5:sreg_32 = COPY $sgpr1
+
+  bb.1:
+    successors: %bb.2
+    undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    %3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.2
+    undef %1.sub0:sreg_64 = COPY %0
+    S_BRANCH %bb.2
+...
+
+
+---
+# GCN-LABEL: phi_moveimm_bad_opcode_input
+# GCN-NOT: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+# GCN: %{{[0-9]+}}:vgpr_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
+name: phi_moveimm_bad_opcode_input
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0, $sgpr1, $vgpr0
+    %6:vgpr_32 = COPY $vgpr0
+    %0:vgpr_32 = V_MOV_B32_sdwa 0, %6:vgpr_32, 0, 5, 2, 4, implicit $exec, implicit %6:vgpr_32(tied-def 0)
+
+    %4:sreg_32 = COPY $sgpr0
+    %5:sreg_32 = COPY $sgpr1
+
+  bb.1:
+
+    successors: %bb.2
+    %2:sreg_32 = S_ADD_U32 %4, %5, implicit-def $scc
+    S_BRANCH %bb.2
+  bb.2:
+    successors: %bb.3
+    %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1
+    S_BRANCH %bb.3
+  bb.3:
+    successors: %bb.2
+    %1:sreg_32 = COPY %0
+    S_BRANCH %bb.2
+...
-- 
2.11.0
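
Note on isSafeToFoldImmIntoCopy(): the hunk above shows only the call site; the helper's definition lives elsewhere in SIFixSGPRCopies.cpp and is not part of this excerpt. The sketch below is a plausible reconstruction written against the signature visible at the call site, an assumption for illustration rather than the patch's actual implementation. To be safe to fold, the COPY must be a plain full-register copy and the source definition a plain vector move of an immediate, which is exactly what the subreg and bad-opcode tests exercise.

static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                    const MachineInstr *MoveImm,
                                    const SIInstrInfo *TII,
                                    unsigned &SMovOp,
                                    int64_t &Imm) {
  // Sketch only: reconstructed from the call site above, not the
  // actual helper from this patch.

  // Only a plain, full-register COPY can be rewritten into a scalar
  // move. A subregister COPY (see phi_moveimm_subreg_input) must be
  // rejected.
  if (!Copy->isCopy() || Copy->getOperand(0).getSubReg() ||
      Copy->getOperand(1).getSubReg())
    return false;

  // The source definition must be a recognized vector move-immediate.
  // Anything else, e.g. the V_MOV_B32_sdwa in
  // phi_moveimm_bad_opcode_input, keeps the old behavior of moving
  // the PHI to VALU.
  if (!MoveImm || MoveImm->getOpcode() != AMDGPU::V_MOV_B32_e32)
    return false;

  const MachineOperand *Src0 =
      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
  if (!Src0 || !Src0->isImm())
    return false;

  // Report the scalar opcode and immediate the caller rewrites the
  // COPY into.
  SMovOp = AMDGPU::S_MOV_B32;
  Imm = Src0->getImm();
  return true;
}

With that contract, the caller in processPHINode can rewrite the COPY in place via ChangeToImmediate() and setDesc() and keep the PHI scalar.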
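For the first test, the expected net effect (an illustrative sketch; the test itself only checks the PHI's register class) is that the VGPR-to-SGPR COPY in bb.3 becomes a scalar move of the immediate:

  bb.3:
    successors: %bb.2
    %1:sreg_32 = S_MOV_B32 0
    S_BRANCH %bb.2

The PHI %3:sreg_32 = PHI %1, %bb.3, %2, %bb.1 is then left in sreg_32, avoiding the VGPR round trip that moving the PHI to VALU would otherwise introduce.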