OSDN Git Service

AMDGPU/GlobalISel: Fix selecting vcc->vcc bank copies
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Jul 2019 19:46:48 +0000 (19:46 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Jul 2019 19:46:48 +0000 (19:46 +0000)
The extra test change is correct, although how it arrives there is a
bug that needs work. With wave32, the test for isVCC ambiguously
reports true for an SCC or VCC source. A new allocatable pseudo
register class for SCC may be necesssary.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366119 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir

index f916154..f5a742b 100644 (file)
@@ -116,18 +116,20 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
       return RBI.constrainGenericRegister(DstReg, *RC, MRI);
     }
 
-    // TODO: Should probably leave the copy and let copyPhysReg expand it.
-    if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
-      return false;
+    if (!isVCC(SrcReg, MRI)) {
+      // TODO: Should probably leave the copy and let copyPhysReg expand it.
+      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
+        return false;
 
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
-      .addImm(0)
-      .addReg(SrcReg);
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+        .addImm(0)
+        .addReg(SrcReg);
 
-    if (!MRI.getRegClassOrNull(SrcReg))
-      MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
-    I.eraseFromParent();
-    return true;
+      if (!MRI.getRegClassOrNull(SrcReg))
+        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+      I.eraseFromParent();
+      return true;
+    }
   }
 
   for (const MachineOperand &MO : I.operands()) {
index 548dde2..81ffcd5 100644 (file)
@@ -90,10 +90,10 @@ body: |
     ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+    ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]]
+    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
-    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
-    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
-    ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
+    ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
     ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
@@ -268,3 +268,31 @@ body: |
     S_ENDPGM 0, implicit %2
 
 ...
+
+---
+
+name:            copy_s1_vcc_to_vcc
+legalized:       true
+regBankSelected: true
+
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:vcc(s1) = COPY %1
+    %3:vcc(s1) = COPY %2
+    S_ENDPGM 0, implicit %3
+
+...