OSDN Git Service

GlobalISel: Implement narrowScalar for vector extract/insert indexes
author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Jul 2019 19:37:34 +0000 (19:37 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Jul 2019 19:37:34 +0000 (19:37 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366113 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/GlobalISel/LegalizerHelper.cpp
test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir

index d13b1a5..bf3cca4 100644 (file)
@@ -736,6 +736,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT: {
+    if (TypeIdx != 2)
+      return UnableToLegalize;
+
+    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+    Observer.changingInstr(MI);
+    narrowScalarSrc(MI, NarrowTy, OpIdx);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   }
 }
 
index 2fdde78..682eafb 100644 (file)
@@ -646,3 +646,22 @@ body: |
     %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
+
+---
+name: extract_vector_elt_v2i32_varidx_i64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2i32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[TRUNC]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
index d55f672..2ca336f 100644 (file)
@@ -2,12 +2,12 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
-name: insert_vector_elt_0_v2i32
+name: insert_vector_elt_0_v2s32
 
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
-    ; CHECK-LABEL: name: insert_vector_elt_0_v2i32
+    ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -19,3 +19,45 @@ body: |
     %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $vgpr0_vgpr1 = COPY %3
 ...
+
+---
+name: insert_vector_elt_v2s32_varidx_i64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4
+
+    ; CHECK-LABEL: name: insert_vector_elt_v2s32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s64) = COPY $vgpr3_vgpr4
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: insert_vector_elt_v16s32_varidx_i64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17_vgpr18
+
+    ; CHECK-LABEL: name: insert_vector_elt_v16s32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>)
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $vgpr16
+    %2:_(s64) = COPY $vgpr17_vgpr18
+    %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...