OSDN Git Service

AMDGPU/GlobalISel: Add types to special inputs
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Sun, 5 Jul 2020 17:17:02 +0000 (13:17 -0400)
committerMatt Arsenault <arsenm2@gmail.com>
Mon, 6 Jul 2020 21:00:55 +0000 (17:00 -0400)
When passing special ABI inputs, we have no existing context for the
type to use.

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

index f41e774..d078fc1 100644 (file)
@@ -83,59 +83,63 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
   }
 }
 
-std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
 AMDGPUFunctionArgInfo::getPreloadedValue(
-  AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
   switch (Value) {
   case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
-    return std::make_pair(
-      PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
-      &AMDGPU::SGPR_128RegClass);
+    return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
+                                                : nullptr,
+                           &AMDGPU::SGPR_128RegClass, LLT::vector(4, 32));
   }
   case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
-    return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
-    return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
-                          &AMDGPU::SGPR_32RegClass);
-
+    return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
+                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
-    return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
-                          &AMDGPU::SGPR_32RegClass);
+    return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
+                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
-    return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
-                          &AMDGPU::SGPR_32RegClass);
+    return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
+                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
-    return std::make_pair(
-      PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
-      &AMDGPU::SGPR_32RegClass);
+    return std::make_tuple(
+        PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
+        &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
-    return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
-    return std::make_pair(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::DISPATCH_ID:
-    return std::make_pair(DispatchID ? &DispatchID : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(DispatchID ? &DispatchID : nullptr,
+                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
   case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
-    return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
+                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
   case AMDGPUFunctionArgInfo::DISPATCH_PTR:
-    return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::QUEUE_PTR:
-    return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
-    return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
-                          &AMDGPU::VGPR_32RegClass);
+    return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
+                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
-    return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
-                          &AMDGPU::VGPR_32RegClass);
+    return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
+                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
-    return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
-                          &AMDGPU::VGPR_32RegClass);
+    return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
+                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
   }
   llvm_unreachable("unexpected preloaded value type");
 }
index b4ef0c5..576e6cf 100644 (file)
@@ -12,6 +12,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
 
 namespace llvm {
 
@@ -148,7 +149,7 @@ struct AMDGPUFunctionArgInfo {
   ArgDescriptor WorkItemIDY;
   ArgDescriptor WorkItemIDZ;
 
-  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
   getPreloadedValue(PreloadedValue Value) const;
 
   static constexpr AMDGPUFunctionArgInfo fixedABILayout();
index a00bd88..bcad30a 100644 (file)
@@ -2442,7 +2442,8 @@ const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
   const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
   const ArgDescriptor *Arg;
   const TargetRegisterClass *RC;
-  std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
+  LLT ArgTy;
+  std::tie(Arg, RC, ArgTy) = MFI->getPreloadedValue(ArgType);
   if (!Arg) {
     LLVM_DEBUG(dbgs() << "Required arg register missing\n");
     return nullptr;
@@ -3178,8 +3179,9 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
 
   const ArgDescriptor *Arg;
   const TargetRegisterClass *RC;
-  std::tie(Arg, RC)
-    = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+  LLT ArgTy;
+  std::tie(Arg, RC, ArgTy) =
+      MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
   if (!Arg)
     return false;
 
index 3ee48c1..a3135a7 100644 (file)
@@ -1527,9 +1527,10 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
 
   const ArgDescriptor *InputPtrReg;
   const TargetRegisterClass *RC;
+  LLT ArgTy;
 
-  std::tie(InputPtrReg, RC)
-    = Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+  std::tie(InputPtrReg, RC, ArgTy) =
+      Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
 
   MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
   MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
@@ -1675,8 +1676,9 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
   AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
   const ArgDescriptor *Reg;
   const TargetRegisterClass *RC;
+  LLT Ty;
 
-  std::tie(Reg, RC) = MFI.getPreloadedValue(PVID);
+  std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
   return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
 }
 
@@ -2580,15 +2582,18 @@ void SITargetLowering::passSpecialInputs(
   for (auto InputID : InputRegs) {
     const ArgDescriptor *OutgoingArg;
     const TargetRegisterClass *ArgRC;
+    LLT ArgTy;
 
-    std::tie(OutgoingArg, ArgRC) = CalleeArgInfo->getPreloadedValue(InputID);
+    std::tie(OutgoingArg, ArgRC, ArgTy) =
+        CalleeArgInfo->getPreloadedValue(InputID);
     if (!OutgoingArg)
       continue;
 
     const ArgDescriptor *IncomingArg;
     const TargetRegisterClass *IncomingArgRC;
-    std::tie(IncomingArg, IncomingArgRC)
-      = CallerArgInfo.getPreloadedValue(InputID);
+    LLT Ty;
+    std::tie(IncomingArg, IncomingArgRC, Ty) =
+        CallerArgInfo.getPreloadedValue(InputID);
     assert(IncomingArgRC == ArgRC);
 
     // All special arguments are ints for now.
@@ -2621,24 +2626,25 @@ void SITargetLowering::passSpecialInputs(
   // packed.
   const ArgDescriptor *OutgoingArg;
   const TargetRegisterClass *ArgRC;
+  LLT Ty;
 
-  std::tie(OutgoingArg, ArgRC) =
-    CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+  std::tie(OutgoingArg, ArgRC, Ty) =
+      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
   if (!OutgoingArg)
-    std::tie(OutgoingArg, ArgRC) =
-      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+    std::tie(OutgoingArg, ArgRC, Ty) =
+        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
   if (!OutgoingArg)
-    std::tie(OutgoingArg, ArgRC) =
-      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+    std::tie(OutgoingArg, ArgRC, Ty) =
+        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
   if (!OutgoingArg)
     return;
 
-  const ArgDescriptor *IncomingArgX
-    = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X).first;
-  const ArgDescriptor *IncomingArgY
-    = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y).first;
-  const ArgDescriptor *IncomingArgZ
-    = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z).first;
+  const ArgDescriptor *IncomingArgX = std::get<0>(
+      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
+  const ArgDescriptor *IncomingArgY = std::get<0>(
+      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
+  const ArgDescriptor *IncomingArgZ = std::get<0>(
+      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
 
   SDValue InputReg;
   SDLoc SL;
index 7221e01..cf1629f 100644 (file)
@@ -679,13 +679,13 @@ public:
     return ArgInfo;
   }
 
-  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     return ArgInfo.getPreloadedValue(Value);
   }
 
   Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
-    auto Arg = ArgInfo.getPreloadedValue(Value).first;
+    auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
     return Arg ? Arg->getRegister() : Register();
   }