lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

   1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "SIMachineFunctionInfo.h"
  11 #include "AMDGPUArgumentUsageInfo.h"
  12 #include "AMDGPUSubtarget.h"
  13 #include "SIRegisterInfo.h"
  14 #include "Utils/AMDGPUBaseInfo.h"
  15 #include "llvm/ADT/Optional.h"
  16 #include "llvm/CodeGen/MachineBasicBlock.h"
  17 #include "llvm/CodeGen/MachineFrameInfo.h"
  18 #include "llvm/CodeGen/MachineFunction.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/IR/CallingConv.h"
  21 #include "llvm/IR/Function.h"
  22 #include <cassert>
  23 #include <vector>
  24
  25 #define MAX_LANES 64
  26
  27 using namespace llvm;
  28
  29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  30   : AMDGPUMachineFunction(MF),
  31     PrivateSegmentBuffer(false),
  32     DispatchPtr(false),
  33     QueuePtr(false),
  34     KernargSegmentPtr(false),
  35     DispatchID(false),
  36     FlatScratchInit(false),
  37     GridWorkgroupCountX(false),
  38     GridWorkgroupCountY(false),
  39     GridWorkgroupCountZ(false),
  40     WorkGroupIDX(false),
  41     WorkGroupIDY(false),
  42     WorkGroupIDZ(false),
  43     WorkGroupInfo(false),
  44     PrivateSegmentWaveByteOffset(false),
  45     WorkItemIDX(false),
  46     WorkItemIDY(false),
  47     WorkItemIDZ(false),
  48     ImplicitBufferPtr(false),
  49     ImplicitArgPtr(false),
  50     GITPtrHigh(0xffffffff),
  51     HighBitsOf32BitAddress(0) {
  52   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  53   const Function &F = MF.getFunction();
  54   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  55   WavesPerEU = ST.getWavesPerEU(F);
  56
  57   if (!isEntryFunction()) {
  58     // Non-entry functions have no special inputs for now, other registers
  59     // required for scratch access.
  60     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  61     ScratchWaveOffsetReg = AMDGPU::SGPR4;
  62     FrameOffsetReg = AMDGPU::SGPR5;
  63     StackPtrOffsetReg = AMDGPU::SGPR32;
  64
  65     ArgInfo.PrivateSegmentBuffer =
  66       ArgDescriptor::createRegister(ScratchRSrcReg);
  67     ArgInfo.PrivateSegmentWaveByteOffset =
  68       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
  69
  70     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  71       ImplicitArgPtr = true;
  72   } else {
  73     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  74       KernargSegmentPtr = true;
  75   }
  76
  77   CallingConv::ID CC = F.getCallingConv();
  78   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
  79     if (!F.arg_empty())
  80       KernargSegmentPtr = true;
  81     WorkGroupIDX = true;
  82     WorkItemIDX = true;
  83   } else if (CC == CallingConv::AMDGPU_PS) {
  84     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  85   }
  86
  87   if (ST.debuggerEmitPrologue()) {
  88     // Enable everything.
  89     WorkGroupIDX = true;
  90     WorkGroupIDY = true;
  91     WorkGroupIDZ = true;
  92     WorkItemIDX = true;
  93     WorkItemIDY = true;
  94     WorkItemIDZ = true;
  95   } else {
  96     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
  97       WorkGroupIDX = true;
  98
  99     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
 100       WorkGroupIDY = true;
 101
 102     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
 103       WorkGroupIDZ = true;
 104
 105     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
 106       WorkItemIDX = true;
 107
 108     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
 109       WorkItemIDY = true;
 110
 111     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
 112       WorkItemIDZ = true;
 113   }
 114
 115   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 116   bool MaySpill = ST.isVGPRSpillingEnabled(F);
 117   bool HasStackObjects = FrameInfo.hasStackObjects();
 118
 119   if (isEntryFunction()) {
 120     // X, XY, and XYZ are the only supported combinations, so make sure Y is
 121     // enabled if Z is.
 122     if (WorkItemIDZ)
 123       WorkItemIDY = true;
 124
 125     if (HasStackObjects || MaySpill) {
 126       PrivateSegmentWaveByteOffset = true;
 127
 128     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
 129     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
 130         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
 131       ArgInfo.PrivateSegmentWaveByteOffset
 132         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
 133     }
 134   }
 135
 136   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
 137   if (IsCOV2) {
 138     if (HasStackObjects || MaySpill)
 139       PrivateSegmentBuffer = true;
 140
 141     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
 142       DispatchPtr = true;
 143
 144     if (F.hasFnAttribute("amdgpu-queue-ptr"))
 145       QueuePtr = true;
 146
 147     if (F.hasFnAttribute("amdgpu-dispatch-id"))
 148       DispatchID = true;
 149   } else if (ST.isMesaGfxShader(MF)) {
 150     if (HasStackObjects || MaySpill)
 151       ImplicitBufferPtr = true;
 152   }
 153
 154   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
 155     KernargSegmentPtr = true;
 156
 157   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
 158     // TODO: This could be refined a lot. The attribute is a poor way of
 159     // detecting calls that may require it before argument lowering.
 160     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
 161       FlatScratchInit = true;
 162   }
 163
 164   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
 165   StringRef S = A.getValueAsString();
 166   if (!S.empty())
 167     S.consumeInteger(0, GITPtrHigh);
 168
 169   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
 170   S = A.getValueAsString();
 171   if (!S.empty())
 172     S.consumeInteger(0, HighBitsOf32BitAddress);
 173 }
 174
 175 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
 176   const SIRegisterInfo &TRI) {
 177   ArgInfo.PrivateSegmentBuffer =
 178     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 179     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
 180   NumUserSGPRs += 4;
 181   return ArgInfo.PrivateSegmentBuffer.getRegister();
 182 }
 183
 184 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
 185   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 186     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 187   NumUserSGPRs += 2;
 188   return ArgInfo.DispatchPtr.getRegister();
 189 }
 190
 191 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
 192   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 193     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 194   NumUserSGPRs += 2;
 195   return ArgInfo.QueuePtr.getRegister();
 196 }
 197
 198 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
 199   ArgInfo.KernargSegmentPtr
 200     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 201     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 202   NumUserSGPRs += 2;
 203   return ArgInfo.KernargSegmentPtr.getRegister();
 204 }
 205
 206 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
 207   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 208     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 209   NumUserSGPRs += 2;
 210   return ArgInfo.DispatchID.getRegister();
 211 }
 212
 213 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
 214   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 215     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 216   NumUserSGPRs += 2;
 217   return ArgInfo.FlatScratchInit.getRegister();
 218 }
 219
 220 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
 221   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 222     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 223   NumUserSGPRs += 2;
 224   return ArgInfo.ImplicitBufferPtr.getRegister();
 225 }
 226
 227 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
 228   for (unsigned I = 0; CSRegs[I]; ++I) {
 229     if (CSRegs[I] == Reg)
 230       return true;
 231   }
 232
 233   return false;
 234 }
 235
 236 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
 237 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
 238                                                     int FI) {
 239   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 240
 241   // This has already been allocated.
 242   if (!SpillLanes.empty())
 243     return true;
 244
 245   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 246   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 247   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 248   MachineRegisterInfo &MRI = MF.getRegInfo();
 249   unsigned WaveSize = ST.getWavefrontSize();
 250
 251   unsigned Size = FrameInfo.getObjectSize(FI);
 252   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
 253   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 254
 255   int NumLanes = Size / 4;
 256
 257   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 258
 259   // Make sure to handle the case where a wide SGPR spill may span between two
 260   // VGPRs.
 261   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
 262     unsigned LaneVGPR;
 263     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
 264
 265     if (VGPRIndex == 0) {
 266       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
 267       if (LaneVGPR == AMDGPU::NoRegister) {
 268         // We have no VGPRs left for spilling SGPRs. Reset because we will not
 269         // partially spill the SGPR to VGPRs.
 270         SGPRToVGPRSpills.erase(FI);
 271         NumVGPRSpillLanes -= I;
 272         return false;
 273       }
 274
 275       Optional<int> CSRSpillFI;
 276       if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
 277         // TODO: Should this be a CreateSpillStackObject? This is technically a
 278         // weird CSR spill.
 279         CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
 280       }
 281
 282       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
 283
 284       // Add this register as live-in to all blocks to avoid machine verifer
 285       // complaining about use of an undefined physical register.
 286       for (MachineBasicBlock &BB : MF)
 287         BB.addLiveIn(LaneVGPR);
 288     } else {
 289       LaneVGPR = SpillVGPRs.back().VGPR;
 290     }
 291
 292     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
 293   }
 294
 295   return true;
 296 }
 297
 298 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
 299   for (auto &R : SGPRToVGPRSpills)
 300     MFI.RemoveStackObject(R.first);
 301 }