lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  16
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <map>
#include <utility>
#include <vector>
  27
  28 namespace llvm {
  29
  30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
  31 public:
  32   explicit AMDGPUImagePseudoSourceValue() :
  33     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
  34
  35   bool isConstant(const MachineFrameInfo *) const override {
  36     // This should probably be true for most images, but we will start by being
  37     // conservative.
  38     return false;
  39   }
  40
  41   bool isAliased(const MachineFrameInfo *) const override {
  42     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  43     // this could be true for some cases.
  44     return false;
  45   }
  46
  47   bool mayAlias(const MachineFrameInfo*) const override {
  48     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  49     // this could be true for some cases.
  50     return false;
  51   }
  52 };
  53
  54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
  55 public:
  56   explicit AMDGPUBufferPseudoSourceValue() :
  57     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
  58
  59   bool isConstant(const MachineFrameInfo *) const override {
  60     // This should probably be true for most images, but we will start by being
  61     // conservative.
  62     return false;
  63   }
  64
  65   bool isAliased(const MachineFrameInfo *) const override {
  66     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  67     // this could be true for some cases.
  68     return false;
  69   }
  70
  71   bool mayAlias(const MachineFrameInfo*) const override {
  72     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  73     // this could be true for some cases.
  74     return false;
  75   }
  76 };
  77
  78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
  79 /// tells the hardware which interpolation parameters to load.
  80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  81   // FIXME: This should be removed and getPreloadedValue moved here.
  82   friend class SIRegisterInfo;
  83
  84   unsigned TIDReg;
  85
  86   // Registers that may be reserved for spilling purposes. These may be the same
  87   // as the input registers.
  88   unsigned ScratchRSrcReg;
  89   unsigned ScratchWaveOffsetReg;
  90
  91   // This is the current function's incremented size from the kernel's scratch
  92   // wave offset register. For an entry function, this is exactly the same as
  93   // the ScratchWaveOffsetReg.
  94   unsigned FrameOffsetReg;
  95
  96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  97   unsigned StackPtrOffsetReg;
  98
  99   // Input registers for non-HSA ABI
 100   unsigned PrivateMemoryPtrUserSGPR;
 101
 102   // Input registers setup for the HSA ABI.
 103   // User SGPRs in allocation order.
 104   unsigned PrivateSegmentBufferUserSGPR;
 105   unsigned DispatchPtrUserSGPR;
 106   unsigned QueuePtrUserSGPR;
 107   unsigned KernargSegmentPtrUserSGPR;
 108   unsigned DispatchIDUserSGPR;
 109   unsigned FlatScratchInitUserSGPR;
 110   unsigned PrivateSegmentSizeUserSGPR;
 111   unsigned GridWorkGroupCountXUserSGPR;
 112   unsigned GridWorkGroupCountYUserSGPR;
 113   unsigned GridWorkGroupCountZUserSGPR;
 114
 115   // System SGPRs in allocation order.
 116   unsigned WorkGroupIDXSystemSGPR;
 117   unsigned WorkGroupIDYSystemSGPR;
 118   unsigned WorkGroupIDZSystemSGPR;
 119   unsigned WorkGroupInfoSystemSGPR;
 120   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
 121
 122   // Graphics info.
 123   unsigned PSInputAddr;
 124   unsigned PSInputEnable;
 125
 126   bool ReturnsVoid;
 127
 128   // A pair of default/requested minimum/maximum flat work group sizes.
 129   // Minimum - first, maximum - second.
 130   std::pair<unsigned, unsigned> FlatWorkGroupSizes;
 131
 132   // A pair of default/requested minimum/maximum number of waves per execution
 133   // unit. Minimum - first, maximum - second.
 134   std::pair<unsigned, unsigned> WavesPerEU;
 135
 136   // Stack object indices for work group IDs.
 137   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
 138   // Stack object indices for work item IDs.
 139   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
 140
 141   AMDGPUBufferPseudoSourceValue BufferPSV;
 142   AMDGPUImagePseudoSourceValue ImagePSV;
 143
 144 private:
 145   unsigned LDSWaveSpillSize;
 146   unsigned ScratchOffsetReg;
 147   unsigned NumUserSGPRs;
 148   unsigned NumSystemSGPRs;
 149
 150   bool HasSpilledSGPRs;
 151   bool HasSpilledVGPRs;
 152   bool HasNonSpillStackObjects;
 153
 154   unsigned NumSpilledSGPRs;
 155   unsigned NumSpilledVGPRs;
 156
 157   // Feature bits required for inputs passed in user SGPRs.
 158   bool PrivateSegmentBuffer : 1;
 159   bool DispatchPtr : 1;
 160   bool QueuePtr : 1;
 161   bool KernargSegmentPtr : 1;
 162   bool DispatchID : 1;
 163   bool FlatScratchInit : 1;
 164   bool GridWorkgroupCountX : 1;
 165   bool GridWorkgroupCountY : 1;
 166   bool GridWorkgroupCountZ : 1;
 167
 168   // Feature bits required for inputs passed in system SGPRs.
 169   bool WorkGroupIDX : 1; // Always initialized.
 170   bool WorkGroupIDY : 1;
 171   bool WorkGroupIDZ : 1;
 172   bool WorkGroupInfo : 1;
 173   bool PrivateSegmentWaveByteOffset : 1;
 174
 175   bool WorkItemIDX : 1; // Always initialized.
 176   bool WorkItemIDY : 1;
 177   bool WorkItemIDZ : 1;
 178
 179   // Private memory buffer
 180   // Compute directly in sgpr[0:1]
 181   // Other shaders indirect 64-bits at sgpr[0:1]
 182   bool PrivateMemoryInputPtr : 1;
 183
 184   MCPhysReg getNextUserSGPR() const {
 185     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
 186     return AMDGPU::SGPR0 + NumUserSGPRs;
 187   }
 188
 189   MCPhysReg getNextSystemSGPR() const {
 190     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 191   }
 192
 193 public:
 194   struct SpilledReg {
 195     unsigned VGPR = AMDGPU::NoRegister;
 196     int Lane = -1;
 197
 198     SpilledReg() = default;
 199     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
 200
 201     bool hasLane() { return Lane != -1;}
 202     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
 203   };
 204
 205 private:
 206   // SGPR->VGPR spilling support.
 207   typedef std::pair<unsigned, unsigned> SpillRegMask;
 208
 209   // Track VGPR + wave index for each subregister of the SGPR spilled to
 210   // frameindex key.
 211   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 212   unsigned NumVGPRSpillLanes = 0;
 213   SmallVector<unsigned, 2> SpillVGPRs;
 214
 215 public:
 216
 217   SIMachineFunctionInfo(const MachineFunction &MF);
 218
 219   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 220     auto I = SGPRToVGPRSpills.find(FrameIndex);
 221     return (I == SGPRToVGPRSpills.end()) ?
 222       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 223   }
 224
 225   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 226   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
 227
 228   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
 229   unsigned getTIDReg() const { return TIDReg; };
 230   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
 231
 232   // Add user SGPRs.
 233   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 234   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
 235   unsigned addQueuePtr(const SIRegisterInfo &TRI);
 236   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
 237   unsigned addDispatchID(const SIRegisterInfo &TRI);
 238   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
 239   unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
 240
 241   // Add system SGPRs.
 242   unsigned addWorkGroupIDX() {
 243     WorkGroupIDXSystemSGPR = getNextSystemSGPR();
 244     NumSystemSGPRs += 1;
 245     return WorkGroupIDXSystemSGPR;
 246   }
 247
 248   unsigned addWorkGroupIDY() {
 249     WorkGroupIDYSystemSGPR = getNextSystemSGPR();
 250     NumSystemSGPRs += 1;
 251     return WorkGroupIDYSystemSGPR;
 252   }
 253
 254   unsigned addWorkGroupIDZ() {
 255     WorkGroupIDZSystemSGPR = getNextSystemSGPR();
 256     NumSystemSGPRs += 1;
 257     return WorkGroupIDZSystemSGPR;
 258   }
 259
 260   unsigned addWorkGroupInfo() {
 261     WorkGroupInfoSystemSGPR = getNextSystemSGPR();
 262     NumSystemSGPRs += 1;
 263     return WorkGroupInfoSystemSGPR;
 264   }
 265
 266   unsigned addPrivateSegmentWaveByteOffset() {
 267     PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
 268     NumSystemSGPRs += 1;
 269     return PrivateSegmentWaveByteOffsetSystemSGPR;
 270   }
 271
 272   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
 273     PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
 274   }
 275
 276   bool hasPrivateSegmentBuffer() const {
 277     return PrivateSegmentBuffer;
 278   }
 279
 280   bool hasDispatchPtr() const {
 281     return DispatchPtr;
 282   }
 283
 284   bool hasQueuePtr() const {
 285     return QueuePtr;
 286   }
 287
 288   bool hasKernargSegmentPtr() const {
 289     return KernargSegmentPtr;
 290   }
 291
 292   bool hasDispatchID() const {
 293     return DispatchID;
 294   }
 295
 296   bool hasFlatScratchInit() const {
 297     return FlatScratchInit;
 298   }
 299
 300   bool hasGridWorkgroupCountX() const {
 301     return GridWorkgroupCountX;
 302   }
 303
 304   bool hasGridWorkgroupCountY() const {
 305     return GridWorkgroupCountY;
 306   }
 307
 308   bool hasGridWorkgroupCountZ() const {
 309     return GridWorkgroupCountZ;
 310   }
 311
 312   bool hasWorkGroupIDX() const {
 313     return WorkGroupIDX;
 314   }
 315
 316   bool hasWorkGroupIDY() const {
 317     return WorkGroupIDY;
 318   }
 319
 320   bool hasWorkGroupIDZ() const {
 321     return WorkGroupIDZ;
 322   }
 323
 324   bool hasWorkGroupInfo() const {
 325     return WorkGroupInfo;
 326   }
 327
 328   bool hasPrivateSegmentWaveByteOffset() const {
 329     return PrivateSegmentWaveByteOffset;
 330   }
 331
 332   bool hasWorkItemIDX() const {
 333     return WorkItemIDX;
 334   }
 335
 336   bool hasWorkItemIDY() const {
 337     return WorkItemIDY;
 338   }
 339
 340   bool hasWorkItemIDZ() const {
 341     return WorkItemIDZ;
 342   }
 343
 344   bool hasPrivateMemoryInputPtr() const {
 345     return PrivateMemoryInputPtr;
 346   }
 347
 348   unsigned getNumUserSGPRs() const {
 349     return NumUserSGPRs;
 350   }
 351
 352   unsigned getNumPreloadedSGPRs() const {
 353     return NumUserSGPRs + NumSystemSGPRs;
 354   }
 355
 356   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 357     return PrivateSegmentWaveByteOffsetSystemSGPR;
 358   }
 359
 360   /// \brief Returns the physical register reserved for use as the resource
 361   /// descriptor for scratch accesses.
 362   unsigned getScratchRSrcReg() const {
 363     return ScratchRSrcReg;
 364   }
 365
 366   void setScratchRSrcReg(unsigned Reg) {
 367     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 368     ScratchRSrcReg = Reg;
 369   }
 370
 371   unsigned getScratchWaveOffsetReg() const {
 372     return ScratchWaveOffsetReg;
 373   }
 374
 375   unsigned getFrameOffsetReg() const {
 376     return FrameOffsetReg;
 377   }
 378
 379   void setStackPtrOffsetReg(unsigned Reg) {
 380     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 381     StackPtrOffsetReg = Reg;
 382   }
 383
 384   unsigned getStackPtrOffsetReg() const {
 385     return StackPtrOffsetReg;
 386   }
 387
 388   void setScratchWaveOffsetReg(unsigned Reg) {
 389     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 390     ScratchWaveOffsetReg = Reg;
 391     if (isEntryFunction())
 392       FrameOffsetReg = ScratchWaveOffsetReg;
 393   }
 394
 395   unsigned getQueuePtrUserSGPR() const {
 396     return QueuePtrUserSGPR;
 397   }
 398
 399   unsigned getPrivateMemoryPtrUserSGPR() const {
 400     return PrivateMemoryPtrUserSGPR;
 401   }
 402
 403   bool hasSpilledSGPRs() const {
 404     return HasSpilledSGPRs;
 405   }
 406
 407   void setHasSpilledSGPRs(bool Spill = true) {
 408     HasSpilledSGPRs = Spill;
 409   }
 410
 411   bool hasSpilledVGPRs() const {
 412     return HasSpilledVGPRs;
 413   }
 414
 415   void setHasSpilledVGPRs(bool Spill = true) {
 416     HasSpilledVGPRs = Spill;
 417   }
 418
 419   bool hasNonSpillStackObjects() const {
 420     return HasNonSpillStackObjects;
 421   }
 422
 423   void setHasNonSpillStackObjects(bool StackObject = true) {
 424     HasNonSpillStackObjects = StackObject;
 425   }
 426
 427   unsigned getNumSpilledSGPRs() const {
 428     return NumSpilledSGPRs;
 429   }
 430
 431   unsigned getNumSpilledVGPRs() const {
 432     return NumSpilledVGPRs;
 433   }
 434
 435   void addToSpilledSGPRs(unsigned num) {
 436     NumSpilledSGPRs += num;
 437   }
 438
 439   void addToSpilledVGPRs(unsigned num) {
 440     NumSpilledVGPRs += num;
 441   }
 442
 443   unsigned getPSInputAddr() const {
 444     return PSInputAddr;
 445   }
 446
 447   unsigned getPSInputEnable() const {
 448     return PSInputEnable;
 449   }
 450
 451   bool isPSInputAllocated(unsigned Index) const {
 452     return PSInputAddr & (1 << Index);
 453   }
 454
 455   void markPSInputAllocated(unsigned Index) {
 456     PSInputAddr |= 1 << Index;
 457   }
 458
 459   void markPSInputEnabled(unsigned Index) {
 460     PSInputEnable |= 1 << Index;
 461   }
 462
 463   bool returnsVoid() const {
 464     return ReturnsVoid;
 465   }
 466
 467   void setIfReturnsVoid(bool Value) {
 468     ReturnsVoid = Value;
 469   }
 470
 471   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 472   /// for this function.
 473   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 474     return FlatWorkGroupSizes;
 475   }
 476
 477   /// \returns Default/requested minimum flat work group size for this function.
 478   unsigned getMinFlatWorkGroupSize() const {
 479     return FlatWorkGroupSizes.first;
 480   }
 481
 482   /// \returns Default/requested maximum flat work group size for this function.
 483   unsigned getMaxFlatWorkGroupSize() const {
 484     return FlatWorkGroupSizes.second;
 485   }
 486
 487   /// \returns A pair of default/requested minimum/maximum number of waves per
 488   /// execution unit.
 489   std::pair<unsigned, unsigned> getWavesPerEU() const {
 490     return WavesPerEU;
 491   }
 492
 493   /// \returns Default/requested minimum number of waves per execution unit.
 494   unsigned getMinWavesPerEU() const {
 495     return WavesPerEU.first;
 496   }
 497
 498   /// \returns Default/requested maximum number of waves per execution unit.
 499   unsigned getMaxWavesPerEU() const {
 500     return WavesPerEU.second;
 501   }
 502
 503   /// \returns Stack object index for \p Dim's work group ID.
 504   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
 505     assert(Dim < 3);
 506     return DebuggerWorkGroupIDStackObjectIndices[Dim];
 507   }
 508
 509   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
 510   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
 511     assert(Dim < 3);
 512     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
 513   }
 514
 515   /// \returns Stack object index for \p Dim's work item ID.
 516   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
 517     assert(Dim < 3);
 518     return DebuggerWorkItemIDStackObjectIndices[Dim];
 519   }
 520
 521   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
 522   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
 523     assert(Dim < 3);
 524     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
 525   }
 526
 527   /// \returns SGPR used for \p Dim's work group ID.
 528   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
 529     switch (Dim) {
 530     case 0:
 531       assert(hasWorkGroupIDX());
 532       return WorkGroupIDXSystemSGPR;
 533     case 1:
 534       assert(hasWorkGroupIDY());
 535       return WorkGroupIDYSystemSGPR;
 536     case 2:
 537       assert(hasWorkGroupIDZ());
 538       return WorkGroupIDZSystemSGPR;
 539     }
 540     llvm_unreachable("unexpected dimension");
 541   }
 542
 543   /// \returns VGPR used for \p Dim' work item ID.
 544   unsigned getWorkItemIDVGPR(unsigned Dim) const {
 545     switch (Dim) {
 546     case 0:
 547       assert(hasWorkItemIDX());
 548       return AMDGPU::VGPR0;
 549     case 1:
 550       assert(hasWorkItemIDY());
 551       return AMDGPU::VGPR1;
 552     case 2:
 553       assert(hasWorkItemIDZ());
 554       return AMDGPU::VGPR2;
 555     }
 556     llvm_unreachable("unexpected dimension");
 557   }
 558
 559   unsigned getLDSWaveSpillSize() const {
 560     return LDSWaveSpillSize;
 561   }
 562
 563   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
 564     return &BufferPSV;
 565   }
 566
 567   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
 568     return &ImagePSV;
 569   }
 570 };
 571
 572 } // end namespace llvm
 573
 574 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H