1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
17 #include "AMDGPUMachineFunction.h"
18 #include "SIRegisterInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
/// PseudoSourceValue subclass that registers itself with the
/// PseudoSourceValue::TargetCustom kind. Presumably used to tag image memory
/// accesses (inferred from the class name -- confirm against users).
///
/// It overrides the isConstant(), isAliased() and mayAlias() queries; all three
/// take an unnamed (unused) MachineFrameInfo pointer.
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
32 explicit AMDGPUImagePseudoSourceValue() :
33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
35 bool isConstant(const MachineFrameInfo *) const override {
36 // This should probably be true for most images, but we will start by being
41 bool isAliased(const MachineFrameInfo *) const override {
42 // FIXME: If we ever change image intrinsics to accept fat pointers, then
43 // this could be true for some cases.
47 bool mayAlias(const MachineFrameInfo*) const override {
48 // FIXME: If we ever change image intrinsics to accept fat pointers, then
49 // this could be true for some cases.
/// PseudoSourceValue subclass that registers itself with the
/// PseudoSourceValue::TargetCustom kind. Presumably used to tag buffer memory
/// accesses (inferred from the class name -- confirm against users).
///
/// It overrides the isConstant(), isAliased() and mayAlias() queries; all three
/// take an unnamed (unused) MachineFrameInfo pointer.
///
/// NOTE(review): the comments inside this class are word-for-word the same as
/// those in AMDGPUImagePseudoSourceValue and still talk about "images" and
/// "image intrinsics"; they look copy-pasted -- confirm whether they should
/// refer to buffers instead.
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
56 explicit AMDGPUBufferPseudoSourceValue() :
57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
59 bool isConstant(const MachineFrameInfo *) const override {
60 // This should probably be true for most images, but we will start by being
65 bool isAliased(const MachineFrameInfo *) const override {
66 // FIXME: If we ever change image intrinsics to accept fat pointers, then
67 // this could be true for some cases.
71 bool mayAlias(const MachineFrameInfo*) const override {
72 // FIXME: If we ever change image intrinsics to accept fat pointers, then
73 // this could be true for some cases.
78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
79 /// tells the hardware which interpolation parameters to load.
///
/// Besides that, the members below record which input registers (user SGPRs,
/// system SGPRs) have been assigned, spill bookkeeping, and assorted
/// per-function settings exposed through the accessors further down.
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81 // FIXME: This should be removed and getPreloadedValue moved here.
82 friend class SIRegisterInfo;
86 // Registers that may be reserved for spilling purposes. These may be the same
87 // as the input registers.
88 unsigned ScratchRSrcReg;
89 unsigned ScratchWaveOffsetReg;
91 // This is the current function's incremented size from the kernel's scratch
92 // wave offset register. For an entry function, this is exactly the same as
93 // the ScratchWaveOffsetReg.
94 unsigned FrameOffsetReg;
96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
97 unsigned StackPtrOffsetReg;
99 // Input registers for non-HSA ABI
100 unsigned PrivateMemoryPtrUserSGPR;
102 // Input registers setup for the HSA ABI.
103 // User SGPRs in allocation order.
104 unsigned PrivateSegmentBufferUserSGPR;
105 unsigned DispatchPtrUserSGPR;
106 unsigned QueuePtrUserSGPR;
107 unsigned KernargSegmentPtrUserSGPR;
108 unsigned DispatchIDUserSGPR;
109 unsigned FlatScratchInitUserSGPR;
110 unsigned PrivateSegmentSizeUserSGPR;
111 unsigned GridWorkGroupCountXUserSGPR;
112 unsigned GridWorkGroupCountYUserSGPR;
113 unsigned GridWorkGroupCountZUserSGPR;
115 // System SGPRs in allocation order.
116 unsigned WorkGroupIDXSystemSGPR;
117 unsigned WorkGroupIDYSystemSGPR;
118 unsigned WorkGroupIDZSystemSGPR;
119 unsigned WorkGroupInfoSystemSGPR;
120 unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
// Bitmasks indexed by PS input number; individual bits are set through
// markPSInputAllocated() and markPSInputEnabled() respectively.
123 unsigned PSInputAddr;
124 unsigned PSInputEnable;
128 // A pair of default/requested minimum/maximum flat work group sizes.
129 // Minimum - first, maximum - second.
130 std::pair<unsigned, unsigned> FlatWorkGroupSizes;
132 // A pair of default/requested minimum/maximum number of waves per execution
133 // unit. Minimum - first, maximum - second.
134 std::pair<unsigned, unsigned> WavesPerEU;
136 // Stack object indices for work group IDs.
137 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
138 // Stack object indices for work item IDs.
139 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
// Pseudo source value instances handed out by getBufferPSV() / getImagePSV().
141 AMDGPUBufferPseudoSourceValue BufferPSV;
142 AMDGPUImagePseudoSourceValue ImagePSV;
145 unsigned LDSWaveSpillSize;
146 unsigned ScratchOffsetReg;
// Counts of user / system SGPRs; getNextUserSGPR() and getNextSystemSGPR()
// use them as offsets from AMDGPU::SGPR0.
147 unsigned NumUserSGPRs;
148 unsigned NumSystemSGPRs;
150 bool HasSpilledSGPRs;
151 bool HasSpilledVGPRs;
152 bool HasNonSpillStackObjects;
154 unsigned NumSpilledSGPRs;
155 unsigned NumSpilledVGPRs;
157 // Feature bits required for inputs passed in user SGPRs.
158 bool PrivateSegmentBuffer : 1;
159 bool DispatchPtr : 1;
161 bool KernargSegmentPtr : 1;
163 bool FlatScratchInit : 1;
164 bool GridWorkgroupCountX : 1;
165 bool GridWorkgroupCountY : 1;
166 bool GridWorkgroupCountZ : 1;
168 // Feature bits required for inputs passed in system SGPRs.
169 bool WorkGroupIDX : 1; // Always initialized.
170 bool WorkGroupIDY : 1;
171 bool WorkGroupIDZ : 1;
172 bool WorkGroupInfo : 1;
173 bool PrivateSegmentWaveByteOffset : 1;
175 bool WorkItemIDX : 1; // Always initialized.
176 bool WorkItemIDY : 1;
177 bool WorkItemIDZ : 1;
179 // Private memory buffer
180 // Compute directly in sgpr[0:1]
181 // Other shaders indirect 64-bits at sgpr[0:1]
182 bool PrivateMemoryInputPtr : 1;
/// \returns AMDGPU::SGPR0 offset by the number of user SGPRs. Asserts that no
/// system SGPRs have been counted yet.
184 MCPhysReg getNextUserSGPR() const {
185 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
186 return AMDGPU::SGPR0 + NumUserSGPRs;
/// \returns AMDGPU::SGPR0 offset by the combined number of user and system
/// SGPRs.
189 MCPhysReg getNextSystemSGPR() const {
190 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
// Spill slot description: a VGPR (AMDGPU::NoRegister when unset) together
// with a lane number, where -1 is treated as "no lane" by hasLane().
// NOTE(review): hasLane() and hasReg() only read members and could be
// const-qualified so they are callable through const references.
195 unsigned VGPR = AMDGPU::NoRegister;
198 SpilledReg() = default;
199 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
201 bool hasLane() { return Lane != -1;}
202 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
206 // SGPR->VGPR spilling support.
207 typedef std::pair<unsigned, unsigned> SpillRegMask;
209 // Track VGPR + wave index for each subregister of the SGPR spilled to
211 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
212 unsigned NumVGPRSpillLanes = 0;
213 SmallVector<unsigned, 2> SpillVGPRs;
217 SIMachineFunctionInfo(const MachineFunction &MF);
/// \returns the SGPR->VGPR spill entries recorded for \p FrameIndex, or an
/// empty ArrayRef when the frame index has no entry in SGPRToVGPRSpills.
219 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
220 auto I = SGPRToVGPRSpills.find(FrameIndex);
221 return (I == SGPRToVGPRSpills.end()) ?
222 ArrayRef<SpilledReg>() : makeArrayRef(I->second);
225 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
226 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
// TID register accessors; the register counts as "calculated" once it is
// something other than AMDGPU::NoRegister.
// NOTE(review): the ';' after the bodies of hasCalculatedTID() and
// getTIDReg() is redundant and may trigger -Wextra-semi.
228 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
229 unsigned getTIDReg() const { return TIDReg; };
230 void setTIDReg(unsigned Reg) { TIDReg = Reg; }
// User SGPR registration; implemented out of line.
233 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
234 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
235 unsigned addQueuePtr(const SIRegisterInfo &TRI);
236 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
237 unsigned addDispatchID(const SIRegisterInfo &TRI);
238 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
239 unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
// System SGPR registration: each add* helper below stores the result of
// getNextSystemSGPR() in the matching *SystemSGPR member and returns it.
242 unsigned addWorkGroupIDX() {
243 WorkGroupIDXSystemSGPR = getNextSystemSGPR();
245 return WorkGroupIDXSystemSGPR;
248 unsigned addWorkGroupIDY() {
249 WorkGroupIDYSystemSGPR = getNextSystemSGPR();
251 return WorkGroupIDYSystemSGPR;
254 unsigned addWorkGroupIDZ() {
255 WorkGroupIDZSystemSGPR = getNextSystemSGPR();
257 return WorkGroupIDZSystemSGPR;
260 unsigned addWorkGroupInfo() {
261 WorkGroupInfoSystemSGPR = getNextSystemSGPR();
263 return WorkGroupInfoSystemSGPR;
266 unsigned addPrivateSegmentWaveByteOffset() {
267 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
269 return PrivateSegmentWaveByteOffsetSystemSGPR;
/// Overrides the register recorded for the private segment wave byte offset.
272 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
273 PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
// Feature-bit queries for the input registers declared above.
276 bool hasPrivateSegmentBuffer() const {
277 return PrivateSegmentBuffer;
280 bool hasDispatchPtr() const {
284 bool hasQueuePtr() const {
288 bool hasKernargSegmentPtr() const {
289 return KernargSegmentPtr;
292 bool hasDispatchID() const {
296 bool hasFlatScratchInit() const {
297 return FlatScratchInit;
300 bool hasGridWorkgroupCountX() const {
301 return GridWorkgroupCountX;
304 bool hasGridWorkgroupCountY() const {
305 return GridWorkgroupCountY;
308 bool hasGridWorkgroupCountZ() const {
309 return GridWorkgroupCountZ;
312 bool hasWorkGroupIDX() const {
316 bool hasWorkGroupIDY() const {
320 bool hasWorkGroupIDZ() const {
324 bool hasWorkGroupInfo() const {
325 return WorkGroupInfo;
328 bool hasPrivateSegmentWaveByteOffset() const {
329 return PrivateSegmentWaveByteOffset;
332 bool hasWorkItemIDX() const {
336 bool hasWorkItemIDY() const {
340 bool hasWorkItemIDZ() const {
344 bool hasPrivateMemoryInputPtr() const {
345 return PrivateMemoryInputPtr;
348 unsigned getNumUserSGPRs() const {
/// \returns the combined number of user and system SGPRs.
352 unsigned getNumPreloadedSGPRs() const {
353 return NumUserSGPRs + NumSystemSGPRs;
356 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
357 return PrivateSegmentWaveByteOffsetSystemSGPR;
360 /// \brief Returns the physical register reserved for use as the resource
361 /// descriptor for scratch accesses.
362 unsigned getScratchRSrcReg() const {
363 return ScratchRSrcReg;
/// Sets the scratch resource descriptor register; \p Reg must not be
/// AMDGPU::NoRegister (asserted).
366 void setScratchRSrcReg(unsigned Reg) {
367 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
368 ScratchRSrcReg = Reg;
371 unsigned getScratchWaveOffsetReg() const {
372 return ScratchWaveOffsetReg;
375 unsigned getFrameOffsetReg() const {
376 return FrameOffsetReg;
/// Sets the stack pointer offset register; \p Reg must not be
/// AMDGPU::NoRegister (asserted).
379 void setStackPtrOffsetReg(unsigned Reg) {
380 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
381 StackPtrOffsetReg = Reg;
384 unsigned getStackPtrOffsetReg() const {
385 return StackPtrOffsetReg;
/// Sets the scratch wave offset register; \p Reg must not be
/// AMDGPU::NoRegister (asserted). For entry functions the frame offset
/// register is updated to the same register.
388 void setScratchWaveOffsetReg(unsigned Reg) {
389 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
390 ScratchWaveOffsetReg = Reg;
391 if (isEntryFunction())
392 FrameOffsetReg = ScratchWaveOffsetReg;
395 unsigned getQueuePtrUserSGPR() const {
396 return QueuePtrUserSGPR;
399 unsigned getPrivateMemoryPtrUserSGPR() const {
400 return PrivateMemoryPtrUserSGPR;
// Spill bookkeeping: flags and running counts for spilled SGPRs / VGPRs and
// for non-spill stack objects. The setters default their argument to true.
403 bool hasSpilledSGPRs() const {
404 return HasSpilledSGPRs;
407 void setHasSpilledSGPRs(bool Spill = true) {
408 HasSpilledSGPRs = Spill;
411 bool hasSpilledVGPRs() const {
412 return HasSpilledVGPRs;
415 void setHasSpilledVGPRs(bool Spill = true) {
416 HasSpilledVGPRs = Spill;
419 bool hasNonSpillStackObjects() const {
420 return HasNonSpillStackObjects;
423 void setHasNonSpillStackObjects(bool StackObject = true) {
424 HasNonSpillStackObjects = StackObject;
427 unsigned getNumSpilledSGPRs() const {
428 return NumSpilledSGPRs;
431 unsigned getNumSpilledVGPRs() const {
432 return NumSpilledVGPRs;
/// Adds \p num to the running count of spilled SGPRs.
435 void addToSpilledSGPRs(unsigned num) {
436 NumSpilledSGPRs += num;
/// Adds \p num to the running count of spilled VGPRs.
439 void addToSpilledVGPRs(unsigned num) {
440 NumSpilledVGPRs += num;
443 unsigned getPSInputAddr() const {
447 unsigned getPSInputEnable() const {
448 return PSInputEnable;
// Per-bit access to the PS input masks.
// NOTE(review): `1 << Index` shifts a signed int although the masks are
// unsigned; Index == 31 would shift into the sign bit and Index >= 32 is
// undefined. Consider `1u << Index` in the three helpers below.
/// \returns true if bit \p Index is set in PSInputAddr.
451 bool isPSInputAllocated(unsigned Index) const {
452 return PSInputAddr & (1 << Index);
/// Sets bit \p Index in PSInputAddr.
455 void markPSInputAllocated(unsigned Index) {
456 PSInputAddr |= 1 << Index;
/// Sets bit \p Index in PSInputEnable.
459 void markPSInputEnabled(unsigned Index) {
460 PSInputEnable |= 1 << Index;
463 bool returnsVoid() const {
467 void setIfReturnsVoid(bool Value) {
471 /// \returns A pair of default/requested minimum/maximum flat work group sizes
472 /// for this function.
473 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
474 return FlatWorkGroupSizes;
477 /// \returns Default/requested minimum flat work group size for this function.
478 unsigned getMinFlatWorkGroupSize() const {
479 return FlatWorkGroupSizes.first;
482 /// \returns Default/requested maximum flat work group size for this function.
483 unsigned getMaxFlatWorkGroupSize() const {
484 return FlatWorkGroupSizes.second;
487 /// \returns A pair of default/requested minimum/maximum number of waves per
489 std::pair<unsigned, unsigned> getWavesPerEU() const {
493 /// \returns Default/requested minimum number of waves per execution unit.
494 unsigned getMinWavesPerEU() const {
495 return WavesPerEU.first;
498 /// \returns Default/requested maximum number of waves per execution unit.
499 unsigned getMaxWavesPerEU() const {
500 return WavesPerEU.second;
503 /// \returns Stack object index for \p Dim's work group ID.
504 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
506 return DebuggerWorkGroupIDStackObjectIndices[Dim];
509 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
510 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
512 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
515 /// \returns Stack object index for \p Dim's work item ID.
516 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
518 return DebuggerWorkItemIDStackObjectIndices[Dim];
521 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
522 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
524 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
527 /// \returns SGPR used for \p Dim's work group ID.
528 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
531 assert(hasWorkGroupIDX());
532 return WorkGroupIDXSystemSGPR;
534 assert(hasWorkGroupIDY());
535 return WorkGroupIDYSystemSGPR;
537 assert(hasWorkGroupIDZ());
538 return WorkGroupIDZSystemSGPR;
540 llvm_unreachable("unexpected dimension");
543 /// \returns VGPR used for \p Dim's work item ID.
544 unsigned getWorkItemIDVGPR(unsigned Dim) const {
547 assert(hasWorkItemIDX());
548 return AMDGPU::VGPR0;
550 assert(hasWorkItemIDY());
551 return AMDGPU::VGPR1;
553 assert(hasWorkItemIDZ());
554 return AMDGPU::VGPR2;
556 llvm_unreachable("unexpected dimension");
559 unsigned getLDSWaveSpillSize() const {
560 return LDSWaveSpillSize;
563 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
567 const AMDGPUImagePseudoSourceValue *getImagePSV() const {
572 } // end namespace llvm
574 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H