1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
17 #include "AMDGPUArgumentUsageInfo.h"
18 #include "AMDGPUMachineFunction.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/MC/MCRegisterInfo.h"
27 #include "llvm/Support/ErrorHandling.h"
35 class MachineFrameInfo;
36 class MachineFunction;
38 class TargetRegisterClass;
/// PseudoSourceValue subclass that SIMachineFunctionInfo::getImagePSV hands
/// out. It is constructed with the PseudoSourceValue::TargetCustom kind and
/// overrides the isConstant, isAliased and mayAlias queries.
40 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
42 // TODO: Is the img rsrc useful?
43 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
44 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
46 bool isConstant(const MachineFrameInfo *) const override {
47 // This should probably be true for most images, but we will start by being
52 bool isAliased(const MachineFrameInfo *) const override {
56 bool mayAlias(const MachineFrameInfo *) const override {
/// PseudoSourceValue subclass that SIMachineFunctionInfo::getBufferPSV hands
/// out. It is constructed with the PseudoSourceValue::TargetCustom kind and
/// overrides the isConstant, isAliased and mayAlias queries.
// NOTE(review): the comments inside this buffer class talk about images and
// image intrinsics; they look copied from the image class - confirm wording.
61 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
63 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
64 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
66 bool isConstant(const MachineFrameInfo *) const override {
67 // This should probably be true for most images, but we will start by being
72 bool isAliased(const MachineFrameInfo *) const override {
73 // FIXME: If we ever change image intrinsics to accept fat pointers, then
74 // this could be true for some cases.
78 bool mayAlias(const MachineFrameInfo *) const override {
79 // FIXME: If we ever change image intrinsics to accept fat pointers, then
80 // this could be true for some cases.
85 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
86 /// tells the hardware which interpolation parameters to load.
87 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
88 unsigned TIDReg = AMDGPU::NoRegister;
90 // Registers that may be reserved for spilling purposes. These may be the same
91 // as the input registers.
92 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
93 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
95 // This is the current function's incremented size from the kernel's scratch
96 // wave offset register. For an entry function, this is exactly the same as
97 // the ScratchWaveOffsetReg.
98 unsigned FrameOffsetReg = AMDGPU::FP_REG;
100 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
101 unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
103 AMDGPUFunctionArgInfo ArgInfo;
106 unsigned PSInputAddr = 0;
107 unsigned PSInputEnable = 0;
109 /// Number of bytes of arguments this function has on the stack. If the callee
110 /// is expected to restore the argument stack, this should be a multiple of 16,
111 /// all usable during a tail call.
113 /// The alternative would forbid tail call optimisation in some cases: if we
114 /// want to transfer control from a function with 8 bytes of stack-argument
115 /// space to a function with 16 bytes then misalignment of this value would
116 /// make a stack adjustment necessary, which could not be undone by the
118 unsigned BytesInStackArgArea = 0;
120 bool ReturnsVoid = true;
122 // A pair of default/requested minimum/maximum flat work group sizes.
123 // Minimum - first, maximum - second.
124 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
126 // A pair of default/requested minimum/maximum number of waves per execution
127 // unit. Minimum - first, maximum - second.
128 std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
130 // Stack object indices for work group IDs.
131 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
133 // Stack object indices for work item IDs.
134 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
136 DenseMap<const Value *,
137 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
138 DenseMap<const Value *,
139 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
142 unsigned LDSWaveSpillSize = 0;
143 unsigned NumUserSGPRs = 0;
144 unsigned NumSystemSGPRs = 0;
146 bool HasSpilledSGPRs = false;
147 bool HasSpilledVGPRs = false;
148 bool HasNonSpillStackObjects = false;
150 unsigned NumSpilledSGPRs = 0;
151 unsigned NumSpilledVGPRs = 0;
153 // Feature bits required for inputs passed in user SGPRs.
154 bool PrivateSegmentBuffer : 1;
155 bool DispatchPtr : 1;
157 bool KernargSegmentPtr : 1;
159 bool FlatScratchInit : 1;
160 bool GridWorkgroupCountX : 1;
161 bool GridWorkgroupCountY : 1;
162 bool GridWorkgroupCountZ : 1;
164 // Feature bits required for inputs passed in system SGPRs.
165 bool WorkGroupIDX : 1; // Always initialized.
166 bool WorkGroupIDY : 1;
167 bool WorkGroupIDZ : 1;
168 bool WorkGroupInfo : 1;
169 bool PrivateSegmentWaveByteOffset : 1;
171 bool WorkItemIDX : 1; // Always initialized.
172 bool WorkItemIDY : 1;
173 bool WorkItemIDZ : 1;
175 // Private memory buffer
176 // Compute directly in sgpr[0:1]
177 // Other shaders indirect 64-bits at sgpr[0:1]
178 bool ImplicitBufferPtr : 1;
180 // Pointer to where the ABI inserts special kernel arguments separate from the
181 // user arguments. This is an offset from the KernargSegmentPtr.
182 bool ImplicitArgPtr : 1;
184 // The hard-wired high half of the address of the global information table
185 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
186 // current hardware only allows a 16-bit value.
/// \returns The register AMDGPU::SGPR0 + NumUserSGPRs, i.e. the SGPR that
/// follows the user SGPRs counted so far. Asserts that no system SGPR has
/// been counted yet (NumSystemSGPRs == 0).
189 MCPhysReg getNextUserSGPR() const {
190 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
191 return AMDGPU::SGPR0 + NumUserSGPRs;
/// \returns The register AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs, i.e.
/// the SGPR that follows every user and system SGPR counted so far.
194 MCPhysReg getNextSystemSGPR() const {
195 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
200 unsigned VGPR = AMDGPU::NoRegister;
203 SpilledReg() = default;
204 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
/// \returns true when Lane holds something other than -1.
// NOTE(review): not const-qualified, so it cannot be called on a const
// SpilledReg; consider adding const.
206 bool hasLane() { return Lane != -1;}
/// \returns true when VGPR holds something other than AMDGPU::NoRegister.
// NOTE(review): not const-qualified, so it cannot be called on a const
// SpilledReg; consider adding const.
207 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
210 struct SGPRSpillVGPRCSR {
211 // VGPR used for SGPR spills
214 // If the VGPR is a CSR, the stack slot used to save/restore it in the
218 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
222 // SGPR->VGPR spilling support.
223 using SpillRegMask = std::pair<unsigned, unsigned>;
225 // Track VGPR + wave index for each subregister of the SGPR spilled to
227 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
228 unsigned NumVGPRSpillLanes = 0;
229 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
232 SIMachineFunctionInfo(const MachineFunction &MF);
/// Looks up \p FrameIndex in SGPRToVGPRSpills.
/// \returns A view of the SpilledReg entries stored for \p FrameIndex, or an
/// empty ArrayRef when the map has no entry for it.
234 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
235 auto I = SGPRToVGPRSpills.find(FrameIndex);
236 return (I == SGPRToVGPRSpills.end()) ?
237 ArrayRef<SpilledReg>() : makeArrayRef(I->second);
240 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
244 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
245 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
/// \returns true when TIDReg is set to something other than
/// AMDGPU::NoRegister.
247 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
/// \returns The current value of TIDReg (AMDGPU::NoRegister until set).
248 unsigned getTIDReg() const { return TIDReg; }
/// Stores \p Reg in TIDReg.
249 void setTIDReg(unsigned Reg) { TIDReg = Reg; }
/// \returns The recorded number of bytes of stack arguments
/// (BytesInStackArgArea).
251 unsigned getBytesInStackArgArea() const {
252 return BytesInStackArgArea;
/// Records \p Bytes as the number of bytes of stack arguments
/// (BytesInStackArgArea).
255 void setBytesInStackArgArea(unsigned Bytes) {
256 BytesInStackArgArea = Bytes;
260 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
261 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
262 unsigned addQueuePtr(const SIRegisterInfo &TRI);
263 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
264 unsigned addDispatchID(const SIRegisterInfo &TRI);
265 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
266 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
/// Records the register given by getNextSystemSGPR() as the
/// ArgInfo.WorkGroupIDX descriptor.
/// \returns The register stored in that descriptor.
269 unsigned addWorkGroupIDX() {
270 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
272 return ArgInfo.WorkGroupIDX.getRegister();
/// Records the register given by getNextSystemSGPR() as the
/// ArgInfo.WorkGroupIDY descriptor.
/// \returns The register stored in that descriptor.
275 unsigned addWorkGroupIDY() {
276 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
278 return ArgInfo.WorkGroupIDY.getRegister();
/// Records the register given by getNextSystemSGPR() as the
/// ArgInfo.WorkGroupIDZ descriptor.
/// \returns The register stored in that descriptor.
281 unsigned addWorkGroupIDZ() {
282 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
284 return ArgInfo.WorkGroupIDZ.getRegister();
/// Records the register given by getNextSystemSGPR() as the
/// ArgInfo.WorkGroupInfo descriptor.
/// \returns The register stored in that descriptor.
287 unsigned addWorkGroupInfo() {
288 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
290 return ArgInfo.WorkGroupInfo.getRegister();
293 // Add special VGPR inputs
/// Stores \p Arg as the ArgInfo.WorkItemIDX descriptor.
294 void setWorkItemIDX(ArgDescriptor Arg) {
295 ArgInfo.WorkItemIDX = Arg;
/// Stores \p Arg as the ArgInfo.WorkItemIDY descriptor.
298 void setWorkItemIDY(ArgDescriptor Arg) {
299 ArgInfo.WorkItemIDY = Arg;
/// Stores \p Arg as the ArgInfo.WorkItemIDZ descriptor.
302 void setWorkItemIDZ(ArgDescriptor Arg) {
303 ArgInfo.WorkItemIDZ = Arg;
/// Records the register given by getNextSystemSGPR() as the
/// ArgInfo.PrivateSegmentWaveByteOffset descriptor.
/// \returns The register stored in that descriptor.
306 unsigned addPrivateSegmentWaveByteOffset() {
307 ArgInfo.PrivateSegmentWaveByteOffset
308 = ArgDescriptor::createRegister(getNextSystemSGPR());
310 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
/// Replaces the ArgInfo.PrivateSegmentWaveByteOffset descriptor with a
/// register descriptor for \p Reg.
313 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
314 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
/// \returns The value of the PrivateSegmentBuffer feature bit.
317 bool hasPrivateSegmentBuffer() const {
318 return PrivateSegmentBuffer;
321 bool hasDispatchPtr() const {
325 bool hasQueuePtr() const {
/// \returns The value of the KernargSegmentPtr feature bit.
329 bool hasKernargSegmentPtr() const {
330 return KernargSegmentPtr;
333 bool hasDispatchID() const {
/// \returns The value of the FlatScratchInit feature bit.
337 bool hasFlatScratchInit() const {
338 return FlatScratchInit;
/// \returns The value of the GridWorkgroupCountX feature bit.
341 bool hasGridWorkgroupCountX() const {
342 return GridWorkgroupCountX;
/// \returns The value of the GridWorkgroupCountY feature bit.
345 bool hasGridWorkgroupCountY() const {
346 return GridWorkgroupCountY;
/// \returns The value of the GridWorkgroupCountZ feature bit.
349 bool hasGridWorkgroupCountZ() const {
350 return GridWorkgroupCountZ;
353 bool hasWorkGroupIDX() const {
357 bool hasWorkGroupIDY() const {
361 bool hasWorkGroupIDZ() const {
/// \returns The value of the WorkGroupInfo feature bit.
365 bool hasWorkGroupInfo() const {
366 return WorkGroupInfo;
/// \returns The value of the PrivateSegmentWaveByteOffset feature bit.
369 bool hasPrivateSegmentWaveByteOffset() const {
370 return PrivateSegmentWaveByteOffset;
373 bool hasWorkItemIDX() const {
377 bool hasWorkItemIDY() const {
381 bool hasWorkItemIDZ() const {
/// \returns The value of the ImplicitArgPtr feature bit.
385 bool hasImplicitArgPtr() const {
386 return ImplicitArgPtr;
/// \returns The value of the ImplicitBufferPtr feature bit.
389 bool hasImplicitBufferPtr() const {
390 return ImplicitBufferPtr;
393 AMDGPUFunctionArgInfo &getArgInfo() {
397 const AMDGPUFunctionArgInfo &getArgInfo() const {
/// Forwards \p Value to ArgInfo.getPreloadedValue().
/// \returns The (argument descriptor, register class) pair it produces.
401 std::pair<const ArgDescriptor *, const TargetRegisterClass *>
402 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
403 return ArgInfo.getPreloadedValue(Value);
/// \returns The register of the argument descriptor that
/// ArgInfo.getPreloadedValue() yields for \p Value.
// NOTE(review): the descriptor pointer is dereferenced without a null check;
// confirm getPreloadedValue() never yields a null descriptor for the values
// callers pass here.
406 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
407 return ArgInfo.getPreloadedValue(Value).first->getRegister();
410 unsigned getGITPtrHigh() const {
414 unsigned getNumUserSGPRs() const {
/// \returns The sum of the user and system SGPR counts
/// (NumUserSGPRs + NumSystemSGPRs).
418 unsigned getNumPreloadedSGPRs() const {
419 return NumUserSGPRs + NumSystemSGPRs;
/// \returns The register held by the ArgInfo.PrivateSegmentWaveByteOffset
/// descriptor.
422 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
423 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
426 /// \brief Returns the physical register reserved for use as the resource
427 /// descriptor for scratch accesses.
428 unsigned getScratchRSrcReg() const {
429 return ScratchRSrcReg;
/// Stores \p Reg in ScratchRSrcReg. Asserts that \p Reg is not
/// AMDGPU::NoRegister.
432 void setScratchRSrcReg(unsigned Reg) {
433 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
434 ScratchRSrcReg = Reg;
/// \returns The current value of ScratchWaveOffsetReg.
437 unsigned getScratchWaveOffsetReg() const {
438 return ScratchWaveOffsetReg;
/// \returns The current value of FrameOffsetReg.
441 unsigned getFrameOffsetReg() const {
442 return FrameOffsetReg;
/// Stores \p Reg in StackPtrOffsetReg.
445 void setStackPtrOffsetReg(unsigned Reg) {
446 StackPtrOffsetReg = Reg;
449 // Note the unset value for this is AMDGPU::SP_REG rather than
450 // NoRegister. This is mostly a workaround for MIR tests where state that
451 // can't be directly computed from the function is not preserved in serialized
/// \returns The current value of StackPtrOffsetReg.
453 unsigned getStackPtrOffsetReg() const {
454 return StackPtrOffsetReg;
/// Stores \p Reg in ScratchWaveOffsetReg, asserting that it is not
/// AMDGPU::NoRegister. When isEntryFunction() is true, FrameOffsetReg is set
/// to the same register as well.
457 void setScratchWaveOffsetReg(unsigned Reg) {
458 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
459 ScratchWaveOffsetReg = Reg;
460 if (isEntryFunction())
461 FrameOffsetReg = ScratchWaveOffsetReg;
/// \returns The register held by the ArgInfo.QueuePtr descriptor.
464 unsigned getQueuePtrUserSGPR() const {
465 return ArgInfo.QueuePtr.getRegister();
/// \returns The register held by the ArgInfo.ImplicitBufferPtr descriptor.
468 unsigned getImplicitBufferPtrUserSGPR() const {
469 return ArgInfo.ImplicitBufferPtr.getRegister();
/// \returns The HasSpilledSGPRs flag.
472 bool hasSpilledSGPRs() const {
473 return HasSpilledSGPRs;
/// Sets the HasSpilledSGPRs flag to \p Spill (true when omitted).
476 void setHasSpilledSGPRs(bool Spill = true) {
477 HasSpilledSGPRs = Spill;
/// \returns The HasSpilledVGPRs flag.
480 bool hasSpilledVGPRs() const {
481 return HasSpilledVGPRs;
/// Sets the HasSpilledVGPRs flag to \p Spill (true when omitted).
484 void setHasSpilledVGPRs(bool Spill = true) {
485 HasSpilledVGPRs = Spill;
/// \returns The HasNonSpillStackObjects flag.
488 bool hasNonSpillStackObjects() const {
489 return HasNonSpillStackObjects;
/// Sets the HasNonSpillStackObjects flag to \p StackObject (true when
/// omitted).
492 void setHasNonSpillStackObjects(bool StackObject = true) {
493 HasNonSpillStackObjects = StackObject;
/// \returns The running NumSpilledSGPRs count.
496 unsigned getNumSpilledSGPRs() const {
497 return NumSpilledSGPRs;
/// \returns The running NumSpilledVGPRs count.
500 unsigned getNumSpilledVGPRs() const {
501 return NumSpilledVGPRs;
/// Increases the NumSpilledSGPRs count by \p num.
504 void addToSpilledSGPRs(unsigned num) {
505 NumSpilledSGPRs += num;
/// Increases the NumSpilledVGPRs count by \p num.
508 void addToSpilledVGPRs(unsigned num) {
509 NumSpilledVGPRs += num;
512 unsigned getPSInputAddr() const {
/// \returns The PSInputEnable bit mask.
516 unsigned getPSInputEnable() const {
517 return PSInputEnable;
/// \returns true when bit \p Index of the PSInputAddr mask is set.
// NOTE(review): the mask is built by shifting the int literal 1, so \p Index
// must be smaller than the bit width of int for the shift to be defined;
// an unsigned literal (1u) would match the unsigned mask - confirm.
520 bool isPSInputAllocated(unsigned Index) const {
521 return PSInputAddr & (1 << Index);
/// Sets bit \p Index in the PSInputAddr mask.
// NOTE(review): the mask is built by shifting the int literal 1, so \p Index
// must be smaller than the bit width of int for the shift to be defined.
524 void markPSInputAllocated(unsigned Index) {
525 PSInputAddr |= 1 << Index;
/// Sets bit \p Index in the PSInputEnable mask.
// NOTE(review): the mask is built by shifting the int literal 1, so \p Index
// must be smaller than the bit width of int for the shift to be defined.
528 void markPSInputEnabled(unsigned Index) {
529 PSInputEnable |= 1 << Index;
532 bool returnsVoid() const {
536 void setIfReturnsVoid(bool Value) {
540 /// \returns A pair of default/requested minimum/maximum flat work group sizes
541 /// for this function.
542 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
543 return FlatWorkGroupSizes;
546 /// \returns Default/requested minimum flat work group size for this function.
547 unsigned getMinFlatWorkGroupSize() const {
548 return FlatWorkGroupSizes.first;
551 /// \returns Default/requested maximum flat work group size for this function.
552 unsigned getMaxFlatWorkGroupSize() const {
553 return FlatWorkGroupSizes.second;
556 /// \returns A pair of default/requested minimum/maximum number of waves per
558 std::pair<unsigned, unsigned> getWavesPerEU() const {
562 /// \returns Default/requested minimum number of waves per execution unit.
563 unsigned getMinWavesPerEU() const {
564 return WavesPerEU.first;
567 /// \returns Default/requested maximum number of waves per execution unit.
568 unsigned getMaxWavesPerEU() const {
569 return WavesPerEU.second;
572 /// \returns Stack object index for \p Dim's work group ID.
573 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
575 return DebuggerWorkGroupIDStackObjectIndices[Dim];
578 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
579 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
581 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
584 /// \returns Stack object index for \p Dim's work item ID.
585 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
587 return DebuggerWorkItemIDStackObjectIndices[Dim];
590 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
591 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
593 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
596 /// \returns SGPR used for \p Dim's work group ID.
597 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
600 assert(hasWorkGroupIDX());
601 return ArgInfo.WorkGroupIDX.getRegister();
603 assert(hasWorkGroupIDY());
604 return ArgInfo.WorkGroupIDY.getRegister();
606 assert(hasWorkGroupIDZ());
607 return ArgInfo.WorkGroupIDZ.getRegister();
609 llvm_unreachable("unexpected dimension");
612 /// \returns VGPR used for \p Dim's work item ID.
/// The registers are fixed: AMDGPU::VGPR0, AMDGPU::VGPR1 and AMDGPU::VGPR2
/// for the X, Y and Z work item IDs. Each return is preceded by an assertion
/// that the matching work item ID is present; the remaining path ends in
/// llvm_unreachable.
613 unsigned getWorkItemIDVGPR(unsigned Dim) const {
616 assert(hasWorkItemIDX());
617 return AMDGPU::VGPR0;
619 assert(hasWorkItemIDY());
620 return AMDGPU::VGPR1;
622 assert(hasWorkItemIDZ());
623 return AMDGPU::VGPR2;
625 llvm_unreachable("unexpected dimension");
/// \returns The current value of LDSWaveSpillSize.
628 unsigned getLDSWaveSpillSize() const {
629 return LDSWaveSpillSize;
/// Inserts into BufferPSVs through try_emplace, passing a newly created
/// AMDGPUBufferPseudoSourceValue built from \p TII.
/// \returns The pointer owned by the resulting map entry; the map keeps
/// ownership through its std::unique_ptr values.
// Presumably keyed by \p BufferRsrc - TODO confirm.
// NOTE(review): the make_unique argument is evaluated before try_emplace
// runs, so an object is allocated on every call even when an entry for the
// key already exists.
632 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
633 const Value *BufferRsrc) {
635 auto PSV = BufferPSVs.try_emplace(
637 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
638 return PSV.first->second.get();
/// Inserts into ImagePSVs through try_emplace, passing a newly created
/// AMDGPUImagePseudoSourceValue built from \p TII.
/// \returns The pointer owned by the resulting map entry; the map keeps
/// ownership through its std::unique_ptr values.
// Presumably keyed by \p ImgRsrc - TODO confirm.
// NOTE(review): the make_unique argument is evaluated before try_emplace
// runs, so an object is allocated on every call even when an entry for the
// key already exists.
641 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
642 const Value *ImgRsrc) {
644 auto PSV = ImagePSVs.try_emplace(
646 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
647 return PSV.first->second.get();
651 } // end namespace llvm
653 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H