1 //===-- AMDILModuleInfo.cpp - Per-module kernel and constant metadata ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
9 #include "AMDILModuleInfo.h"
10 #include "AMDILDevices.h"
11 #include "AMDILKernel.h"
12 #include "AMDILSubtarget.h"
14 #include "AMDILAlgorithms.tpp"
15 #include "AMDILModuleInfo.h"
16 #include "AMDILDevices.h"
17 #include "AMDILKernel.h"
18 #include "AMDILSubtarget.h"
19 #include "AMDILUtilityFunctions.h"
20 #include "llvm/CodeGen/MachineConstantPool.h"
21 #include "llvm/Constants.h"
22 #include "llvm/DerivedTypes.h"
23 #include "llvm/Instructions.h"
24 #include "llvm/Support/FormattedStream.h"
28 #define CB_BASE_OFFSET 2
31 AMDILModuleInfo::AMDILModuleInfo(const MachineModuleInfo &MMI)
// Destructor. Visits every entry of mKernels and reads the AMDILKernel pointer
// stored in it; presumably the kernel object is released here — TODO confirm.
// NOTE(review): `cur` is a by-value copy of the map entry and only its value is
// read; confirm that the copy is intentional.
41 AMDILModuleInfo::~AMDILModuleInfo() {
42 for (StringMap<AMDILKernel*>::iterator kb = mKernels.begin(), ke = mKernels.end();
44 StringMapEntry<AMDILKernel*> cur = *kb;
45 AMDILKernel *ptr = cur.getValue();
// Scans krnl->constPtr linearly for the entry whose name compares equal to
// `arg`. The no-match result is presumably NULL — TODO confirm.
// NOTE(review): strcmp() treats begin->name.data() as a NUL-terminated string;
// confirm that the name storage guarantees a terminator.
50 static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg) {
51 llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
52 for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end();
53 begin != end; ++begin) {
54 if (!strcmp(begin->name.data(),arg.c_str())) {
// Walks the element types of ST. Nested struct elements are checked
// recursively; for other elements the scalar size reported by GET_SCALAR_SIZE
// is compared against 32. Presumably returns true as soon as an element
// narrower than 32 bits is found — TODO confirm.
61 static bool structContainsSub32bitType(const StructType *ST) {
62 StructType::element_iterator eib, eie;
63 for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
65 uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr);
67 if (const StructType *ST = dyn_cast<StructType>(ptr)) {
68 if (structContainsSub32bitType(ST)) {
72 } else if (size < 32) {
// Collects module-level metadata. Caches the subtarget in mSTM, then walks
// every global of M and dispatches on its name or address space:
//   "sgv*"                           -> parseSGV, stored in mKernelArgs
//   "fgv*"                           -> ignored
//   "lvgv*"                          -> parseLVGV, stored in mLocalArgs
//   "llvm.image.annotations*"        -> parseImageAnnotate
//   "llvm.global.annotations*"       -> parseGlobalAnnotate
//   "llvm.constpointer.annotations*" -> parseConstantPtrAnnotate
//   "llvm.readonlypointer.annotations*" -> skipped here
//   address space 3                  -> parseAutoArray(GV, false)
//   name containing "clregion"       -> parseAutoArray(GV, true)
//   any other used global not in mIgnoreStr -> parseConstantPtr
// Afterwards checkConstPtrsUseHW is applied across the functions of M through
// safeForEach.
// NOTE(review): the strncmp/strstr calls read GVName.data() as a C string;
// confirm the StringRef returned by getName() is always NUL-terminated here.
80 void AMDILModuleInfo::processModule(const Module *M,
81 const AMDILTargetMachine *mTM)
83 Module::const_global_iterator GI;
84 Module::const_global_iterator GE;
85 mSTM = mTM->getSubtargetImpl();
86 for (GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) {
87 const GlobalValue *GV = GI;
88 llvm::StringRef GVName = GV->getName();
89 const char *name = GVName.data();
90 if (!strncmp(name, "sgv", 3)) {
91 mKernelArgs[GVName] = parseSGV(GV);
92 } else if (!strncmp(name, "fgv", 3)) {
93 // we can ignore this since we don't care about the filename
95 } else if (!strncmp(name, "lvgv", 4)) {
96 mLocalArgs[GVName] = parseLVGV(GV);
97 } else if (!strncmp(name, "llvm.image.annotations", 22)) {
98 parseImageAnnotate(GV);
99 } else if (!strncmp(name, "llvm.global.annotations", 23)) {
100 parseGlobalAnnotate(GV);
101 } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) {
102 parseConstantPtrAnnotate(GV);
103 } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) {
104 // These are skipped as we handle them later in AMDILPointerManager.cpp
105 } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS #
106 parseAutoArray(GV, false);
107 } else if (strstr(name, "clregion")) {
108 parseAutoArray(GV, true);
109 } else if (!GV->use_empty()
110 && mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
111 parseConstantPtr(GV);
116 safeForEach(M->begin(), M->end(),
118 std::mem_fun(&AMDILModuleInfo::checkConstPtrsUseHW),
// Assigns a constant-buffer number and offset to every entry of mConstMems.
// An entry that fits within the device's maximum CB size and for which a
// buffer index below getMaxNumCBs() is available is placed in a hardware
// buffer (cbNum = curCB + CB_BASE_OFFSET). The remaining assignments set
// cbNum to 0 and use the separate `swoffset` counter, presumably as the
// software fallback — TODO confirm. Each offset advances by the entry size
// rounded up to a multiple of 16.
// When any constants exist, mReservedBuffs is set to curCB + 1.
122 void AMDILModuleInfo::allocateGlobalCB(void) {
123 uint32_t maxCBSize = mSTM->device()->getMaxCBSize();
126 uint32_t swoffset = 0;
127 for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
128 cpe = mConstMems.end(); cpb != cpe; ++cpb) {
// NOTE(review): this device query does not depend on the loop variable.
129 bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
130 cpb->second.usesHardware = false;
132 // If we have a limit on the max CB Size, then we need to make sure that
133 // the constant sizes fall within the limits.
134 if (cpb->second.size <= maxCBSize) {
135 if (offset + cpb->second.size > maxCBSize) {
139 if (curCB < mSTM->device()->getMaxNumCBs()) {
140 cpb->second.cbNum = curCB + CB_BASE_OFFSET;
141 cpb->second.offset = offset;
// (size + 15) & ~15 rounds the size up to the next multiple of 16.
142 offset += (cpb->second.size + 15) & (~15);
143 cpb->second.usesHardware = true;
148 cpb->second.cbNum = 0;
149 cpb->second.offset = swoffset;
150 swoffset += (cpb->second.size + 15) & (~15);
152 if (!mConstMems.empty()) {
153 mReservedBuffs = curCB + 1;
// Per-function pass over constant-pointer arguments. Only functions whose name
// contains both "__OpenCL" and "_AMDILKernel" are processed further.
// For such a kernel the function:
//   1. registers an AMDILConstPtr sized to the device's maximum CB size for
//      constant-address-space pointer arguments (presumably only for those not
//      already recorded on the kernel — TODO confirm);
//   2. if there are more pointers than available hardware buffers, disables
//      hardware use for the smallest ones;
//   3. renumbers the remaining hardware-backed pointers consecutively starting
//      at 2 + mReservedBuffs;
//   4. appends the hardware-backed module-level constants from mConstMems;
//   5. grows krnl->constSizes[] so each buffer covers size + offset, rounded up
//      to a multiple of 16.
157 bool AMDILModuleInfo::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
159 Function::const_arg_iterator AI, AE;
160 const Function *func = *FCI;
161 std::string name = func->getName();
162 if (!strstr(name.c_str(), "__OpenCL")
163 || !strstr(name.c_str(), "_AMDILKernel")) {
// NOTE(review): operator[] on mKernels inserts a default entry when `name` is
// absent; confirm every matching function is already registered as a kernel.
166 AMDILKernel *krnl = mKernels[name];
167 if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
168 for (AI = func->arg_begin(), AE = func->arg_end();
170 const Argument *Arg = &(*AI);
171 const PointerType *P = dyn_cast<PointerType>(Arg->getType());
175 if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
178 const AMDILConstPtr *ptr = getConstPtr(krnl, Arg->getName());
182 AMDILConstPtr constAttr;
183 constAttr.name = Arg->getName();
184 constAttr.size = this->mSTM->device()->getMaxCBSize();
185 constAttr.base = Arg;
186 constAttr.isArgument = true;
187 constAttr.isArray = false;
188 constAttr.offset = 0;
189 constAttr.usesHardware =
190 mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
191 if (constAttr.usesHardware) {
192 constAttr.cbNum = krnl->constPtr.size() + 2;
196 krnl->constPtr.push_back(constAttr);
199 // Now lets make sure that only the N largest buffers
200 // get allocated in hardware if we have too many buffers
201 uint32_t numPtrs = krnl->constPtr.size();
202 if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) {
203 // TODO: Change this routine so it sorts
204 // AMDILConstPtr instead of pulling the sizes out
205 // and then grab the N largest and disable the rest
206 llvm::SmallVector<uint32_t, 16> sizes;
207 for (uint32_t x = 0; x < numPtrs; ++x) {
208 sizes.push_back(krnl->constPtr[x].size);
210 std::sort(sizes.begin(), sizes.end());
211 uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() -
// After the ascending sort, sizes[numToDisable-1] is the largest size among the
// numToDisable smallest entries; pointers at or below it lose hardware backing.
213 uint32_t safeSize = sizes[numToDisable-1];
214 for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
215 if (krnl->constPtr[x].size <= safeSize) {
216 krnl->constPtr[x].usesHardware = false;
221 // Renumber all of the valid CB's so that
222 // they are linear increase
223 uint32_t CBid = 2 + mReservedBuffs;
224 for (uint32_t x = 0; x < numPtrs; ++x) {
225 if (krnl->constPtr[x].usesHardware) {
226 krnl->constPtr[x].cbNum = CBid++;
229 for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
230 cpe = mConstMems.end(); cpb != cpe; ++cpb) {
231 if (cpb->second.usesHardware) {
232 krnl->constPtr.push_back(cpb->second);
235 for (uint32_t x = 0; x < krnl->constPtr.size(); ++x) {
236 AMDILConstPtr &c = krnl->constPtr[x];
// The unsigned subtraction wraps when c.cbNum < CB_BASE_OFFSET; the second
// condition on the next line rejects that case.
237 uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
238 if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
239 if ((c.size + c.offset) > krnl->constSizes[cbNum]) {
240 krnl->constSizes[cbNum] =
241 ((c.size + c.offset) + 15) & ~15;
244 krnl->constPtr[x].usesHardware = false;
// Returns the offset recorded in mArrayMems for the array named `a`.
// The result for an unknown name is presumably a sentinel — TODO confirm.
250 int32_t AMDILModuleInfo::getArrayOffset(const llvm::StringRef &a) const {
251 StringMap<AMDILArrayMem>::const_iterator iter = mArrayMems.find(a);
252 if (iter != mArrayMems.end()) {
253 return iter->second.offset;
// Returns the offset recorded in mConstMems for the constant named `a`.
// The result for an unknown name is presumably a sentinel — TODO confirm.
259 int32_t AMDILModuleInfo::getConstOffset(const llvm::StringRef &a) const {
260 StringMap<AMDILConstPtr>::const_iterator iter = mConstMems.find(a);
261 if (iter != mConstMems.end()) {
262 return iter->second.offset;
// Returns the usesHardware flag recorded in mConstMems for the constant
// `name`. The result for an unknown name is presumably false — TODO confirm.
268 bool AMDILModuleInfo::getConstHWBit(const llvm::StringRef &name) const {
269 StringMap<AMDILConstPtr>::const_iterator iter = mConstMems.find(name);
270 if (iter != mConstMems.end()) {
271 return iter->second.usesHardware;
277 // As of right now we only care about the required group size
278 // so we can skip the variable encoding
// Builds an AMDILKernelAttr from the "sgv" global G. The attribute is zeroed
// and its group/region sizes are seeded with the subtarget defaults. If G is a
// GlobalVariable whose initializer is a string, the string is searched for the
// markers "RWG" and "RWR", and three comma-separated unsigned values are
// scanned into reqGroupSize and reqRegionSize respectively.
// NOTE(review): the sscanf() results are not checked in the lines shown, so a
// partial match can leave a mix of parsed and default values — confirm.
279 AMDILKernelAttr AMDILModuleInfo::parseSGV(const GlobalValue *G) {
280 AMDILKernelAttr nArg;
281 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
282 memset(&nArg, 0, sizeof(nArg));
283 for (int x = 0; x < 3; ++x) {
284 nArg.reqGroupSize[x] = mSTM->getDefaultSize(x);
285 nArg.reqRegionSize[x] = mSTM->getDefaultSize(x);
287 if (!GV || !GV->hasInitializer()) {
290 const Constant *CV = GV->getInitializer();
291 const ConstantDataArray *CA = dyn_cast_or_null<ConstantDataArray>(CV);
292 if (!CA || !CA->isString()) {
295 std::string init = CA->getAsString();
296 size_t pos = init.find("RWG");
297 if (pos != llvm::StringRef::npos) {
299 std::string LWS = init.substr(pos, init.length() - pos);
300 const char *lws = LWS.c_str();
301 sscanf(lws, "%u,%u,%u", &(nArg.reqGroupSize[0]),
302 &(nArg.reqGroupSize[1]),
303 &(nArg.reqGroupSize[2]));
306 pos = init.find("RWR");
307 if (pos != llvm::StringRef::npos) {
309 std::string LWS = init.substr(pos, init.length() - pos);
310 const char *lws = LWS.c_str();
311 sscanf(lws, "%u,%u,%u", &(nArg.reqRegionSize[0]),
312 &(nArg.reqRegionSize[1]),
313 &(nArg.reqRegionSize[2]));
// Builds an AMDILLocalArg from the "lvgv" global G. Each operand of the
// initializer array that is a ConstantExpr with at least one operand names an
// array; when that name is present in mArrayMems, a pointer to the map entry
// is appended to nArg.local.
// NOTE(review): nArg.name is overwritten on every iteration, so after the loop
// it holds the last name processed — confirm this is intended.
319 AMDILLocalArg AMDILModuleInfo::parseLVGV(const GlobalValue *G) {
321 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
323 if (!GV || !GV->hasInitializer()) {
326 const ConstantArray *CA =
327 dyn_cast_or_null<ConstantArray>(GV->getInitializer());
331 for (size_t x = 0, y = CA->getNumOperands(); x < y; ++x) {
332 const Value *local = CA->getOperand(x);
333 const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(local);
334 if (!CE || !CE->getNumOperands()) {
337 nArg.name = (*(CE->op_begin()))->getName();
338 if (mArrayMems.find(nArg.name) != mArrayMems.end()) {
339 nArg.local.push_back(&(mArrayMems[nArg.name]));
// Processes an "llvm.constpointer.annotations" global. Each ConstantStruct
// operand of the initializer array holds two fields, a name and a size. For
// every such pair the name global is added to mIgnoreStr and removed from
// mConstMems if already present, and an AMDILConstPtr is built with the size
// rounded up to a multiple of 16. That entry is appended to the constPtr list
// of the kernel whose name follows the annotation prefix in G's name.
// NOTE(review): GV comes from dyn_cast_or_null but is dereferenced on the next
// line without a null check; nameGV from dyn_cast is likewise dereferenced
// unchecked — confirm both can never be null here.
345 void AMDILModuleInfo::parseConstantPtrAnnotate(const GlobalValue *G) {
346 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
347 const ConstantArray *CA =
348 dyn_cast_or_null<ConstantArray>(GV->getInitializer());
352 uint32_t numOps = CA->getNumOperands();
353 for (uint32_t x = 0; x < numOps; ++x) {
354 const Value *V = CA->getOperand(x);
355 const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
359 assert(CS->getNumOperands() == 2 && "There can only be 2"
360 " fields, a name and size");
361 const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
362 const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
363 assert(nameField && "There must be a constant name field");
364 assert(sizeField && "There must be a constant size field");
365 const GlobalVariable *nameGV =
366 dyn_cast<GlobalVariable>(nameField->getOperand(0));
367 const ConstantDataArray *nameArray =
368 dyn_cast<ConstantDataArray>(nameGV->getInitializer());
369 // Let's add this string to the set of strings we should ignore processing
370 mIgnoreStr.insert(nameGV->getName());
371 if (mConstMems.find(nameGV->getName())
372 != mConstMems.end()) {
373 // If we already processed this string as a constant, let's remove it from
374 // the list of known constants. This way we don't process unneeded data
375 // and don't generate code/metadata for strings that are never used.
376 mConstMems.erase(mConstMems.find(nameGV->getName()));
378 mIgnoreStr.insert(CS->getOperand(0)->getName());
380 AMDILConstPtr constAttr;
381 constAttr.name = nameArray->getAsString();
382 constAttr.size = (sizeField->getZExtValue() + 15) & ~15;
384 constAttr.isArgument = true;
385 constAttr.isArray = false;
387 constAttr.offset = 0;
388 constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
389 // Now that we have all our constant information,
390 // lets update the AMDILKernel
// Skips 30 characters: the 29-character "llvm.constpointer.annotations" prefix
// plus one more character (presumably a separator — TODO confirm).
391 llvm::StringRef AMDILKernelName = G->getName().data() + 30;
393 if (mKernels.find(AMDILKernelName) != mKernels.end()) {
394 k = mKernels[AMDILKernelName];
404 memset(k->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
406 constAttr.cbNum = k->constPtr.size() + 2;
407 k->constPtr.push_back(constAttr);
408 mKernels[AMDILKernelName] = k;
// Processes an "llvm.image.annotations" global. The kernel is identified by
// the part of G's name that follows the annotation prefix. For each
// two-field ConstantStruct operand i of the initializer array, the name in
// field 0 is removed from mConstMems (if present) and added to mIgnoreStr, and
// the integer in field 1 classifies image i: one value records i in
// k->readOnly (presumably value 1 — TODO confirm), value 2 records it in
// k->writeOnly, and anything else trips the assert.
// NOTE(review): GV and CA come from dyn_cast and are dereferenced without a
// visible null check; CI is dereferenced directly after dyn_cast — confirm.
412 void AMDILModuleInfo::parseImageAnnotate(const GlobalValue *G) {
413 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
414 const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
418 if (isa<GlobalValue>(CA)) {
421 uint32_t e = CA->getNumOperands();
// Skips 23 characters: the 22-character "llvm.image.annotations" prefix plus
// one more character (presumably a separator — TODO confirm).
426 llvm::StringRef name = G->getName().data() + 23;
427 if (mKernels.find(name) != mKernels.end()) {
438 memset(k->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
440 for (uint32_t i = 0; i != e; ++i) {
441 const Value *V = CA->getOperand(i);
442 const Constant *C = dyn_cast<Constant>(V);
443 const ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
444 if (CS && CS->getNumOperands() == 2) {
445 if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) !=
447 // If we already processed this string as a constant, let's remove it
448 // from the list of known constants. This way we don't process unneeded
449 // data and don't generate code/metadata for strings that are never
452 mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()));
454 mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName());
456 const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
457 uint32_t val = (uint32_t)CI->getZExtValue();
459 k->readOnly.insert(i);
460 } else if (val == 2) {
461 k->writeOnly.insert(i);
463 assert(!"Unknown image type value!");
// Records the global GV in mArrayMems under its own name. The entry's vecSize
// is getTypeSize() of GV's type (NULL type when GV is not a GlobalVariable)
// and isRegion is copied from the isRegion argument.
470 void AMDILModuleInfo::parseAutoArray(const GlobalValue *GV, bool isRegion) {
471 const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
472 Type *Ty = (G) ? G->getType() : NULL;
476 tmp.vecSize = getTypeSize(Ty, true);
477 tmp.isRegion = isRegion;
478 mArrayMems[GV->getName()] = tmp;
// Records the global GV in mConstMems as a constant array: not an argument,
// offset 0, not hardware-backed, with its size taken from getTypeSize() of the
// global's type.
// NOTE(review): `G` is treated as possibly null when computing Ty, but
// G->getName() two lines later dereferences it unconditionally — confirm GV is
// always a GlobalVariable here, or use GV->getName().
481 void AMDILModuleInfo::parseConstantPtr(const GlobalValue *GV) {
482 const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
483 Type *Ty = (G) ? G->getType() : NULL;
484 AMDILConstPtr constAttr;
485 constAttr.name = G->getName();
486 constAttr.size = getTypeSize(Ty, true);
488 constAttr.isArgument = false;
489 constAttr.isArray = true;
490 constAttr.offset = 0;
492 constAttr.usesHardware = false;
493 mConstMems[GV->getName()] = constAttr;
// Processes the "llvm.global.annotations" global: when its initializer is a
// ConstantArray, every operand is handed to parseKernelInformation().
// NOTE(review): GV comes from dyn_cast and is dereferenced on the next line
// without a null check — confirm G is always a GlobalVariable.
496 void AMDILModuleInfo::parseGlobalAnnotate(const GlobalValue *G) {
497 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
498 if (!GV->hasInitializer()) {
501 const Constant *CT = GV->getInitializer();
502 if (!CT || isa<GlobalValue>(CT)) {
505 const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
510 unsigned int nKernels = CA->getNumOperands();
511 for (unsigned int i = 0, e = nKernels; i != e; ++i) {
512 parseKernelInformation(CA->getOperand(i));
// Consumes one entry of the global annotation array. V is expected to be a
// ConstantStruct whose operand 0 names the kernel, operand 1 names its SGV and
// operand 3 names its LVGV. The kernel record is looked up in mKernels or
// newly allocated, linked to its SGV attributes from mKernelArgs, and each
// local array listed in mLocalArgs is given an offset from one of the kernel's
// running size counters (curHWRSize / curRSize for region arrays, curHWSize /
// curSize otherwise), each advanced by the array size rounded up to 16.
// The record is finally stored back into mKernels.
// NOTE(review): CV is re-assigned from dyn_cast<Constant> and dereferenced
// without a visible null check — confirm.
516 void AMDILModuleInfo::parseKernelInformation(const Value *V) {
517 if (isa<GlobalValue>(V)) {
520 const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
524 uint32_t N = CS->getNumOperands();
530 // The first operand is always a pointer to the AMDILKernel.
531 const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
532 llvm::StringRef AMDILKernelName = "";
533 if (CV->getNumOperands()) {
534 AMDILKernelName = (*(CV->op_begin()))->getName();
537 // If we have images, then we have already created the AMDILKernel and we just need
538 // to get the AMDILKernel information.
539 if (mKernels.find(AMDILKernelName) != mKernels.end()) {
540 tmp = mKernels[AMDILKernelName];
542 tmp = new AMDILKernel;
550 memset(tmp->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
554 // The second operand is SGV, there can only be one so we don't need to worry
555 // about parsing out multiple data points.
556 CV = dyn_cast<Constant>(CS->getOperand(1));
558 llvm::StringRef sgvName;
559 if (CV->getNumOperands()) {
560 sgvName = (*(CV->op_begin()))->getName();
563 if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
564 tmp->sgv = &mKernelArgs[sgvName];
566 // The third operand is FGV, which is skipped
567 // The fourth operand is LVGV
568 // There can be multiple local arrays, so we
569 // need to handle each one separately
570 CV = dyn_cast<Constant>(CS->getOperand(3));
571 llvm::StringRef lvgvName = "";
572 if (CV->getNumOperands()) {
573 lvgvName = (*(CV->op_begin()))->getName();
575 if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) {
576 AMDILLocalArg *ptr = &mLocalArgs[lvgvName];
578 llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
579 for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
580 if ((*ib)->isRegion) {
582 (*ib)->offset = tmp->curHWRSize;
583 tmp->curHWRSize += ((*ib)->vecSize + 15) & ~15;
585 (*ib)->offset = tmp->curRSize;
586 tmp->curRSize += ((*ib)->vecSize + 15) & ~15;
590 (*ib)->offset = tmp->curHWSize;
591 tmp->curHWSize += ((*ib)->vecSize + 15) & ~15;
593 (*ib)->offset = tmp->curSize;
594 tmp->curSize += ((*ib)->vecSize + 15) & ~15;
600 // The fifth operand is NULL
601 mKernels[AMDILKernelName] = tmp;
// Looks up the kernel record registered under `name` in mKernels. The handling
// of an unknown name (iter == end) is presumably a NULL result — TODO confirm.
605 AMDILModuleInfo::getKernel(const llvm::StringRef &name) {
606 StringMap<AMDILKernel*>::iterator iter = mKernels.find(name);
607 if (iter == mKernels.end()) {
// Returns true when a kernel record is registered under `name` in mKernels.
614 bool AMDILModuleInfo::isKernel(const llvm::StringRef &name) const {
615 return (mKernels.find(name) != mKernels.end());
// Reports whether image index iID is in the writeOnly set of kernel `name`.
// The result for an unknown kernel is presumably false — TODO confirm.
618 bool AMDILModuleInfo::isWriteOnlyImage(const llvm::StringRef &name,
619 uint32_t iID) const {
620 const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
621 if (kiter == mKernels.end()) {
624 return kiter->second->writeOnly.count(iID);
// Returns the number of entries in the writeOnly set of kernel `name`.
// The environment variable GPU_DISABLE_RAW_UAV is consulted first; when its
// value starts with '1' an early path is taken (presumably a fixed result —
// TODO confirm). The result for an unknown kernel is likewise not shown.
628 AMDILModuleInfo::getNumWriteImages(const llvm::StringRef &name) const {
630 env = getenv("GPU_DISABLE_RAW_UAV");
631 if (env && env[0] == '1') {
634 const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
635 if (kiter == mKernels.end()) {
638 return kiter->second->writeOnly.size();
// Reports whether image index iID is in the readOnly set of kernel `name`.
// The result for an unknown kernel is presumably false — TODO confirm.
642 bool AMDILModuleInfo::isReadOnlyImage(const llvm::StringRef &name,
643 uint32_t iID) const {
644 const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
645 if (kiter == mKernels.end()) {
648 return kiter->second->readOnly.count(iID);
// Fetches the SGV attributes of kernel `name`; presumably reports whether a
// required work-group size was specified for it — TODO confirm.
651 bool AMDILModuleInfo::hasRWG(const llvm::StringRef &name) const {
652 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
653 if (iter != mKernels.end()) {
654 AMDILKernelAttr *ptr = iter->second->sgv;
// Fetches the SGV attributes of kernel `name`; presumably reports whether a
// required work-region size was specified for it — TODO confirm.
662 bool AMDILModuleInfo::hasRWR(const llvm::StringRef &name) const {
663 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
664 if (iter != mKernels.end()) {
665 AMDILKernelAttr *ptr = iter->second->sgv;
// Returns the product of the three required group-size dimensions recorded in
// the SGV attributes of kernel `name`. Otherwise falls back to the product of
// the subtarget's default sizes for dimensions 0..2.
674 AMDILModuleInfo::getMaxGroupSize(const llvm::StringRef &name) const {
675 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
676 if (iter != mKernels.end()) {
677 AMDILKernelAttr *sgv = iter->second->sgv;
679 return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
682 return mSTM->getDefaultSize(0) *
683 mSTM->getDefaultSize(1) *
684 mSTM->getDefaultSize(2);
// Returns the product of the three required region-size dimensions recorded in
// the SGV attributes of kernel `name`. Otherwise falls back to the product of
// the subtarget's default sizes for dimensions 0..2.
688 AMDILModuleInfo::getMaxRegionSize(const llvm::StringRef &name) const {
689 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
690 if (iter != mKernels.end()) {
691 AMDILKernelAttr *sgv = iter->second->sgv;
693 return sgv->reqRegionSize[0] *
694 sgv->reqRegionSize[1] *
695 sgv->reqRegionSize[2];
698 return mSTM->getDefaultSize(0) *
699 mSTM->getDefaultSize(1) *
700 mSTM->getDefaultSize(2);
// Returns curRSize of kernel `name`. The result for an unknown kernel is
// presumably 0 — TODO confirm.
702 uint32_t AMDILModuleInfo::getRegionSize(const llvm::StringRef &name) const {
703 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
704 if (iter != mKernels.end()) {
705 return iter->second->curRSize;
// Returns curSize of kernel `name`. The result for an unknown kernel is
// presumably 0 — TODO confirm.
711 uint32_t AMDILModuleInfo::getLocalSize(const llvm::StringRef &name) const {
712 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
713 if (iter != mKernels.end()) {
714 return iter->second->curSize;
// Returns constSize of kernel `name`. The result for an unknown kernel is
// presumably 0 — TODO confirm.
720 uint32_t AMDILModuleInfo::getConstSize(const llvm::StringRef &name) const {
721 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
722 if (iter != mKernels.end()) {
723 return iter->second->constSize;
// Returns curHWRSize of kernel `name`. The result for an unknown kernel is
// presumably 0 — TODO confirm.
730 AMDILModuleInfo::getHWRegionSize(const llvm::StringRef &name) const {
731 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
732 if (iter != mKernels.end()) {
733 return iter->second->curHWRSize;
// Returns curHWSize of kernel `name`. The result for an unknown kernel is
// presumably 0 — TODO confirm.
739 uint32_t AMDILModuleInfo::getHWLocalSize(const llvm::StringRef &name) const {
740 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
741 if (iter != mKernels.end()) {
742 return iter->second->curHWSize;
// Returns the ID stored in mArgIDMap for `arg` when one exists. The handling
// of an unseen argument is presumably to assign and record a fresh ID — TODO
// confirm.
749 int32_t AMDILModuleInfo::getArgID(const Argument *arg) {
750 DenseMap<const Argument *, int32_t>::iterator argiter = mArgIDMap.find(arg);
751 if (argiter != mArgIDMap.end()) {
752 return argiter->second;
// Returns a required region size for kernel `name`. When the kernel has SGV
// attributes the result is either reqRegionSize[dim] or the product of all
// three dimensions; otherwise the corresponding subtarget defaults are used
// (getDefaultSize(dim) or the product of the three defaults). Which value of
// `dim` selects the product form is presumably an out-of-range index such as
// 3 — TODO confirm.
760 AMDILModuleInfo::getRegion(const llvm::StringRef &name, uint32_t dim) const {
761 StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
762 if (iter != mKernels.end() && iter->second->sgv) {
763 AMDILKernelAttr *sgv = iter->second->sgv;
769 return sgv->reqRegionSize[dim];
772 return sgv->reqRegionSize[0] *
773 sgv->reqRegionSize[1] *
774 sgv->reqRegionSize[2];
781 return mSTM->getDefaultSize(0) *
782 mSTM->getDefaultSize(1) *
783 mSTM->getDefaultSize(2);
787 return mSTM->getDefaultSize(dim);
// Iterator to the first entry of the module-level constant map mConstMems.
793 StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_begin() {
794 return mConstMems.begin();
// Past-the-end iterator of the module-level constant map mConstMems.
798 StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_end() {
799 return mConstMems.end();
// Returns true when the string S is present in mByteStore.
802 bool AMDILModuleInfo::byteStoreExists(StringRef S) const {
803 return mByteStore.find(S) != mByteStore.end();
// Returns the size of the constant pointer named `arg` on kernel `krnl`, found
// through getConstPtr(). The result when no such pointer exists is presumably
// 0 — TODO confirm.
806 uint32_t AMDILModuleInfo::getConstPtrSize(const AMDILKernel *krnl,
807 const llvm::StringRef &arg)
809 const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
811 return curConst->size;
// Returns the offset of the constant pointer named `arg` on kernel `krnl`,
// found through getConstPtr(). The result when no such pointer exists is
// presumably 0 — TODO confirm.
817 uint32_t AMDILModuleInfo::getConstPtrOff(const AMDILKernel *krnl,
818 const llvm::StringRef &arg)
820 const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
822 return curConst->offset;
// Returns the constant-buffer number (cbNum) of the constant pointer named
// `arg` on kernel `krnl`, found through getConstPtr(). The result when no such
// pointer exists is presumably 0 — TODO confirm.
828 uint32_t AMDILModuleInfo::getConstPtrCB(const AMDILKernel *krnl,
829 const llvm::StringRef &arg)
831 const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
833 return curConst->cbNum;
// Assigns an offset to every entry of MF's machine constant pool. Each entry
// is recorded in krnl->CPOffsets as (current offset, constant), and the running
// module-wide counter mCurrentCPOffset advances by the constant's type size
// rounded up to a multiple of 16.
// NOTE(review): `consts` is declared by value, so the vector returned by
// getConstants() is copied; a const reference would avoid that if the accessor
// returns a reference — confirm.
// NOTE(review): std::make_pair with explicit template arguments does not accept
// lvalue arguments under C++11 and later; confirm the language mode in use.
839 void AMDILModuleInfo::calculateCPOffsets(const MachineFunction *MF,
842 const MachineConstantPool *MCP = MF->getConstantPool();
846 const std::vector<MachineConstantPoolEntry> consts = MCP->getConstants();
847 size_t numConsts = consts.size();
848 for (size_t x = 0; x < numConsts; ++x) {
849 krnl->CPOffsets.push_back(
850 std::make_pair<uint32_t, const Constant*>(
851 mCurrentCPOffset, consts[x].Val.ConstVal));
852 size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true);
853 // Align the size to the vector boundary
854 curSize = (curSize + 15) & (~15);
855 mCurrentCPOffset += curSize;
// Returns the isArray flag of the constant pointer named `arg` on kernel
// `krnl`. The result when no such pointer exists is presumably false — TODO
// confirm.
859 bool AMDILModuleInfo::isConstPtrArray(const AMDILKernel *krnl,
860 const llvm::StringRef &arg) {
861 const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
863 return curConst->isArray;
// Returns the isArgument flag of the constant pointer named `arg` on kernel
// `krnl`. The result when no such pointer exists is presumably false — TODO
// confirm.
869 bool AMDILModuleInfo::isConstPtrArgument(const AMDILKernel *krnl,
870 const llvm::StringRef &arg)
872 const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
874 return curConst->isArgument;
// Returns the base Value of the constant pointer named `arg` on kernel `krnl`.
// The result when no such pointer exists is presumably NULL — TODO confirm.
880 const Value *AMDILModuleInfo::getConstPtrValue(const AMDILKernel *krnl,
881 const llvm::StringRef &arg) {
882 const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
884 return curConst->base;
// Forward declarations of the dumpZeroElements overloads (struct, integer,
// array, vector and generic Type), declared up front because the definitions
// below call one another.
891 dumpZeroElements(StructType * const T, llvm::raw_ostream &O, bool asBytes);
893 dumpZeroElements(IntegerType * const T, llvm::raw_ostream &O, bool asBytes);
895 dumpZeroElements(ArrayType * const T, llvm::raw_ostream &O, bool asBytes);
897 dumpZeroElements(VectorType * const T, llvm::raw_ostream &O, bool asBytes);
899 dumpZeroElements(Type * const T, llvm::raw_ostream &O, bool asBytes);
// Generic entry point: writes the zero-valued representation of type T to O by
// switching on the type ID. x86 FP80, FP128, PPC FP128 and label types assert
// as unsupported. Integer, struct, array and vector types are forwarded to the
// type-specific overloads; opaque structs take a separate branch.
// NOTE(review): the dyn_cast results are passed on without a null check; the
// enclosing case labels make the casts expected to succeed.
901 void dumpZeroElements(Type * const T, llvm::raw_ostream &O, bool asBytes) {
905 switch(T->getTypeID()) {
906 case Type::X86_FP80TyID:
907 case Type::FP128TyID:
908 case Type::PPC_FP128TyID:
909 case Type::LabelTyID:
910 assert(0 && "These types are not supported by this backend");
912 case Type::DoubleTyID:
914 O << ":0:0:0:0:0:0:0:0";
919 case Type::FloatTyID:
920 case Type::PointerTyID:
921 case Type::FunctionTyID:
927 case Type::IntegerTyID:
928 dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
930 case Type::StructTyID:
932 const StructType *ST = cast<StructType>(T);
933 if (!ST->isOpaque()) {
934 dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
935 } else { // A pre-LLVM 3.0 opaque type
944 case Type::ArrayTyID:
945 dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
947 case Type::VectorTyID:
948 dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
// Struct overload: iterates over the element types of ST in order and emits
// zeros for each one through the generic dumpZeroElements overload.
954 dumpZeroElements(StructType * const ST, llvm::raw_ostream &O, bool asBytes) {
959 StructType::element_iterator eib = ST->element_begin();
960 StructType::element_iterator eie = ST->element_end();
961 for (;eib != eie; ++eib) {
963 dumpZeroElements(curType, O, asBytes);
// Integer overload: computes the width of IT in whole bytes (bit width / 8)
// and loops once per byte, presumably emitting one zero per byte when asBytes
// is set — TODO confirm.
968 dumpZeroElements(IntegerType * const IT, llvm::raw_ostream &O, bool asBytes) {
970 unsigned byteWidth = (IT->getBitWidth() >> 3);
971 for (unsigned x = 0; x < byteWidth; ++x) {
// Array overload: emits zeros for the element type once per array element.
978 dumpZeroElements(ArrayType * const AT, llvm::raw_ostream &O, bool asBytes) {
979 size_t size = AT->getNumElements();
980 for (size_t x = 0; x < size; ++x) {
981 dumpZeroElements(AT->getElementType(), O, asBytes);
// Vector overload: emits zeros for the element type once per vector lane.
986 dumpZeroElements(VectorType * const VT, llvm::raw_ostream &O, bool asBytes) {
987 size_t size = VT->getNumElements();
988 for (size_t x = 0; x < size; ++x) {
989 dumpZeroElements(VT->getElementType(), O, asBytes);
// Writes the constant CAval to O in hexadecimal, dispatching on its concrete
// class:
//   ConstantFP            - double or float, either as one hex word or as
//                           individual bytes (8 or 4 of them);
//   ConstantInt           - handled per bit width, again whole or per byte;
//   ConstantVector/Struct/Array - each operand printed recursively;
//   ConstantAggregateZero - operands printed recursively or zeros emitted via
//                           dumpZeroElements;
//   ConstantPointerNull, ConstantExpr, UndefValue - accepted without asserting;
//   anything else         - hits the final assert.
// `conv` is presumably a local union used to view the value as raw bytes —
// TODO confirm.
// NOTE(review): if so, the order of the bytes written depends on host
// endianness.
993 void AMDILModuleInfo::printConstantValue(const Constant *CAval,
994 llvm::raw_ostream &O, bool asBytes) {
995 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) {
996 bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
998 double val = CFP->getValueAPF().convertToDouble();
1007 O.write_hex(conv.l);
1009 for (int i = 0; i < 8; ++i) {
1011 O.write_hex((unsigned)conv.c[i] & 0xFF);
1015 float val = CFP->getValueAPF().convertToFloat();
1024 O.write_hex(conv.u);
1026 for (int i = 0; i < 4; ++i) {
1028 O.write_hex((unsigned)conv.c[i] & 0xFF);
1032 } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) {
1033 uint64_t zVal = CI->getValue().getZExtValue();
1038 switch (CI->getBitWidth()) {
1046 for (int i = 0; i < 8; ++i) {
1048 O.write_hex((unsigned)conv.c[i] & 0xFF);
1054 O.write_hex(zVal & 0xFF);
1062 conv.s = (uint16_t)zVal;
1064 O.write_hex((unsigned)conv.c[0] & 0xFF);
1066 O.write_hex((unsigned)conv.c[1] & 0xFF);
1075 conv.i = (uint32_t)zVal;
1076 for (int i = 0; i < 4; ++i) {
1078 O.write_hex((unsigned)conv.c[i] & 0xFF);
1084 } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) {
// The last vector operand is printed outside the loop, after the first y.
1085 int y = CV->getNumOperands()-1;
1087 for (; x < y; ++x) {
1088 printConstantValue(CV->getOperand(x), O, asBytes);
1090 printConstantValue(CV->getOperand(x), O, asBytes);
1091 } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) {
1092 int y = CS->getNumOperands();
1094 for (; x < y; ++x) {
1095 printConstantValue(CS->getOperand(x), O, asBytes);
1097 } else if (const ConstantAggregateZero *CAZ
1098 = dyn_cast<ConstantAggregateZero>(CAval)) {
1099 int y = CAZ->getNumOperands();
1102 for (; x < y; ++x) {
1103 printConstantValue((llvm::Constant *)CAZ->getOperand(x),
1108 dumpZeroElements(CAval->getType(), O, asBytes);
1110 int y = getNumElements(CAval->getType())-1;
1111 for (int x = 0; x < y; ++x) {
1117 } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) {
1118 int y = CA->getNumOperands();
1120 for (; x < y; ++x) {
1121 printConstantValue(CA->getOperand(x), O, asBytes);
1123 } else if (dyn_cast<ConstantPointerNull>(CAval)) {
1125 //assert(0 && "Hit condition which was not expected");
1126 } else if (dyn_cast<ConstantExpr>(CAval)) {
1128 //assert(0 && "Hit condition which was not expected");
1129 } else if (dyn_cast<UndefValue>(CAval)) {
1131 //assert(0 && "Hit condition which was not expected");
1133 assert(0 && "Hit condition which was not expected");
// Reports whether T is a struct type once pointer, array and vector wrappers
// are looked through: pointers recurse into their pointee type, arrays and
// vectors into their element type.
// NOTE(review): the dyn_cast<SequentialType> result is dereferenced without a
// null check; the enclosing case labels make the cast expected to succeed.
1137 static bool isStruct(Type * const T)
1142 switch (T->getTypeID()) {
1145 case Type::PointerTyID:
1146 return isStruct(T->getContainedType(0));
1147 case Type::StructTyID:
1149 case Type::ArrayTyID:
1150 case Type::VectorTyID:
1151 return isStruct(dyn_cast<SequentialType>(T)->getElementType());
// Emits the ";#DATASTART ... ;#DATAEND" section for constant buffer `id` to O.
// The section size is the sum of the 16-byte-rounded sizes of all mConstMems
// entries assigned to `id`. One DATASTART form adds mCurrentCPOffset to the
// size and, when that offset is non-zero, first dumps every kernel's
// constant-pool entries (type name, offset, element count or byte size, then
// the value). The other form prints `id` and the size. Then every module
// constant assigned to `id` is dumped the same way.
// NOTE(review): `G` is treated as possibly null when computing Ty, but
// G->getInitializer() two lines later dereferences it unconditionally —
// confirm that every base in mConstMems is a GlobalVariable.
1156 void AMDILModuleInfo::dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km,
1159 for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(),
1160 cme = consts_end(); cmb != cme; ++cmb) {
1161 if (id == cmb->second.cbNum) {
1162 size += (cmb->second.size + 15) & (~15);
1166 O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
1167 if (mCurrentCPOffset) {
1168 for (StringMap<AMDILKernel*>::iterator kcpb = mKernels.begin(),
1169 kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
1170 const AMDILKernel *k = kcpb->second;
1171 size_t numConsts = k->CPOffsets.size();
1172 for (size_t x = 0; x < numConsts; ++x) {
1173 size_t offset = k->CPOffsets[x].first;
1174 const Constant *C = k->CPOffsets[x].second;
1175 Type *Ty = C->getType();
// Struct-like types report their byte size; everything else its element count.
1176 size_t size = (isStruct(Ty) ? getTypeSize(Ty, true)
1177 : getNumElements(Ty));
1178 O << ";#" << km->getTypeName(Ty, symTab) << ":";
1179 O << offset << ":" << size ;
1180 printConstantValue(C, O, isStruct(Ty));
1186 O << ";#DATASTART:" << id << ":" << size << "\n";
1189 for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(), cme = consts_end();
1190 cmb != cme; ++cmb) {
1191 if (cmb->second.cbNum != id) {
1194 const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
1195 Type *Ty = (G) ? G->getType() : NULL;
1196 size_t offset = cmb->second.offset;
1197 const Constant *C = G->getInitializer();
1198 size_t size = (isStruct(Ty)
1199 ? getTypeSize(Ty, true)
1200 : getNumElements(Ty));
1201 O << ";#" << km->getTypeName(Ty, symTab) << ":";
1203 O << (offset + mCurrentCPOffset) << ":" << size;
1205 O << offset << ":" << size;
1208 printConstantValue(C, O, isStruct(Ty));
1210 assert(0 && "Cannot have a constant pointer"
1211 " without an initializer!");
1218 O << ";#DATAEND:" << id << "\n";
// Emits the data sections for every constant buffer in use. Collects the
// distinct cbNum values of all mConstMems entries (plus buffer 0 when the
// constant-pool offset is non-zero) and calls dumpDataToCB() once per buffer.
// The first check covers the case with no module constants and no
// constant-pool data (presumably an early return — TODO confirm).
// NOTE(review): iteration order over a DenseSet is not sorted, so the order in
// which buffers are dumped is not the numeric cbNum order.
1223 AMDILModuleInfo::dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km) {
1224 if (mConstMems.empty() && !mCurrentCPOffset) {
1227 llvm::DenseSet<uint32_t> const_set;
1228 for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(), cme = consts_end();
1229 cmb != cme; ++cmb) {
1230 const_set.insert(cmb->second.cbNum);
1232 if (mCurrentCPOffset) {
1233 const_set.insert(0);
1235 for (llvm::DenseSet<uint32_t>::iterator setb = const_set.begin(),
1236 sete = const_set.end(); setb != sete; ++setb) {
1237 dumpDataToCB(O, km, *setb);
1242 /// Create a function ID if it is not known or return the known
/// ID. A function with a non-empty name is delegated to the string overload.
/// An unnamed function is keyed by its pointer in mFuncPtrNames; a new ID is
/// RESERVED_FUNCS plus the combined number of entries already held in
/// mFuncPtrNames and mFuncNames.
1244 uint32_t AMDILModuleInfo::getOrCreateFunctionID(const GlobalValue* func) {
1245 if (func->getName().size()) {
1246 return getOrCreateFunctionID(func->getName());
1249 if (mFuncPtrNames.find(func) == mFuncPtrNames.end()) {
1250 id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
1251 mFuncPtrNames[func] = id;
1253 id = mFuncPtrNames[func];
1257 uint32_t AMDILModuleInfo::getOrCreateFunctionID(const std::string &func) {
1259 if (mFuncNames.find(func) == mFuncNames.end()) {
1260 id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
1261 mFuncNames[func] = id;
1263 id = mFuncNames[func];