1 //===-- AMDIL789IOExpansion.cpp - IO expansion for 789 devices ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
10 // @file AMDIL789IOExpansion.cpp
11 // @details Implementation of the IO expansion class for 789 devices.
13 #include "AMDILCompilerErrors.h"
14 #include "AMDILCompilerWarnings.h"
15 #include "AMDILDevices.h"
16 #include "AMDILGlobalManager.h"
17 #include "AMDILIOExpansion.h"
18 #include "AMDILKernelManager.h"
19 #include "AMDILMachineFunctionInfo.h"
20 #include "AMDILTargetMachine.h"
21 #include "AMDILUtilityFunctions.h"
22 #include "llvm/CodeGen/MachineConstantPool.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/DerivedTypes.h"
26 #include "llvm/Support/DebugLoc.h"
27 #include "llvm/Value.h"
// Constructor: hands the target machine on to the AMDILIOExpansion base class.
// NOTE(review): AMDIL_OPT_LEVEL_VAR is a macro defined outside this file;
// presumably it appends the optimization-level argument when one is
// configured -- confirm against its definition.
30 AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm
32 : AMDILIOExpansion(tm AMDIL_OPT_LEVEL_VAR)
// Destructor.
36 AMDIL789IOExpansion::~AMDIL789IOExpansion() {
// Returns the fixed, human-readable name of this pass.
39 const char *AMDIL789IOExpansion::getPassName() const
41 return "AMDIL 789 IO Expansion Pass";
// Emits, at MI's position, a chain of VEXTRACT_v4i32 / CMOVLOG_{Y,Z,W}_i32
// instructions that reduces a four-component value to a single component.
// R1006 and R1007 are overwritten as scratch registers; the final conditional
// move writes its result into `dst`.
//   MI     - instruction that marks the insertion point and supplies the
//            DebugLoc for the emitted instructions.
//   flag   - per the pseudo-IL below, the register whose y/z/w lanes drive the
//            three conditional moves.
//   src    - per the pseudo-IL below, the register holding the vector to pick
//            a component from.
//   dst    - register that receives the selected component.
//   before - NOTE(review): not referenced by any statement visible here;
//            confirm whether it is still needed.
43 // This code produces the following pseudo-IL:
44 // mov r1007, $src.y000
45 // cmov_logical r1007.x___, $flag.yyyy, r1007.xxxx, $src.xxxx
46 // mov r1006, $src.z000
47 // cmov_logical r1007.x___, $flag.zzzz, r1006.xxxx, r1007.xxxx
48 // mov r1006, $src.w000
49 // cmov_logical $dst.x___, $flag.wwww, r1006.xxxx, r1007.xxxx
51 AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI,
52 unsigned flag, unsigned src, unsigned dst, bool before)
54 MachineBasicBlock::iterator I = *MI;
55 DebugLoc DL = MI->getDebugLoc();
// Stage 1: choose between the x and y components, result in R1007.
56 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
59 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
// Stage 2: optionally replace the running result with the z component.
63 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006)
66 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1007)
69 .addReg(AMDIL::R1007);
// Stage 3: optionally replace it with the w component and write to dst.
70 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006)
73 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_W_i32), dst)
76 .addReg(AMDIL::R1007);
// Narrows the 128-bit value loaded into R1011 down to the element that was
// actually requested.  The component flags are read from R1008 and the low
// bits of R1010 select the byte / half-word (NOTE(review): R1010 presumably
// holds the byte address -- confirm against the caller).  R1006, R1007 and
// R1008 are overwritten as temporaries; the result is left in R1011.
//   MI - memory instruction being expanded; getMemorySize(MI) picks the path.
79 // We have a 128 bit load but a 8/16/32bit value, so we need to
80 // select the correct component and make sure that the correct
81 // bits are selected. For the 8 and 16 bit cases we need to
82 // extract from the component the correct bits and for 32 bits
83 // we just need to select the correct component.
85 AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI)
87 MachineBasicBlock::iterator I = *MI;
88 DebugLoc DL = MI->getDebugLoc();
// Always start by picking the 32-bit component out of the 128-bit load.
89 emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false);
// 8-bit access: (R1010 & 3) chooses one of the four bytes of that component.
90 if (getMemorySize(MI) == 1) {
91 // This produces the following pseudo-IL:
92 // iand r1006.x___, r1010.xxxx, l14.xxxx
93 // mov r1006, r1006.xxxx
94 // iadd r1006, r1006, {0, -1, -2, -3}
95 // ieq r1008, r1006, 0
96 // mov r1011, r1011.xxxx
97 // ishr r1011, r1011, {0, 8, 16, 24}
98 // mov r1007, r1011.y000
99 // cmov_logical r1007.x___, r1008.yyyy, r1007.xxxx, r1011.xxxx
100 // mov r1006, r1011.z000
101 // cmov_logical r1007.x___, r1008.zzzz, r1006.xxxx, r1007.xxxx
102 // mov r1006, r1011.w000
103 // cmov_logical r1011.x___, r1008.wwww, r1006.xxxx, r1007.xxxx
104 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1006)
105 .addReg(AMDIL::R1010)
106 .addImm(mMFI->addi32Literal(3));
107 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1006)
108 .addReg(AMDIL::R1006);
// The 128-bit literal below has the 32-bit lanes {0, -1, -2, -3}; after the
// add, exactly the lane matching the byte index becomes zero.
109 BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006)
110 .addReg(AMDIL::R1006)
111 .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
112 (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
113 BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
114 .addReg(AMDIL::R1006)
115 .addImm(mMFI->addi32Literal(0));
116 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011)
117 .addReg(AMDIL::R1011);
// Per-lane shift amounts {0, 8, 16, 24}: lane i holds the dword shifted so
// that byte i sits in the low bits.
118 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011)
119 .addReg(AMDIL::R1011)
120 .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
// Reuse the component extractor with the new byte mask in R1008.
121 emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false);
// 16-bit access: bit 1 of R1010 chooses the upper or lower half-word.
122 } else if (getMemorySize(MI) == 2) {
123 // This produces the following pseudo-IL:
124 // ishr r1007.x___, r1010.xxxx, 1
125 // iand r1008.x___, r1007.xxxx, 1
126 // ishr r1007.x___, r1011.xxxx, 16
127 // cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx
128 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
129 .addReg(AMDIL::R1010)
130 .addImm(mMFI->addi32Literal(1));
131 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
132 .addReg(AMDIL::R1007)
133 .addImm(mMFI->addi32Literal(1));
134 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
135 .addReg(AMDIL::R1011)
136 .addImm(mMFI->addi32Literal(16));
137 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1011)
138 .addReg(AMDIL::R1008)
139 .addReg(AMDIL::R1007)
140 .addReg(AMDIL::R1011);
143 // This function adjusts the address calculation so that the access targets
144 // a vector register type instead of using a dword-addressed load.
//
// The address in R1010 is split into two parts: R1007 receives R1010 >> 4
// (two shifts whose amounts sum to 4 in both modes) and R1008 receives the
// component index in between.  R1008 is then turned into a per-lane equality
// mask suitable for emitComponentExtract.
//   MI          - insertion point and DebugLoc source.
//   is32bit     - true selects the constants for four 32-bit components per
//                 vector; false selects those for two components
//                 (NOTE(review): presumably 64-bit elements -- confirm).
//   needsSelect - NOTE(review): presumably guards the mask-building sequence
//                 introduced by the "component selection" comment -- confirm.
146 AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect)
148 MachineBasicBlock::iterator I = *MI;
149 DebugLoc DL = MI->getDebugLoc();
150 // This produces the following pseudo-IL:
151 // ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3
152 // iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1
153 // ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1
154 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
155 .addReg(AMDIL::R1010)
156 .addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3));
157 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
158 .addReg(AMDIL::R1007)
159 .addImm(mMFI->addi32Literal((is32bit) ? 3 : 1));
160 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
161 .addReg(AMDIL::R1007)
162 .addImm(mMFI->addi32Literal((is32bit) ? 2 : 1));
164 // If the component selection is required, the following
165 // pseudo-IL is produced.
166 // mov r1008, r1008.xxxx
167 // iadd r1008, r1008, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1}
168 // ieq r1008, r1008, 0
169 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
170 .addReg(AMDIL::R1008);
// Subtract the lane number from the broadcast index; the matching lane
// becomes zero and is detected by the IEQ below.
171 BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
172 .addReg(AMDIL::R1008)
173 .addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL,
174 (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) :
176 BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
177 .addReg(AMDIL::R1008)
178 .addImm(mMFI->addi32Literal(0));
181 // This function emits a switch statement and writes 32bit/64bit
182 // value to a 128bit vector register type.
//
// The switch is keyed on R1008; each arm stores R1011 through one of the
// SCRATCHSTORE_* opcodes to the entry addressed by R1007.
//   MI      - store being expanded; its pointer ID must be non-zero.
//   is32bit - true uses the single-component X/Y opcodes for the default and
//             case-1 arms; false uses the XY/ZW opcodes there.
184 AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit)
186 MachineBasicBlock::iterator I = *MI;
187 uint32_t xID = getPointerID(MI);
188 assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
189 // This section generates the following pseudo-IL:
192 // mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y}
195 // mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
197 // if is32bit is true, case 2 and 3 are emitted.
199 // mov x1[r1007.x].__z_, r1011.x
202 // mov x1[r1007.x].___w, r1011.x
// NOTE(review): the SWITCH carries MI's DebugLoc while every other
// instruction in this sequence is given DL -- confirm this is intentional.
206 BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::SWITCH))
207 .addReg(AMDIL::R1008);
// Default arm: component index 0.
208 BuildMI(*mBB, I, DL, mTII->get(AMDIL::DEFAULT));
210 mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_X : AMDIL::SCRATCHSTORE_XY)
212 .addReg(AMDIL::R1011)
214 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
// Case 1: second component (32-bit) or upper half (otherwise).
215 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(1);
217 mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_Y : AMDIL::SCRATCHSTORE_ZW), AMDIL::R1007)
218 .addReg(AMDIL::R1011)
220 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
// Cases 2 and 3: third and fourth components.
222 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(2);
224 mTII->get(AMDIL::SCRATCHSTORE_Z), AMDIL::R1007)
225 .addReg(AMDIL::R1011)
227 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
228 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(3);
230 mTII->get(AMDIL::SCRATCHSTORE_W), AMDIL::R1007)
231 .addReg(AMDIL::R1011)
233 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
235 BuildMI(*mBB, I, DL, mTII->get(AMDIL::ENDSWITCH));
// Expands a load from private memory.
// When private memory is not hardware backed, or the device supports
// PrivateUAV, the load is delegated to expandGlobalLoad.  Otherwise the
// address is set up by expandLoadStartCode, the data is fetched with
// SCRATCHLOAD into R1011 (the path depends on getMemorySize(MI)), and the
// result is finally moved from R1011 into MI's destination register.
//   MI - the private load instruction to expand.
239 AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI)
241 MachineBasicBlock::iterator I = *MI;
242 bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
243 if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
244 return expandGlobalLoad(MI);
// A kernel that reads scratch memory without having any allocated is
// reported as an error.
246 if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID)
247 && mKM->isKernel()) {
248 mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
250 uint32_t xID = getPointerID(MI);
251 assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
// Fallback: use the device's scratch resource ID and record a recoverable
// error.  NOTE(review): presumably taken only when xID is zero -- confirm.
253 xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
254 mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
257 // These instructions go before the current MI.
258 expandLoadStartCode(MI);
259 switch (getMemorySize(MI)) {
261 // Since the private register is a 128 bit aligned, we have to align the address
262 // first, since our source address is 32bit aligned and then load the data.
263 // This produces the following pseudo-IL:
264 // ishr r1010.x___, r1010.xxxx, 4
265 // mov r1011, x1[r1010.x]
267 mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
268 .addReg(AMDIL::R1010)
269 .addImm(mMFI->addi32Literal(4));
271 mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
272 .addReg(AMDIL::R1010)
// Sub-vector path using four 32-bit components per scratch entry: compute the
// entry index and component mask, load the entry, then select the data.
278 emitVectorAddressCalc(MI, true, true);
279 // This produces the following pseudo-IL:
280 // mov r1011, x1[r1007.x]
282 mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
283 .addReg(AMDIL::R1007)
285 // These instructions go after the current MI.
286 emitDataLoadSelect(MI);
// Two-component path: the entry is loaded and one of its halves is chosen
// with a 64-bit conditional move.
289 emitVectorAddressCalc(MI, false, true);
290 // This produces the following pseudo-IL:
291 // mov r1011, x1[r1007.x]
292 // mov r1007, r1011.zw00
293 // cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1007.zw
295 mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
296 .addReg(AMDIL::R1007)
298 // These instructions go after the current MI.
300 mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007)
301 .addReg(AMDIL::R1011)
304 mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011)
305 .addReg(AMDIL::R1008)
306 .addReg(AMDIL::R1011)
307 .addReg(AMDIL::R1007);
// Post-process the loaded value and copy it into MI's destination register
// with the move opcode that matches the destination's register class.
310 expandPackedData(MI);
311 expandExtendLoad(MI);
312 BuildMI(*mBB, I, MI->getDebugLoc(),
313 mTII->get(getMoveInstFromID(
314 MI->getDesc().OpInfo[0].RegClass)),
315 MI->getOperand(0).getReg())
316 .addReg(AMDIL::R1011);
// Expands a load from constant memory.
// Loads that are not hardware instructions, or that carry no memory operands,
// are delegated to expandGlobalLoad.  Otherwise the data is fetched with
// CBLOAD into R1011 (the path depends on getMemorySize(MI)), post-processed,
// and moved into MI's destination register; MI's operand 0 is then rewritten
// to R1011.
//   MI - the constant load instruction to expand.
321 AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI)
323 MachineBasicBlock::iterator I = *MI;
324 if (!isHardwareInst(MI) || MI->memoperands_empty()) {
325 return expandGlobalLoad(MI);
327 uint32_t cID = getPointerID(MI);
// NOTE(review): presumably a second fallback for loads whose pointer ID is
// not usable as a constant buffer -- confirm the guarding condition.
329 return expandGlobalLoad(MI);
// A kernel that reads constant memory without having any allocated is
// reported as an error.
331 if (!mMFI->usesMem(AMDILDevice::CONSTANT_ID)
332 && mKM->isKernel()) {
333 mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
337 // These instructions go before the current MI.
338 expandLoadStartCode(MI);
339 switch (getMemorySize(MI)) {
// Full-vector path: convert the address in R1010 to a 16-byte index
// (shift right by 4) and load the whole entry.
342 mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
343 .addReg(AMDIL::R1010)
344 .addImm(mMFI->addi32Literal(4));
346 mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
347 .addReg(AMDIL::R1010)
// Sub-vector path using four 32-bit components per entry.
353 emitVectorAddressCalc(MI, true, true);
355 mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
356 .addReg(AMDIL::R1007)
358 // These instructions go after the current MI.
359 emitDataLoadSelect(MI);
// Two-component path.  Unlike expandPrivateLoad, the flag in R1008 is widened
// with VCREATE_v2i32 before the 64-bit conditional move.
362 emitVectorAddressCalc(MI, false, true);
364 mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
365 .addReg(AMDIL::R1007)
367 // These instructions go after the current MI.
369 mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007)
370 .addReg(AMDIL::R1011)
373 mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1008)
374 .addReg(AMDIL::R1008);
376 mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011)
377 .addReg(AMDIL::R1008)
378 .addReg(AMDIL::R1011)
379 .addReg(AMDIL::R1007);
// Post-process the loaded value and copy it into MI's destination register.
382 expandPackedData(MI);
383 expandExtendLoad(MI);
384 BuildMI(*mBB, I, MI->getDebugLoc(),
385 mTII->get(getMoveInstFromID(
386 MI->getDesc().OpInfo[0].RegClass)),
387 MI->getOperand(0).getReg())
388 .addReg(AMDIL::R1011);
// Redirect MI's own destination to R1011 (expandPrivateLoad has no
// equivalent step).
389 MI->getOperand(0).setReg(AMDIL::R1011);
// Expands a load from the constant pool.
// Loads that isStaticCPLoad rejects are handled like ordinary constant loads.
// For static loads, the pool entry named by operand 1's index is looked up in
// the function's MachineConstantPool and passed to emitCPInst together with
// the extend-load flag.
//   MI - the constant-pool load instruction to expand.
393 AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI)
395 if (!isStaticCPLoad(MI)) {
396 return expandConstantLoad(MI);
398 uint32_t idx = MI->getOperand(1).getIndex();
399 const MachineConstantPool *MCP = MI->getParent()->getParent()
401 const std::vector<MachineConstantPoolEntry> &consts
402 = MCP->getConstants();
403 const Constant *C = consts[idx].Val.ConstVal;
404 emitCPInst(MI, C, mKM, 0, isExtendLoad(MI));
// Expands a store to private memory.
// When private memory is not hardware backed, or the device supports
// PrivateUAV, the store is delegated to expandGlobalStore.  Otherwise
// expandStoreSetupCode places the data in R1011 and the address in R1010, and
// the path chosen by getMemorySize(MI) writes it to scratch.  The sub-dword
// paths first load the existing entry (read-modify-write), merge the new
// bits into it, and then write the merged dword back through
// emitVectorSwitchWrite.  R1001, R1002 and R1003 are used as temporaries.
//   MI - the private store instruction to expand.
409 AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI)
411 MachineBasicBlock::iterator I = *MI;
412 bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
413 if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
414 return expandGlobalStore(MI);
// A kernel that writes scratch memory without having any allocated is
// reported as an error.
416 if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID)
417 && mKM->isKernel()) {
418 mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
420 uint32_t xID = getPointerID(MI);
421 assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
// Fallback: use the device's scratch resource ID and record a recoverable
// error.  NOTE(review): presumably taken only when xID is zero -- confirm.
423 xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
424 mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
427 // These instructions go before the current MI.
428 expandStoreSetupCode(MI);
429 switch (getMemorySize(MI)) {
431 // This section generates the following pseudo-IL:
432 // ishr r1010.x___, r1010.xxxx, 4
433 // mov x1[r1010.x], r1011
435 mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
436 .addReg(AMDIL::R1010)
437 .addImm(mMFI->addi32Literal(4));
438 BuildMI(*mBB, I, MI->getDebugLoc(),
439 mTII->get(AMDIL::SCRATCHSTORE), AMDIL::R1010)
440 .addReg(AMDIL::R1011)
// Byte store: read the containing dword, replace one byte, write it back.
444 emitVectorAddressCalc(MI, true, true);
445 // This section generates the following pseudo-IL:
446 // mov r1002, x1[r1007.x]
448 mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
449 .addReg(AMDIL::R1007)
451 emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true);
452 // This section generates the following pseudo-IL:
453 // iand r1003.x, r1010.x, 3
454 // mov r1003, r1003.xxxx
455 // iadd r1001, r1003, {0, -1, -2, -3}
456 // ieq r1001, r1001, 0
457 // mov r1002, r1002.xxxx
458 // ishr r1002, r1002, {0, 8, 16, 24}
459 // mov r1011, r1011.xxxx
460 // cmov_logical r1002, r1001, r1011, r1002
461 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003)
462 .addReg(AMDIL::R1010)
463 .addImm(mMFI->addi32Literal(3));
464 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1003)
465 .addReg(AMDIL::R1003);
466 BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001)
467 .addReg(AMDIL::R1003)
468 .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
469 (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
470 BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001)
471 .addReg(AMDIL::R1001)
472 .addImm(mMFI->addi32Literal(0));
473 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1002)
474 .addReg(AMDIL::R1002);
475 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002)
476 .addReg(AMDIL::R1002)
477 .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
478 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011)
479 .addReg(AMDIL::R1011);
// Lane i of R1002 now holds old byte i; overwrite the addressed lane with
// the new data.
480 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002)
481 .addReg(AMDIL::R1001)
482 .addReg(AMDIL::R1011)
483 .addReg(AMDIL::R1002);
// Re-pack the four byte lanes into one dword.  HD4XXX devices use the
// mask/shift/or sequence; NOTE(review): the ubit_insert sequence that follows
// is presumably the alternative for other generations -- confirm.
484 if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
485 // This section generates the following pseudo-IL:
486 // iand r1002, r1002, 0xFF
487 // ishl r1002, r1002, {0, 8, 16, 24}
488 // ior r1002.xy, r1002.xy, r1002.zw
489 // ior r1011.x, r1002.x, r1002.y
490 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002)
491 .addReg(AMDIL::R1002)
492 .addImm(mMFI->addi32Literal(0xFF));
493 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002)
494 .addReg(AMDIL::R1002)
495 .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
496 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1002)
497 .addReg(AMDIL::R1002).addReg(AMDIL::R1002);
498 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
499 .addReg(AMDIL::R1002).addReg(AMDIL::R1002);
501 // This section generates the following pseudo-IL:
502 // mov r1001.xy, r1002.yw
503 // mov r1002.xy, r1002.xz
504 // ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy
505 // mov r1001.x, r1002.y
506 // ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
507 BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1001)
508 .addReg(AMDIL::R1002);
509 BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1002)
510 .addReg(AMDIL::R1002);
511 BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1002)
512 .addImm(mMFI->addi32Literal(8))
513 .addImm(mMFI->addi32Literal(8))
514 .addReg(AMDIL::R1001)
515 .addReg(AMDIL::R1002);
516 BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1001)
517 .addReg(AMDIL::R1002);
518 BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
519 .addImm(mMFI->addi32Literal(16))
520 .addImm(mMFI->addi32Literal(16))
521 .addReg(AMDIL::R1001)
522 .addReg(AMDIL::R1002);
// Write the merged dword back to the component it came from.
524 emitVectorAddressCalc(MI, true, false);
525 emitVectorSwitchWrite(MI, true);
// Half-word store: read the containing dword, replace one 16-bit half,
// write it back.
528 emitVectorAddressCalc(MI, true, true);
529 // This section generates the following pseudo-IL:
530 // mov r1002, x1[r1007.x]
532 mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
533 .addReg(AMDIL::R1007)
535 emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true);
536 // This section generates the following pseudo-IL:
537 // ishr r1003.x, r1010.x, 1
538 // iand r1003.x, r1003.x, 1
539 // ishr r1001.x, r1002.x, 16
540 // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x
541 // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x
542 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1003)
543 .addReg(AMDIL::R1010)
544 .addImm(mMFI->addi32Literal(1));
545 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003)
546 .addReg(AMDIL::R1003)
547 .addImm(mMFI->addi32Literal(1));
548 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1001)
549 .addReg(AMDIL::R1002)
550 .addImm(mMFI->addi32Literal(16));
// R1002 becomes the low half and R1001 the high half of the merged dword;
// whichever half R1003 (address bit 1) selects is replaced by the new data.
551 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1002)
552 .addReg(AMDIL::R1003)
553 .addReg(AMDIL::R1002)
554 .addReg(AMDIL::R1011);
555 BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1001)
556 .addReg(AMDIL::R1003)
557 .addReg(AMDIL::R1011)
558 .addReg(AMDIL::R1001);
// Re-combine the two halves into one dword (same generation split as in the
// byte path above).
559 if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
560 // This section generates the following pseudo-IL:
561 // iand r1002.x, r1002.x, 0xFFFF
562 // iand r1001.x, r1001.x, 0xFFFF
563 // ishl r1001.x, r1001.x, 16
564 // ior r1011.x, r1002.x, r1001.x
565 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1002)
566 .addReg(AMDIL::R1002)
567 .addImm(mMFI->addi32Literal(0xFFFF));
568 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1001)
569 .addReg(AMDIL::R1001)
570 .addImm(mMFI->addi32Literal(0xFFFF));
571 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1001)
572 .addReg(AMDIL::R1001)
573 .addImm(mMFI->addi32Literal(16));
574 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::R1011)
575 .addReg(AMDIL::R1002).addReg(AMDIL::R1001);
577 // This section generates the following pseudo-IL:
578 // ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
579 BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
580 .addImm(mMFI->addi32Literal(16))
581 .addImm(mMFI->addi32Literal(16))
582 .addReg(AMDIL::R1001)
583 .addReg(AMDIL::R1002);
585 emitVectorAddressCalc(MI, true, false);
586 emitVectorSwitchWrite(MI, true);
// Dword store: no merge needed, write R1011 straight to its component.
589 emitVectorAddressCalc(MI, true, false);
590 emitVectorSwitchWrite(MI, true);
// Two-component store: same, using the non-32-bit address split and opcodes.
593 emitVectorAddressCalc(MI, false, false);
594 emitVectorSwitchWrite(MI, false);
// Prepares the fixed registers used by the store expansions.
// R1011 receives the value to store: a literal 0 when operand 0 is undef,
// otherwise a copy of operand 0's register.  R1010 receives the address:
// operand 1 plus operand 2 when operand 2 is a register, otherwise a copy of
// operand 1.  expandAddressCalc and expandPackedData are then applied.
//   MI - the store instruction being expanded.
599 AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI)
601 MachineBasicBlock::iterator I = *MI;
// Data: use the move opcode that matches operand 0's register class.
603 if (MI->getOperand(0).isUndef()) {
604 BuildMI(*mBB, I, DL, mTII->get(getMoveInstFromID(
605 MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
606 .addImm(mMFI->addi32Literal(0));
608 BuildMI(*mBB, I, DL, mTII->get(getMoveInstFromID(
609 MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
610 .addReg(MI->getOperand(0).getReg());
// Address: base register, plus the offset register when there is one.
613 if (MI->getOperand(2).isReg()) {
614 BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_i32), AMDIL::R1010)
615 .addReg(MI->getOperand(1).getReg())
616 .addReg(MI->getOperand(2).getReg());
618 BuildMI(*mBB, I, DL, mTII->get(AMDIL::MOVE_i32), AMDIL::R1010)
619 .addReg(MI->getOperand(1).getReg());
621 expandAddressCalc(MI);
622 expandPackedData(MI);
// Converts the value in R1011 between its packed and unpacked layouts for
// packed-data memory operations.  getPackedID(MI) selects one of several
// sequences: the first four mask each lane, shift it into position and OR
// the lanes together; the remaining ones shift the packed value apart and
// rebuild a vector, using R1012 as a temporary.
// NOTE(review): presumably nothing is emitted when isPackedData(MI) is false
// and the case labels map each sequence to a specific packed type -- confirm;
// neither is visible in this part of the file.
//   MI - the memory instruction whose data is being packed or unpacked.
627 AMDIL789IOExpansion::expandPackedData(MachineInstr *MI)
629 MachineBasicBlock::iterator I = *MI;
630 if (!isPackedData(MI)) {
634 // If we have packed data, then the shift size is no longer
635 // the same as the load size and we need to adjust accordingly
636 switch(getPackedID(MI)) {
// Pack two 8-bit lanes: mask to 0xFF, shift lanes by {0, 8}, OR together.
641 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011)
642 .addReg(AMDIL::R1011)
643 .addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32)));
644 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011)
645 .addReg(AMDIL::R1011).addImm(mMFI->addi64Literal(8ULL << 32));
646 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
647 .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
// Pack four 8-bit lanes: mask to 0xFF, shift lanes by {0, 8, 16, 24}, then
// OR the halves together in two steps.
652 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
653 .addReg(AMDIL::R1011)
654 .addImm(mMFI->addi32Literal(0xFF));
655 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
656 .addReg(AMDIL::R1011)
657 .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
658 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1011)
659 .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
660 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
661 .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
// Pack two 16-bit lanes: mask to 0xFFFF, shift lanes by {0, 16}, OR together.
666 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011)
667 .addReg(AMDIL::R1011)
668 .addImm(mMFI->addi32Literal(0xFFFF));
669 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011)
670 .addReg(AMDIL::R1011)
671 .addImm(mMFI->addi64Literal(16ULL << 32));
672 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
673 .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
// Pack four 16-bit lanes: mask to 0xFFFF, shift, then combine with
// HILO_BITOR_v4i16.
678 BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
679 .addReg(AMDIL::R1011)
680 .addImm(mMFI->addi32Literal(0xFFFF));
681 BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
682 .addReg(AMDIL::R1011)
683 .addImm(mMFI->addi64Literal(16ULL << 32));
684 BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::R1011)
685 .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
// Unpack into two lanes: R1012 = R1011 >> 8, then pair R1011 with R1012.
689 BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012)
690 .addReg(AMDIL::R1011)
691 .addImm(mMFI->addi32Literal(8));
692 BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
693 .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
// Unpack into four lanes: broadcast R1011, then shift lanes right by
// {0, 8, 16, 24}.
697 BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i8), AMDIL::R1011)
698 .addReg(AMDIL::R1011);
699 BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011)
700 .addReg(AMDIL::R1011)
701 .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
// Unpack into two lanes: R1012 = R1011 >> 16, then pair R1011 with R1012.
706 BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012)
707 .addReg(AMDIL::R1011)
708 .addImm(mMFI->addi32Literal(16));
709 BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
710 .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
// Unpack into four lanes: shift both lanes of R1011 right by 16 into R1012,
// then combine the two registers with LCREATE_v2i64.
715 BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::R1012)
716 .addReg(AMDIL::R1011)
717 .addImm(mMFI->addi32Literal(16));
718 BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
719 .addReg(AMDIL::R1011).addReg(AMDIL::R1012);