1 //===-- R600LowerInstructions.cpp - TODO: Add brief description -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // TODO: Add full description
12 //===----------------------------------------------------------------------===//
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUUtil.h"
18 #include "AMDILMachineFunctionInfo.h"
19 #include "AMDILRegisterInfo.h"
20 #include "R600InstrInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Constants.h"
25 #include "llvm/Target/TargetInstrInfo.h"
/* Pass that rewrites AMDIL pseudo instructions into R600-native machine
 * instructions after instruction selection.
 * NOTE(review): this listing is gapped (original line numbers skip). The
 * anonymous-namespace opening, the `static char ID` declaration, the
 * `TargetMachine &TM` member (it is initialized as TM(tm) below), the
 * constructor body/closing, and the class closing brace are not visible
 * here -- confirm against the full file. */
32 class R600LowerInstructionsPass : public MachineFunctionPass {
/* Cached per-function / per-target state, set up in runOnMachineFunction
 * and the constructor respectively. */
37 AMDILMachineFunctionInfo * MFI;
38 const R600InstrInfo * TII;
39 MachineRegisterInfo * MRI;
41 void lowerFLT(MachineInstr &MI);
/* Emits code computing an address into indexReg from a pointer operand and
 * an index operand (see definition below).
 * NOTE(review): the `unsigned indexReg` parameter line is missing from
 * this listing but is implied by the call sites and the definition. */
43 void calcAddress(const MachineOperand &ptrOp,
44 const MachineOperand &indexOp,
46 MachineBasicBlock &MBB,
47 MachineBasicBlock::iterator I) const;
/* Expands an integer divide/modulo pseudo; div=true selects the quotient,
 * div=false the remainder (see definition below). */
49 void divMod(MachineInstr &MI,
50 MachineBasicBlock &MBB,
51 MachineBasicBlock::iterator I,
52 bool div = true) const;
55 R600LowerInstructionsPass(TargetMachine &tm) :
56 MachineFunctionPass(ID), TM(tm),
57 TII(static_cast<const R600InstrInfo*>(tm.getInstrInfo())),
61 const char *getPassName() const { return "R600 Lower Instructions"; }
62 virtual bool runOnMachineFunction(MachineFunction &MF);
65 } /* End anonymous namespace */
/* Pass identifier; MachineFunctionPass registration keys off its address. */
67 char R600LowerInstructionsPass::ID = 0;
/* Public factory used by the target to add this pass to the pipeline.
 * NOTE(review): the function's closing brace falls in a line missing from
 * this listing. */
69 FunctionPass *llvm::createR600LowerInstructionsPass(TargetMachine &tm) {
70 return new R600LowerInstructionsPass(tm);
/* Walks every instruction of every basic block and lowers AMDIL pseudo
 * opcodes into R600-native sequences; each handled pseudo is replaced by
 * newly built instruction(s) and erased at the bottom of the loop.
 * NOTE(review): this listing is gapped -- many `case` labels, `break`s,
 * operand lines, and closing braces fall in missing original lines.
 * Comments below state only what the visible code shows. */
73 bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
75 MRI = &MF.getRegInfo();
76 MFI = MF.getInfo<AMDILMachineFunctionInfo>();
78 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
80 MachineBasicBlock &MBB = *BB;
/* `Next` is captured before visiting I because the current instruction is
 * erased below; advancing from a deleted iterator would be invalid. */
81 for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
82 I != MBB.end(); I = Next, Next = llvm::next(I) ) {
84 MachineInstr &MI = *I;
85 switch(MI.getOpcode()) {
/* Builds FGE with the two source operands swapped (op2, op1). The case
 * label is in a missing line -- presumably the FLT lowering declared above
 * as lowerFLT; TODO confirm. */
87 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FGE))
88 .addOperand(MI.getOperand(0))
89 .addOperand(MI.getOperand(2))
90 .addOperand(MI.getOperand(1));
/* SETGE_INT vs ZERO, then ADD_INT, then XOR_INT into the destination --
 * case label missing; looks like a sign-based integer lowering (e.g. abs);
 * several operand lines are also missing, so verify before relying on it. */
95 unsigned setgt = MRI->createVirtualRegister(
96 &AMDIL::R600_TReg32RegClass);
97 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT),
99 .addOperand(MI.getOperand(1))
100 .addReg(AMDIL::ZERO);
102 unsigned add_int = MRI->createVirtualRegister(
103 &AMDIL::R600_TReg32RegClass);
104 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT),
107 .addOperand(MI.getOperand(1));
109 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::XOR_INT))
110 .addOperand(MI.getOperand(0))
117 /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and
118 * remove the ABS instruction.*/
/* FABS lowers to a MOVE with MO_FLAG_ABS set on the source operand. */
119 case AMDIL::FABS_f32:
121 MI.getOperand(1).addTargetFlag(MO_FLAG_ABS);
122 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32))
123 .addOperand(MI.getOperand(0))
124 .addOperand(MI.getOperand(1));
/* Float bitwise-OR: convert both floats to int (FTOI), OR as integers,
 * convert back (ITOF). Some operand lines are missing from this listing. */
127 case AMDIL::BINARY_OR_f32:
129 unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass);
130 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp0)
131 .addOperand(MI.getOperand(1));
132 unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass);
133 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp1)
134 .addOperand(MI.getOperand(2));
135 unsigned tmp2 = MRI->createVirtualRegister(&AMDIL::GPRI32RegClass);
136 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::BINARY_OR_i32), tmp2)
139 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::ITOF), MI.getOperand(0).getReg())
/* CMOVLOG: rebuilt with source operands 2 and 3 swapped (select-operand
 * order differs between AMDIL and R600). */
143 case AMDIL::CMOVLOG_f32:
144 BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(MI.getOpcode()))
145 .addOperand(MI.getOperand(0))
146 .addOperand(MI.getOperand(1))
147 .addOperand(MI.getOperand(3))
148 .addOperand(MI.getOperand(2));
/* Integer conditional move maps to CNDE_INT, again with ops 2/3 swapped. */
151 case AMDIL::CMOVLOG_i32:
152 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT))
153 .addOperand(MI.getOperand(0))
154 .addOperand(MI.getOperand(1))
155 .addOperand(MI.getOperand(3))
156 .addOperand(MI.getOperand(2));
/* CLAMP to [ZERO, ONE] becomes a MOV with the CLAMP flag on the dest. */
159 case AMDIL::CLAMP_f32:
161 MachineOperand lowOp = MI.getOperand(2);
162 MachineOperand highOp = MI.getOperand(3);
163 if (lowOp.isReg() && highOp.isReg()
164 && lowOp.getReg() == AMDIL::ZERO && highOp.getReg() == AMDIL::ONE) {
165 MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
166 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
167 .addOperand(MI.getOperand(0))
168 .addOperand(MI.getOperand(1));
170 /* XXX: Handle other cases */
/* Unsigned divide -- presumably forwarded to divMod(); the call line is in
 * a missing line of this listing. */
176 case AMDIL::UDIV_i32:
180 /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
181 /* case AMDIL::DIV_INF_f32:
183 unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::GPRF32RegClass);
184 BuildMI(MBB, I, MBB.findDebugLoc(I),
185 TM.getInstrInfo()->get(AMDIL::RECIP_CLAMPED), tmp0)
186 .addOperand(MI.getOperand(2));
187 BuildMI(MBB, I, MBB.findDebugLoc(I),
188 TM.getInstrInfo()->get(AMDIL::MUL_IEEE_f32))
189 .addOperand(MI.getOperand(0))
191 .addOperand(MI.getOperand(1));
194 */ /* XXX: This is an optimization */
/* Global loads: compute the buffer address, force the result into the
 * X-channel register class, and emit an Evergreen VTX_READ. */
196 case AMDIL::GLOBALLOAD_f32:
197 case AMDIL::GLOBALLOAD_i32:
199 MachineOperand &ptrOperand = MI.getOperand(1);
200 MachineOperand &indexOperand = MI.getOperand(2);
202 MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
204 /* Calculate the address within the VTX buffer */
205 calcAddress(ptrOperand, indexOperand, indexReg, MBB, I);
207 /* Make sure the VTX_READ_eg writes to the X chan */
208 MRI->setRegClass(MI.getOperand(0).getReg(),
209 &AMDIL::R600_TReg32_XRegClass);
211 /* Add the VTX_READ_eg instruction */
212 BuildMI(MBB, I, MBB.findDebugLoc(I),
213 TII->get(AMDIL::VTX_READ_eg))
214 .addOperand(MI.getOperand(0))
/* Global stores: copy the value into an X-channel register, compute the
 * byte address, shift it to an element index (LSHR_eg), and emit an
 * Evergreen cacheless RAT write. Declarations of rwReg/shiftReg and
 * several operand lines fall in missing lines of this listing. */
220 case AMDIL::GLOBALSTORE_i32:
221 case AMDIL::GLOBALSTORE_f32:
223 MachineOperand &ptrOperand = MI.getOperand(1);
224 MachineOperand &indexOperand = MI.getOperand(2);
226 MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
227 unsigned byteIndexReg =
228 MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
230 MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
232 MRI->createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
234 /* Move the store value to the correct register class */
235 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY), rwReg)
236 .addOperand(MI.getOperand(0));
238 /* Calculate the address in the RAT */
239 calcAddress(ptrOperand, indexOperand, byteIndexReg, MBB, I);
242 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV), shiftReg)
243 .addReg(AMDIL::ALU_LITERAL_X)
246 /* XXX: Check GPU family */
247 BuildMI(MBB, I, MBB.findDebugLoc(I),
248 TII->get(AMDIL::LSHR_eg), indexReg)
249 .addReg(byteIndexReg)
252 /* XXX: Check GPU Family */
253 BuildMI(MBB, I, MBB.findDebugLoc(I),
254 TII->get(AMDIL::RAT_WRITE_CACHELESS_eg))
/* SETGT_INT with swapped sources (op2, op1) -- case label missing; looks
 * like a less-than comparison lowering, TODO confirm. */
261 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_INT))
262 .addOperand(MI.getOperand(0))
263 .addOperand(MI.getOperand(2))
264 .addOperand(MI.getOperand(1));
/* Constant loads: 0.0 / 1.0 / 0.5 map to the inline registers ZERO / ONE /
 * HALF; otherwise a MOV from ALU_LITERAL_X materializes the literal.
 * Several guard/brace lines are missing from this listing. */
266 case AMDIL::LOADCONST_f32:
267 case AMDIL::LOADCONST_i32:
269 bool canInline = false;
271 MachineOperand & dstOp = MI.getOperand(0);
272 MachineOperand & immOp = MI.getOperand(1);
273 if (immOp.isFPImm()) {
274 const ConstantFP * cfp = immOp.getFPImm();
277 inlineReg = AMDIL::ZERO;
278 } else if (cfp->isExactlyValue(1.0f)) {
280 inlineReg = AMDIL::ONE;
281 } else if (cfp->isExactlyValue(0.5f)) {
283 inlineReg = AMDIL::HALF;
288 MachineOperand * use = dstOp.getNextOperandForReg();
289 /* The lowering operation for CLAMP needs to have the immediates
290 * as operands, so we must propagate them. */
292 MachineOperand * next = use->getNextOperandForReg();
293 if (use->getParent()->getOpcode() == AMDIL::CLAMP_f32) {
294 use->setReg(inlineReg);
298 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
302 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::MOV))
304 .addReg(AMDIL::ALU_LITERAL_X)
/* MASK_WRITE: tag the defining instruction of the masked virtual register
 * with MO_FLAG_MASK instead of emitting any code. */
310 case AMDIL::MASK_WRITE:
312 unsigned maskedRegister = MI.getOperand(0).getReg();
313 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
314 MachineInstr * defInstr = MRI->getVRegDef(maskedRegister);
315 MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
316 def->addTargetFlag(MO_FLAG_MASK);
/* VEXTRACT: decrement the channel immediate (operand 2) by one -- the
 * surrounding rebuild/fall-through code is in missing lines. */
320 case AMDIL::VEXTRACT_v4f32:
321 MI.getOperand(2).setImm(MI.getOperand(2).getImm() - 1);
/* Integer negate becomes SUB_INT (0 - src; the zero-operand line is
 * missing from this listing -- confirm). */
324 case AMDIL::NEGATE_i32:
325 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT))
326 .addOperand(MI.getOperand(0))
328 .addOperand(MI.getOperand(1));
/* Sets the NEG flag on the source and emits the ISA MOV -- case label is
 * missing; presumably a float-negate lowering. */
333 MI.getOperand(1).addTargetFlag(MO_FLAG_NEG);
334 BuildMI(MBB, I, MBB.findDebugLoc(I),
335 TII->get(TII->getISAOpcode(AMDIL::MOV)))
336 .addOperand(MI.getOperand(0))
337 .addOperand(MI.getOperand(1));
/* NEG flag on operand 2 plus ADD_f32 -- case label missing; presumably a
 * float-subtract lowering (a + (-b)). */
343 MI.getOperand(2).addTargetFlag(MO_FLAG_NEG);
344 BuildMI(MBB, I, MBB.findDebugLoc(I),
345 TII->get(TII->getISAOpcode(AMDIL::ADD_f32)))
346 .addOperand(MI.getOperand(0))
347 .addOperand(MI.getOperand(1))
348 .addOperand(MI.getOperand(2));
/* VINSERT: reads the swizzle immediate (operand 4); the decode logic
 * between here and the SET_CHAN emission is in missing lines. The fprintf
 * below is leftover debug output. */
352 case AMDIL::VINSERT_v4f32:
355 int64_t swz = MI.getOperand(4).getImm();
372 fprintf(stderr, "swizzle: %ld\n", swz);
376 BuildMI(MBB, I, MBB.findDebugLoc(I),
377 TM.getInstrInfo()->get(AMDIL::SET_CHAN))
378 .addOperand(MI.getOperand(1))
379 .addOperand(MI.getOperand(2))
382 BuildMI(MBB, I, MBB.findDebugLoc(I),
383 TM.getInstrInfo()->get(AMDIL::COPY))
384 .addOperand(MI.getOperand(0))
385 .addOperand(MI.getOperand(1));
/* Handled pseudos reach here and are deleted; the switch/loop closing
 * braces and the function's return fall in missing lines. */
392 MI.eraseFromParent();
/* Emits the address computation for a global load/store: normally an
 * ADD_INT of the pointer and index into indexReg; when the index is the
 * immediate 0 the add is skipped and the pointer register is just COPYed.
 * NOTE(review): the `unsigned indexReg` parameter line, the operand lines
 * of both BuildMI calls, and the else/closing braces are in lines missing
 * from this listing. */
398 void R600LowerInstructionsPass::calcAddress(const MachineOperand &ptrOp,
399 const MachineOperand &indexOp,
401 MachineBasicBlock &MBB,
402 MachineBasicBlock::iterator I) const
404 /* Optimize the case where the indexOperand is 0 */
405 if (indexOp.isImm() && indexOp.getImm() == 0) {
406 assert(ptrOp.isReg());
407 BuildMI(MBB, I, MBB.findDebugLoc(I),
408 TII->get(AMDIL::COPY), indexReg)
411 BuildMI(MBB, I, MBB.findDebugLoc(I),
412 TII->get(AMDIL::ADD_INT), indexReg)
418 /* Mostly copied from tgsi_divmod() in r600_shader.c */
/* Expands an unsigned 32-bit divide/modulo into an R600 instruction
 * sequence: approximate 2^32/denominator with RECIP_UINT, refine the
 * reciprocal with a MULLO/MULHI error-correction step, compute the
 * candidate quotient q = MULHI(tmp0, numerator) and remainder
 * r = numerator - q*denominator, then select between q-1/q/q+1 (div) or
 * the corresponding remainder corrections (mod) via CNDE_INT.
 * NOTE(review): the `bool div` parameter line and many operand/brace lines
 * fall in lines missing from this listing; the overall structure matches
 * the tgsi_divmod reference cited above but cannot be fully verified here. */
419 void R600LowerInstructionsPass::divMod(MachineInstr &MI,
420 MachineBasicBlock &MBB,
421 MachineBasicBlock::iterator I,
424 unsigned dst = MI.getOperand(0).getReg();
425 MachineOperand &numerator = MI.getOperand(1);
426 MachineOperand &denominator = MI.getOperand(2);
427 /* rcp = RECIP(denominator) = 2^32 / denominator + e
428 * e is rounding error */
429 unsigned rcp = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
430 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getRECIP_UINT()), rcp)
431 .addOperand(denominator);
433 /* rcp_lo = lo(rcp * denominator) */
434 unsigned rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
435 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()), rcp_lo)
437 .addOperand(denominator);
439 /* rcp_hi = HI (rcp * denominator) */
440 unsigned rcp_hi = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
441 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), rcp_hi)
443 .addOperand(denominator);
/* neg_rcp_lo = -rcp_lo (SUB_INT; the zero source is in a missing line). */
445 unsigned neg_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
446 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), neg_rcp_lo)
/* abs_rcp_lo: select neg_rcp_lo vs rcp_lo based on rcp_hi (CNDE_INT;
 * operand lines missing). */
450 unsigned abs_rcp_lo = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
451 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), abs_rcp_lo)
/* e = MULHI(abs_rcp_lo, rcp) -- the reciprocal's error term. */
456 unsigned e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
457 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), e)
461 unsigned rcp_plus_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
462 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), rcp_plus_e)
466 unsigned rcp_sub_e = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
467 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), rcp_sub_e)
471 /* tmp0 = rcp_hi == 0 ? rcp_plus_e : rcp_sub_e */
472 unsigned tmp0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
473 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), tmp0)
/* q = MULHI(tmp0, numerator) -- first-cut quotient. */
478 unsigned q = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
479 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULHI_UINT()), q)
481 .addOperand(numerator);
483 /* num_sub_r = q * denominator */
484 unsigned num_sub_r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
485 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(TII->getMULLO_UINT()),
488 .addOperand(denominator);
/* r = numerator - num_sub_r -- candidate remainder. */
490 unsigned r = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
491 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), r)
492 .addOperand(numerator)
/* Correction predicates: r >= denominator and r >= 0 (remainder may be off
 * by one denominator in either direction). */
495 unsigned r_ge_den = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
496 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_den)
498 .addOperand(denominator);
500 unsigned r_ge_zero = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
501 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), r_ge_zero)
502 .addOperand(numerator)
505 unsigned tmp1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
506 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::AND_INT), tmp1)
/* val0/val1 hold the +-1 (div) or +-denominator (mod) corrections; result
 * selects between them. The div/mod branch structure around lines
 * "514"-"535" is in missing lines -- confirm against the reference. */
510 unsigned val0 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
511 unsigned val1 = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
512 unsigned result = MRI->createVirtualRegister(&AMDIL::R600_TReg32RegClass);
514 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val0)
516 .addReg(AMDIL::ONE_INT);
518 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val1)
520 .addReg(AMDIL::ONE_INT);
522 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result)
527 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SUB_INT), val0)
529 .addOperand(denominator);
531 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::ADD_INT), val1)
533 .addOperand(denominator);
535 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), result)
541 /* XXX: Do we need to set to MAX_INT if denominator is 0? */
542 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::CNDE_INT), dst)