1 //===-- R600CodeEmitter.cpp - R600 machine code emitter pass ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
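10 // Defines R600CodeEmitter, a MachineFunctionPass that writes each machine instruction (ALU, texture, flow-control and selected native instructions) to a formatted_raw_ostream as a stream of bytes.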
12 //===----------------------------------------------------------------------===//
15 #include "AMDGPUUtil.h"
16 #include "AMDILCodeEmitter.h"
17 #include "AMDILInstrInfo.h"
18 #include "AMDILMachineFunctionInfo.h"
19 #include "AMDILUtilityFunctions.h"
20 #include "R600RegisterInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/DataTypes.h"
25 #include "llvm/Support/FormattedStream.h"
26 #include "llvm/Target/TargetMachine.h"
/* Number of null bytes written in place of a source operand that is unused
 * or absent (see the emitNullBytes(SRC_BYTE_COUNT) calls). Presumably equal
 * to the encoded size of one source operand as written by emitSrc(). */
30 #define SRC_BYTE_COUNT 11
/* Number of null bytes written by emitDst() on its fallback path. Presumably
 * equal to the encoded size of one destination operand. */
31 #define DST_BYTE_COUNT 5
/* Machine-function pass that walks every instruction of a function and
 * writes its encoding, byte by byte, to an output stream. It also implements
 * the AMDILCodeEmitter interface (getMachineOpValue). */
37 class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
/* Stream that receives every emitted byte. */
42 formatted_raw_ostream &_OS;
/* Target/function state; assigned at the start of runOnMachineFunction(). */
43 const TargetMachine * TM;
44 const MachineRegisterInfo * MRI;
45 AMDILMachineFunctionInfo * MFI;
46 const R600RegisterInfo * TRI;
/* Set in runOnMachineFunction(): false for devices whose name starts with
 * "rv7", true otherwise. */
47 bool evergreenEncoding;
/* Channel index (0..3) of the reduction step currently being emitted; used
 * as the source/destination channel while a reduction op is expanded. */
50 unsigned reductionElement;
53 unsigned section_start;
/* Constructs the pass; all output goes to OS. */
57 R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
58 _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
61 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
/* Pass entry point: emits the encoding of every instruction in MF. */
63 bool runOnMachineFunction(MachineFunction &MF);
/* Operand-encoding hook of AMDILCodeEmitter. */
64 virtual uint64_t getMachineOpValue(const MachineInstr &MI,
65 const MachineOperand &MO) const;
/* Per-instruction-class emitters. */
69 void emitALUInstr(MachineInstr &MI);
70 void emitSrc(const MachineOperand & MO);
71 void emitDst(const MachineOperand & MO);
72 void emitALU(MachineInstr &MI, unsigned numSrc);
73 void emitTexInstr(MachineInstr &MI);
74 void emitFCInstr(MachineInstr &MI);
/* Returns the hardware opcode to emit for MI. */
76 unsigned int getHWInst(const MachineInstr &MI);
/* Low-level output helpers. Multi-byte values are written least significant
 * byte first. */
78 void emitNullBytes(unsigned int byteCount);
80 void emitByte(unsigned int byte);
82 void emitTwoBytes(uint32_t bytes);
84 void emit(uint32_t value);
85 void emit(uint64_t value);
/* Register-number helpers. */
87 unsigned getHWReg(unsigned regNo) const;
89 unsigned getElement(unsigned regNo);
93 } /* End anonymous namespace */
/* Write-mask bits, one per vector channel. They are OR-ed together by
 * dstSwizzleToWriteMask() and decoded one at a time by maskBitToElement(). */
95 #define WRITE_MASK_X 0x1
96 #define WRITE_MASK_Y 0x2
97 #define WRITE_MASK_Z 0x4
98 #define WRITE_MASK_W 0x8
/* Last two enumerators of the texture-type list (earlier enumerators are not
 * visible in this excerpt). emitTexInstr() compares its texture-type operand
 * against these values. */
138 TEXTURE_SHADOW1D_ARRAY,
139 TEXTURE_SHADOW2D_ARRAY
/* Pass identification; its address is handed to MachineFunctionPass by the
 * R600CodeEmitter constructor. */
142 char R600CodeEmitter::ID = 0;
/* Factory: returns a newly allocated R600CodeEmitter that writes to OS.
 * The object is created with `new`; presumably the caller (pass manager)
 * takes ownership -- confirm at the call site. */
144 FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
145 return new R600CodeEmitter(OS);
/* Pass entry point. Caches target state, selects the encoding variant from
 * the device name, then visits every instruction of every basic block and
 * dispatches it to the matching emitter (texture, flow control, reduction,
 * RETURN, a few natively encoded opcodes, or the generic ALU path). */
148 bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
150 TM = &MF.getTarget();
151 MRI = &MF.getRegInfo();
152 MFI = MF.getInfo<AMDILMachineFunctionInfo>();
153 TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
/* Devices whose name starts with "rv7" do not use the Evergreen encoding. */
154 const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
155 std::string gpu = STM.getDeviceName();
156 if (!gpu.compare(0,3, "rv7")) {
157 evergreenEncoding = false;
159 evergreenEncoding = true;
161 const AMDGPUTargetMachine *amdtm =
162 static_cast<const AMDGPUTargetMachine *>(&MF.getTarget());
/* Debug hook controlled by the target machine (body not visible in this
 * excerpt). */
164 if (amdtm->shouldDumpCode()) {
168 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
170 MachineBasicBlock &MBB = *BB;
171 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
173 MachineInstr &MI = *I;
/* Instructions whose first operand is a dead register are special-cased
 * (presumably skipped; the branch body is not visible in this excerpt). */
174 if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
177 if (isTexOp(MI.getOpcode())) {
179 } else if (isFCOp(MI.getOpcode())){
181 } else if (isReductionOp(MI.getOpcode())) {
/* A reduction op is emitted once per channel (0..3); only the final channel
 * is flagged as the last instruction of the group. */
184 for (reductionElement = 0; reductionElement < 4; reductionElement++) {
185 isLast = (reductionElement == 3);
189 } else if (MI.getOpcode() == AMDIL::RETURN) {
192 switch(MI.getOpcode()) {
193 case AMDIL::RAT_WRITE_CACHELESS_eg:
195 /* XXX: Support for autoencoding 64-bit instructions was added
196 * in LLVM 3.1. Until we drop support for 3.0, we will use Magic
197 * numbers for the high bits. */
198 uint64_t high = 0x95c0100000000000;
199 uint64_t inst = getBinaryCodeForInstr(MI);
201 /* Set End Of Program bit */
202 /* XXX: Need better check of end of program. EOP should be
203 * encoded in one of the operands of the MI, and it should be
204 * set in a prior pass. */
/* NOTE(review): in the lines visible here NextI is dereferenced without
 * being compared against MBB.end(); if this instruction is the last one in
 * its block that dereference is invalid. Confirm and guard if needed. */
205 MachineBasicBlock::iterator NextI = llvm::next(I);
206 MachineInstr &NextMI = *NextI;
207 if (NextMI.getOpcode() == AMDIL::RETURN) {
208 inst |= (((uint64_t)1) << 53);
210 emitByte(INSTR_NATIVE);
/* Vertex fetch: the fields are written individually from the operands
 * (operand 0 = destination register, operand 1 = source register,
 * operand 2 = immediate). */
214 case AMDIL::VTX_READ_eg:
224 emitByte(MI.getOperand(2).getImm());
227 emitByte(getHWReg(MI.getOperand(1).getReg()));
230 emitByte(TRI->getHWRegChan(MI.getOperand(1).getReg()));
232 /* mega_fetch_count */
236 emitByte(getHWReg(MI.getOperand(0).getReg()));
250 /* use_const_fields */
259 /* format_comp_all */
/* Emits one ALU instruction: the destination (operand 0), then the source
 * operands, then zero padding for unused source slots, and finally the
 * opcode/flag fields via emitALU(). Place-holder opcodes and a few custom
 * opcodes are handled specially before the generic path. */
283 void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
286 unsigned numOperands = MI.getNumOperands();
288 /* Some instructions are just place holder instructions that represent
289 * operations that the GPU does automatically. They should be ignored. */
290 if (isPlaceHolderOpcode(MI.getOpcode())) {
294 /* We need to handle some opcodes differently */
295 switch (MI.getOpcode()) {
298 /* Custom swizzle instructions, ignore the last two operands */
299 case AMDIL::SET_CHAN:
303 case AMDIL::VEXTRACT_v4f32:
308 case AMDIL::STORE_OUTPUT:
313 /* XXX Check if instruction writes a result */
314 if (numOperands < 1) {
317 const MachineOperand dstOp = MI.getOperand(0);
319 /* Emit instruction type */
/* Sources start at operand 1. opIndex is declared outside the loop because
 * the padding loop below continues from where this one stopped. */
322 unsigned int opIndex;
323 for (opIndex = 1; opIndex < numOperands; opIndex++) {
324 /* Literal constants are always stored as the last operand. */
325 if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
328 emitSrc(MI.getOperand(opIndex));
331 /* Emit zeros for unused sources */
332 for ( ; opIndex < 4; opIndex++) {
333 emitNullBytes(SRC_BYTE_COUNT);
338 emitALU(MI, numOperands - 1);
/* Emits the encoding of one source operand. According to the field comments
 * below the layout is: source select (2 bytes), channel (1), isNegated (1),
 * isAbsolute (1), relative addressing mode (1), kc_bank, and a 4-byte
 * literal value. */
341 void R600CodeEmitter::emitSrc(const MachineOperand & MO)
344 /* Emit the source select (2 bytes). For GPRs, this is the register index.
345 * For other potential instruction operands, (e.g. constant registers) the
346 * value of the source select is defined in the r600isa docs. */
348 unsigned reg = MO.getReg();
349 emitTwoBytes(getHWReg(reg));
/* For ALU_LITERAL_X the literal itself is taken from the parent
 * instruction's last operand, either as the raw bits of a floating-point
 * immediate or as an integer immediate. */
350 if (reg == AMDIL::ALU_LITERAL_X) {
351 const MachineInstr * parent = MO.getParent();
352 unsigned immOpIndex = parent->getNumOperands() - 1;
353 MachineOperand immOp = parent->getOperand(immOpIndex);
354 if (immOp.isFPImm()) {
355 value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
357 assert(immOp.isImm());
358 value = immOp.getImm();
362 /* XXX: Handle other operand types. */
366 /* Emit the source channel (1 byte) */
368 emitByte(reductionElement);
369 } else if (MO.isReg()) {
370 const MachineInstr * parent = MO.getParent();
371 /* The source channel for EXTRACT is stored in operand 2. */
372 if (parent->getOpcode() == AMDIL::VEXTRACT_v4f32) {
373 emitByte(parent->getOperand(2).getImm());
375 emitByte(TRI->getHWRegChan(MO.getReg()));
381 /* XXX: Emit isNegated (1 byte) */
/* Negation applies only when the ABS flag is clear, and then either the NEG
 * flag is set or the register is NEG_ONE / NEG_HALF (part of this condition
 * is on a line not visible in this excerpt). */
382 if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
383 && (MO.getTargetFlags() & MO_FLAG_NEG ||
385 (MO.getReg() == AMDIL::NEG_ONE || MO.getReg() == AMDIL::NEG_HALF)))){
391 /* Emit isAbsolute (1 byte) */
392 if (MO.getTargetFlags() & MO_FLAG_ABS) {
398 /* XXX: Emit relative addressing mode (1 byte) */
401 /* Emit kc_bank, This will be adjusted later by r600_asm */
404 /* Emit the literal value, if applicable (4 bytes). */
/* Emits the encoding of a destination operand. According to the field
 * comments below the layout is: register index (1 byte), element (1),
 * isClamped (1), write mask (1) and relative addressing mode. On the
 * fallback path DST_BYTE_COUNT null bytes are written instead. */
409 void R600CodeEmitter::emitDst(const MachineOperand & MO)
412 /* Emit the destination register index (1 byte) */
413 emitByte(getHWReg(MO.getReg()));
415 /* Emit the element of the destination register (1 byte)*/
416 const MachineInstr * parent = MO.getParent();
418 emitByte(reductionElement);
420 /* The destination element for SET_CHAN is stored in the 3rd operand. */
421 } else if (parent->getOpcode() == AMDIL::SET_CHAN) {
422 emitByte(parent->getOperand(2).getImm());
423 } else if (parent->getOpcode() == AMDIL::VCREATE_v4f32) {
426 emitByte(TRI->getHWRegChan(MO.getReg()));
429 /* Emit isClamped (1 byte) */
430 if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
436 /* Emit writemask (1 byte). */
/* This branch is taken for a reduction step whose channel differs from the
 * destination register's own channel, or when MO_FLAG_MASK is set
 * (presumably the "do not write" case; the emitted values are not visible
 * in this excerpt). */
437 if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
438 || MO.getTargetFlags() & MO_FLAG_MASK) {
444 /* XXX: Emit relative addressing mode */
447 /* XXX: Handle other operand types. Are there any for destination regs? */
448 emitNullBytes(DST_BYTE_COUNT);
/* Emits the per-instruction ALU fields that follow the operands: the
 * hardware opcode (2 bytes) and then one field each for isLast, isOp3,
 * predicate, bank swizzle, bank_swizzle_force, OMOD and index_mode.
 *
 * MI      instruction being emitted.
 * numSrc  number of source operands (emitALUInstr passes numOperands - 1). */
452 void R600CodeEmitter::emitALU(MachineInstr &MI, unsigned numSrc)
454 /* Emit the instruction (2 bytes) */
455 emitTwoBytes(getHWInst(MI));
457 /* Emit isLast (for this instruction group) (1 byte) */
463 /* Emit isOp3 (1 byte) */
470 /* XXX: Emit predicate (1 byte) */
473 /* XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
474 * r600_asm.c sets it. */
477 /* XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. */
480 /* XXX: Emit OMOD (1 byte) Not implemented. */
483 /* XXX: Emit index_mode. I think this is for indirect addressing, so we
484 * don't need to worry about it. */
/* Emits a texture instruction. Operand usage visible here: operand 0 is the
 * destination register, operand 1 the source register, operand 2 the sampler
 * (immediate) and operand 3 the texture type (immediate). The per-channel
 * coordinate types and source selects are adjusted according to the texture
 * type and opcode before being written, one byte per channel. */
488 void R600CodeEmitter::emitTexInstr(MachineInstr &MI)
491 int64_t sampler = MI.getOperand(2).getImm();
492 int64_t textureType = MI.getOperand(3).getImm();
493 unsigned opcode = MI.getOpcode();
/* Default source select is the identity mapping X,Y,Z,W -> 0,1,2,3. */
494 unsigned srcSelect[4] = {0, 1, 2, 3};
496 /* Emit instruction type */
499 /* Emit instruction */
500 emitByte(getHWInst(MI));
502 /* XXX: Emit resource id r600_shader.c uses sampler + 1. Why? */
/* NOTE(review): the code emits sampler + 2 although the comment above
 * mentions sampler + 1 -- confirm which is intended. */
503 emitByte(sampler + 1 + 1);
505 /* Emit source register */
506 emitByte(getHWReg(MI.getOperand(1).getReg()));
508 /* XXX: Emit src isRelativeAddress */
511 /* Emit destination register */
512 emitByte(getHWReg(MI.getOperand(0).getReg()));
514 /* XXX: Emit dst isRelativeAddress */
517 /* XXX: Emit dst select */
523 /* XXX: Emit lod bias */
526 /* XXX: Emit coord types */
/* Every channel starts with coordinate type 1; selected channels are reset
 * to 0 for rectangle and array textures below. */
527 unsigned coordType[4] = {1, 1, 1, 1};
529 if (textureType == TEXTURE_RECT
530 || textureType == TEXTURE_SHADOWRECT) {
531 coordType[ELEMENT_X] = 0;
532 coordType[ELEMENT_Y] = 0;
535 if (textureType == TEXTURE_1D_ARRAY
536 || textureType == TEXTURE_SHADOW1D_ARRAY) {
537 if (opcode == AMDIL::TEX_SAMPLE_C_L || opcode == AMDIL::TEX_SAMPLE_C_LB) {
538 coordType[ELEMENT_Y] = 0;
540 coordType[ELEMENT_Z] = 0;
541 srcSelect[ELEMENT_Z] = ELEMENT_Y;
543 } else if (textureType == TEXTURE_2D_ARRAY
544 || textureType == TEXTURE_SHADOW2D_ARRAY) {
545 coordType[ELEMENT_Z] = 0;
548 for (unsigned i = 0; i < 4; i++) {
549 emitByte(coordType[i]);
552 /* XXX: Emit offsets */
556 /* There is no OFFSET_W */
558 /* Emit sampler id */
561 /* XXX:Emit source select */
/* For these shadow texture types (except with the C_L / C_LB opcodes) the W
 * source channel is read from Z. */
562 if ((textureType == TEXTURE_SHADOW1D
563 || textureType == TEXTURE_SHADOW2D
564 || textureType == TEXTURE_SHADOWRECT
565 || textureType == TEXTURE_SHADOW1D_ARRAY)
566 && opcode != AMDIL::TEX_SAMPLE_C_L
567 && opcode != AMDIL::TEX_SAMPLE_C_LB) {
568 srcSelect[ELEMENT_W] = ELEMENT_Z;
571 for (unsigned i = 0; i < 4; i++) {
572 emitByte(srcSelect[i]);
/* Emits a flow-control instruction. At most one operand is expected; it is
 * written with emitSrc(), and when the instruction has no operand
 * SRC_BYTE_COUNT null bytes are written instead. The opcode is then mapped
 * to an FC_* instruction code by the switch below. */
576 void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
578 /* Emit instruction type */
582 unsigned numOperands = MI.getNumOperands();
583 if (numOperands > 0) {
584 assert(numOperands == 1);
585 emitSrc(MI.getOperand(0));
587 emitNullBytes(SRC_BYTE_COUNT);
590 /* Emit FC Instruction */
592 switch (MI.getOpcode()) {
593 case AMDIL::BREAK_LOGICALZ_f32:
596 case AMDIL::BREAK_LOGICALNZ_i32:
597 instr = FC_BREAK_NZ_INT;
599 case AMDIL::BREAK_LOGICALZ_i32:
600 instr = FC_BREAK_Z_INT;
602 case AMDIL::CONTINUE_LOGICALNZ_f32:
605 /* XXX: This assumes that all IFs will be if (x != 0). If we add
606 * optimizations this might not be the case */
607 case AMDIL::IF_LOGICALNZ_f32:
608 case AMDIL::IF_LOGICALNZ_i32:
611 case AMDIL::IF_LOGICALZ_f32:
623 case AMDIL::WHILELOOP:
/* Helper macros that expand to `case` labels for use inside a switch over
 * AMDIL opcodes; each label returns HW_INST2(hw). */

/* Vector variants: inst_v4f32 and inst_v2f32. */
633 #define INSTR_FLOAT2_V(inst, hw) \
634 case AMDIL:: inst##_v4f32: \
635 case AMDIL:: inst##_v2f32: return HW_INST2(hw);
/* Scalar variant: inst_f32. */
637 #define INSTR_FLOAT2_S(inst, hw) \
638 case AMDIL:: inst##_f32: return HW_INST2(hw);
/* Both vector and scalar variants. */
640 #define INSTR_FLOAT2(inst, hw) \
641 INSTR_FLOAT2_V(inst, hw) \
642 INSTR_FLOAT2_S(inst, hw)
/* Returns the hardware opcode to emit for MI. The opcodes listed in the
 * switch share a special-case result (not visible in this excerpt; the XXX
 * note suggests they are meant to be treated as MOV). Every other opcode
 * uses the generated encoding from getBinaryCodeForInstr(). */
644 unsigned int R600CodeEmitter::getHWInst(const MachineInstr &MI)
647 /* XXX: Lower these to MOV before the code emitter. */
648 switch (MI.getOpcode()) {
649 case AMDIL::STORE_OUTPUT:
650 case AMDIL::VCREATE_v4i32:
651 case AMDIL::VCREATE_v4f32:
652 case AMDIL::VEXTRACT_v4f32:
653 case AMDIL::VINSERT_v4f32:
654 case AMDIL::LOADCONST_i32:
655 case AMDIL::LOADCONST_f32:
656 case AMDIL::MOVE_v4i32:
657 case AMDIL::SET_CHAN:
658 /* Instructions to reinterpret bits as ... */
659 case AMDIL::IL_ASINT_f32:
660 case AMDIL::IL_ASINT_i32:
661 case AMDIL::IL_ASFLOAT_f32:
662 case AMDIL::IL_ASFLOAT_i32:
666 return getBinaryCodeForInstr(MI);
/* Runs its loop body byteCount times; judging by the name and its callers
 * (padding for unused operands) each iteration presumably writes one zero
 * byte -- the loop body is not visible in this excerpt. */
670 void R600CodeEmitter::emitNullBytes(unsigned int byteCount)
672 for (unsigned int i = 0; i < byteCount; i++) {
/* Writes the low 8 bits of `byte` to the output stream. */
677 void R600CodeEmitter::emitByte(unsigned int byte)
679 _OS.write((uint8_t) byte & 0xff);
/* Writes the low 16 bits of `bytes` to the output stream, least significant
 * byte first. */
681 void R600CodeEmitter::emitTwoBytes(unsigned int bytes)
683 _OS.write((uint8_t) (bytes & 0xff));
684 _OS.write((uint8_t) ((bytes >> 8) & 0xff));
/* Writes a 32-bit value to the output stream as four bytes, least
 * significant byte first. */
687 void R600CodeEmitter::emit(uint32_t value)
689 for (unsigned i = 0; i < 4; i++) {
690 _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
/* Writes a 64-bit value to the output stream as eight bytes, least
 * significant byte first, using emitByte() for each byte. */
694 void R600CodeEmitter::emit(uint64_t value)
696 for (unsigned i = 0; i < 8; i++) {
697 emitByte((value >> (8 * i)) & 0xff);
/* Translates an LLVM register number into the hardware register index
 * reported by the register info. Registers in R600_CReg32RegClass receive
 * additional handling that is not visible in this excerpt. */
701 unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
705 hwReg = TRI->getHWRegIndex(regNo);
706 if (AMDIL::R600_CReg32RegClass.contains(regNo)) {
/* Operand-encoding hook declared virtual in the class. On the path visible
 * here it returns the hardware register index of MO; handling of other
 * operand kinds, if any, is not visible in this excerpt. */
712 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
713 const MachineOperand &MO) const
716 return getHWReg(MO.getReg());
/* Maps a single write-mask bit (WRITE_MASK_X/Y/Z/W) to the corresponding
 * register element (ELEMENT_X/Y/Z/W).
 *
 * Any other value is a programming error. The assertion is written as
 * `0 && "message"` because a bare string literal converts to a non-null
 * pointer and is therefore always true, so asserting on the literal alone
 * can never fire. */
723 RegElement maskBitToElement(unsigned int maskBit)
726 case WRITE_MASK_X: return ELEMENT_X;
727 case WRITE_MASK_Y: return ELEMENT_Y;
728 case WRITE_MASK_Z: return ELEMENT_Z;
729 case WRITE_MASK_W: return ELEMENT_W;
731 assert(0 && "Invalid maskBit");
/* Converts an AMDIL destination swizzle code into a bit mask built from
 * WRITE_MASK_X/Y/Z/W, with one bit set for every channel named in the
 * swizzle. The default swizzle and XYZW both enable all four channels.
 * The return statements for the single-channel cases (X___, _Y__, __Z_,
 * ___W) are not visible in this excerpt. */
736 unsigned int dstSwizzleToWriteMask(unsigned swizzle)
740 case AMDIL_DST_SWIZZLE_DEFAULT:
741 return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
742 case AMDIL_DST_SWIZZLE_X___:
744 case AMDIL_DST_SWIZZLE_XY__:
745 return WRITE_MASK_X | WRITE_MASK_Y;
746 case AMDIL_DST_SWIZZLE_XYZ_:
747 return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z;
748 case AMDIL_DST_SWIZZLE_XYZW:
749 return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
750 case AMDIL_DST_SWIZZLE__Y__:
752 case AMDIL_DST_SWIZZLE__YZ_:
753 return WRITE_MASK_Y | WRITE_MASK_Z;
754 case AMDIL_DST_SWIZZLE__YZW:
755 return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
756 case AMDIL_DST_SWIZZLE___Z_:
758 case AMDIL_DST_SWIZZLE___ZW:
759 return WRITE_MASK_Z | WRITE_MASK_W;
760 case AMDIL_DST_SWIZZLE____W:
762 case AMDIL_DST_SWIZZLE_X_ZW:
763 return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W;
764 case AMDIL_DST_SWIZZLE_XY_W:
765 return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
766 case AMDIL_DST_SWIZZLE_X_Z_:
767 return WRITE_MASK_X | WRITE_MASK_Z;
768 case AMDIL_DST_SWIZZLE_X__W:
769 return WRITE_MASK_X | WRITE_MASK_W;
770 case AMDIL_DST_SWIZZLE__Y_W:
771 return WRITE_MASK_Y | WRITE_MASK_W;
775 #include "AMDILGenCodeEmitter.inc"