2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
// Encoding form selectors passed as the enc argument of setSrcFileBits():
// LONG by emitForm_MAD, LONG_ALT by emitForm_ADD, SHORT by emitForm_MUL,
// IMM by emitForm_IMM and emitARL.
28 #define NV50_OP_ENC_LONG 0
29 #define NV50_OP_ENC_SHORT 1
30 #define NV50_OP_ENC_IMM 2
31 #define NV50_OP_ENC_LONG_ALT 3
// Instruction encoder for the NV50 target. It declares one emit* method per
// instruction kind plus helpers that OR operand, modifier and form bits into
// the code words (code[0], code[1]) of the instruction being emitted.
33 class CodeEmitterNV50 : public CodeEmitter
36 CodeEmitterNV50(const TargetNV50 *);
// Virtual entry points.
38 virtual bool emitInstruction(Instruction *);
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
// Records the program type; it is consulted by setSrcFileBits() and
// emitLOAD() to pick stage-specific encodings.
42 inline void setProgramType(Program::Type pType) { progType = pType; }
44 virtual void prepareEmission(Function *);
47 Program::Type progType;
49 const TargetNV50 *targNV50;
// Operand field helpers: OR a register id or an address offset into the
// code words at absolute bit position pos.
52 inline void defId(const ValueDef&, const int pos);
53 inline void srcId(const ValueRef&, const int pos);
54 inline void srcId(const ValueRef *, const int pos);
56 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57 inline void srcAddr8(const ValueRef&, const int pos);
// Condition code / flags register reads and writes.
59 void emitFlagsRd(const Instruction *);
60 void emitFlagsWr(const Instruction *);
62 void emitCondCode(CondCode cc, DataType ty, int pos);
// Address register and immediate operand encoding.
64 inline void setARegBits(unsigned int);
66 void setAReg16(const Instruction *, int s);
67 void setImmediate(const Instruction *, int s);
// Destination and source operand encoding.
69 void setDst(const Value *);
70 void setDst(const Instruction *, int d);
71 void setSrcFileBits(const Instruction *, int enc);
72 void setSrc(const Instruction *, unsigned int s, int slot);
// Common operand layouts shared by several opcodes.
74 void emitForm_MAD(const Instruction *);
75 void emitForm_ADD(const Instruction *);
76 void emitForm_MUL(const Instruction *);
77 void emitForm_IMM(const Instruction *);
// Load/store access size fields.
79 void emitLoadStoreSizeLG(DataType ty, int pos);
80 void emitLoadStoreSizeCS(DataType ty);
// Rounding mode and negate modifier fields.
82 void roundMode_MAD(const Instruction *);
83 void roundMode_CVT(RoundMode);
85 void emitMNeg12(const Instruction *);
// Per-opcode emitters.
87 void emitLOAD(const Instruction *);
88 void emitSTORE(const Instruction *);
89 void emitMOV(const Instruction *);
90 void emitRDSV(const Instruction *);
92 void emitINTERP(const Instruction *);
93 void emitPFETCH(const Instruction *);
94 void emitOUT(const Instruction *);
96 void emitUADD(const Instruction *);
97 void emitAADD(const Instruction *);
98 void emitFADD(const Instruction *);
99 void emitDADD(const Instruction *);
100 void emitIMUL(const Instruction *);
101 void emitFMUL(const Instruction *);
102 void emitDMUL(const Instruction *);
103 void emitFMAD(const Instruction *);
104 void emitDMAD(const Instruction *);
105 void emitIMAD(const Instruction *);
106 void emitISAD(const Instruction *);
108 void emitMINMAX(const Instruction *);
110 void emitPreOp(const Instruction *);
111 void emitSFnOp(const Instruction *, uint8_t subOp);
113 void emitShift(const Instruction *);
114 void emitARL(const Instruction *, unsigned int shl);
115 void emitLogicOp(const Instruction *);
116 void emitNOT(const Instruction *);
118 void emitCVT(const Instruction *);
119 void emitSET(const Instruction *);
121 void emitTEX(const TexInstruction *);
122 void emitTXQ(const TexInstruction *);
123 void emitTEXPREP(const TexInstruction *);
125 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
127 void emitFlow(const Instruction *, uint8_t flowOp);
128 void emitPRERETEmu(const FlowInstruction *);
129 void emitBAR(const Instruction *);
131 void emitATOM(const Instruction *);
// Shorthand for the register data (id, offset, ...) of the representative
// value behind a source (SDATA) or definition (DDATA) reference. Both expand
// to the same expression.
134 #define SDATA(a) ((a).rep()->reg.data)
135 #define DDATA(a) ((a).rep()->reg.data)
// ORs the register id of the source (rep()->reg.data.id) into the code at
// absolute bit position pos: word pos / 32, bit pos % 32.
137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
140 code[pos / 32] |= SDATA(src).id << (pos % 32);
// Pointer overload of srcId(): dereferences src and ORs its register id
// into the code at absolute bit position pos.
143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
146 code[pos / 32] |= SDATA(*src).id << (pos % 32);
// Encodes the offset of src as a 16-bit address field at bit position pos.
// adj requests an element-scaled offset and is only valid for values of at
// most 4 bytes.
149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
153 int32_t offset = SDATA(src).offset;
155 assert(!adj || src.get()->reg.size <= 4);
// NOTE(review): presumably this scaling only applies when adj is set - confirm.
157 offset /= src.get()->reg.size;
// The value must fit in a signed 16-bit field that does not cross the
// 32-bit word boundary.
159 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
// With adj the mask is narrowed by (reg.size >> 1) bits.
162 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
164 code[pos / 32] |= offset << (pos % 32);
// Encodes the offset of src in units of 4 bytes (offset >> 2) at bit
// position pos. The offset must be 4-byte aligned and either at most 0x1fc
// or exactly 0x3fc.
167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
171 uint32_t offset = SDATA(src).offset;
173 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
175 code[pos / 32] |= (offset >> 2) << (pos % 32);
// ORs the register id of the definition into the code at absolute bit
// position pos. The definition must be non-null and must not live in
// FILE_SHADER_OUTPUT.
178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
180 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
182 code[pos / 32] |= DDATA(def).id << (pos % 32);
// Encodes the rounding mode in the 2-bit field at bit 22 of code[1]:
// ROUND_M = 1, ROUND_P = 2, ROUND_Z = 3. Any other mode is asserted to be
// ROUND_N.
186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
189 case ROUND_M: code[1] |= 1 << 22; break;
190 case ROUND_P: code[1] |= 2 << 22; break;
191 case ROUND_Z: code[1] |= 3 << 22; break;
193 assert(insn->rnd == ROUND_N);
// ORs the negate modifiers of sources 0 and 1 into bits 26 and 27 of
// code[1] respectively.
199 CodeEmitterNV50::emitMNeg12(const Instruction *i)
201 code[1] |= i->src(0).mod.neg() << 26;
202 code[1] |= i->src(1).mod.neg() << 27;
// Translates cc into its numeric encoding and ORs it into the code at
// absolute bit position pos. ty is the operand type; for a non-float type
// other than TYPE_NONE the unordered bit (0x8) is cleared.
205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
// Encodings go up to 0x1f (5 bits), so a field in word 0 must start at or
// below bit 27 to fit.
209 assert(pos >= 32 || pos <= 27);
// Comparison conditions; the *U (unordered) variants add 0x8.
212 case CC_LT: enc = 0x1; break;
213 case CC_LTU: enc = 0x9; break;
214 case CC_EQ: enc = 0x2; break;
215 case CC_EQU: enc = 0xa; break;
216 case CC_LE: enc = 0x3; break;
217 case CC_LEU: enc = 0xb; break;
218 case CC_GT: enc = 0x4; break;
219 case CC_GTU: enc = 0xc; break;
220 case CC_NE: enc = 0x5; break;
221 case CC_NEU: enc = 0xd; break;
222 case CC_GE: enc = 0x6; break;
223 case CC_GEU: enc = 0xe; break;
224 case CC_TR: enc = 0xf; break;
225 case CC_FL: enc = 0x0; break;
// Flag-based conditions use the range 0x10..0x1f.
227 case CC_O: enc = 0x10; break;
228 case CC_C: enc = 0x11; break;
229 case CC_A: enc = 0x12; break;
230 case CC_S: enc = 0x13; break;
231 case CC_NS: enc = 0x1c; break;
232 case CC_NA: enc = 0x1d; break;
233 case CC_NC: enc = 0x1e; break;
234 case CC_NO: enc = 0x1f; break;
238 assert(!"invalid condition code");
241 if (ty != TYPE_NONE && !isFloatType(ty))
242 enc &= ~0x8; // unordered only exists for float types
244 code[pos / 32] |= enc << (pos % 32);
// Encodes the condition-code (flags) read of instruction i into code[1].
// The flags source index is flagsSrc when that is >= 0, otherwise predSrc.
248 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
250 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
// Bits 7..13 of code[1] must not have been written yet.
252 assert(!(code[1] & 0x00003f80));
// The selected source must be a flags register; its condition code goes to
// bit 7 and its register id to bit 12 of code[1].
255 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
256 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
257 srcId(i->src(s), 32 + 12);
// Encodes the flags register written by instruction i: the id of the flags
// definition goes to bit 4 of code[1] and bit 6 (0x40) is set.
264 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
// Bits 4..6 of code[1] must not have been written yet.
266 assert(!(code[1] & 0x70));
268 int flagsDef = i->flagsDef;
270 // find flags definition and check that it is the last def
272 for (int d = 0; i->defExists(d); ++d)
273 if (i->def(d).getFile() == FILE_FLAGS)
// The "&& 0" makes this condition always false, so this warning never fires.
275 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
276 WARN("Instruction::flagsDef was not set properly\n");
278 if (flagsDef == 0 && i->defExists(1))
279 WARN("flags def should not be the primary definition\n");
282 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
// ORs the low two bits of the address register selector u into bits 26-27
// of code[0].
287 CodeEmitterNV50::setARegBits(unsigned int u)
289 code[0] |= (u & 3) << 26;
// Encodes the address register used for indirect addressing by source s.
// If source s exists, s is rebound to the index stored in its indirect[0]
// slot, and the id of that source plus one is handed to setARegBits().
294 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
296 if (i->srcExists(s)) {
297 s = i->src(s).indirect[0];
299 setARegBits(SDATA(i->src(s)).id + 1);
// Encodes the 32-bit immediate value of source s: the low 6 bits go to
// bits 16-21 of code[0], the remaining bits to code[1] starting at bit 2.
304 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
306 const ImmediateValue *imm = i->src(s).get()->asImm();
309 uint32_t u = imm->reg.data.u32;
// NOTE(review): presumably u is bitwise-inverted when the source carries a
// NOT modifier - confirm.
311 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
315 code[0] |= (u & 0x3f) << 16;
316 code[1] |= (u >> 6) << 2;
// Encodes the destination operand from the storage of dst->join. Address
// registers are not accepted here.
320 CodeEmitterNV50::setDst(const Value *dst)
322 const Storage *reg = &dst->join->reg;
324 assert(reg->file != FILE_ADDRESS);
// An unassigned (negative) id or a flags destination is encoded as register
// 127 together with bit 0 of code[0].
326 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
327 code[0] |= (127 << 2) | 1;
// Shader outputs are addressed by their offset in units of 4 bytes.
331 if (reg->file == FILE_SHADER_OUTPUT) {
333 id = reg->data.offset / 4;
// Encodes definition d of instruction i as the destination operand, if it
// exists; the bit-bucket encoding 0x01fc equals 127 << 2.
342 CodeEmitterNV50::setDst(const Instruction *i, int d)
344 if (i->defExists(d)) {
345 setDst(i->getDef(d));
348 code[0] |= 0x01fc; // bit bucket
// Sets the bits of code[0] / code[1] that select which register file each
// source operand is read from. A 2-bit code per source is collected into
// mode (source s occupies bits 2s and 2s+1), and the resulting combination
// is then mapped to encoding bits that also depend on the encoding form enc
// (one of NV50_OP_ENC_*) and on the program type.
359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
363 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
364 switch (i->src(s).getFile()) {
// Shared memory and shader inputs use file code 1.
367 case FILE_MEMORY_SHARED:
368 case FILE_SHADER_INPUT:
369 mode |= 1 << (s * 2);
// Constant memory uses file code 2.
371 case FILE_MEMORY_CONST:
372 mode |= 2 << (s * 2);
375 mode |= 3 << (s * 2);
378 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
// Map the collected mode to encoding bits.
386 case 0x01: // arr/grr
387 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
388 code[0] |= 0x01800000;
389 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
390 code[1] |= 0x00200000;
392 if (enc == NV50_OP_ENC_SHORT)
393 code[0] |= 0x01000000;
395 code[1] |= 0x00200000;
399 assert(i->op == OP_MOV);
404 assert(progType == Program::TYPE_GEOMETRY ||
405 progType == Program::TYPE_COMPUTE);
406 code[0] |= 0x01000000;
407 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
408 int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
410 code[0] |= (reg + 1) << 26;
// Combinations with a constant-memory source also record that source's
// fileIndex at bit 22 of code[1].
414 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
415 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
417 case 0x09: // acr/gcr
418 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
419 code[0] |= 0x01800000;
421 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
422 code[1] |= 0x00200000;
424 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
427 code[0] |= 0x01000000;
428 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
431 code[0] |= 0x01000000;
432 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
433 assert(progType != Program::TYPE_GEOMETRY);
436 ERROR("not encodable: %x\n", mode);
440 if (progType != Program::TYPE_COMPUTE)
// When source 0 has file code 1, a bit position is chosen from the file
// code of source 1.
443 if ((mode & 3) == 1) {
444 const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
457 assert(i->getSrc(0)->reg.size == 4);
// Encodes source s of instruction i into operand slot slot: slot 0 is at
// bit 9 of code[0], slot 1 at bit 16 of code[0], slot 2 at bit 14 of
// code[1].
464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
// Guard for source indices beyond the operand count of this opcode.
466 if (Target::operationSrcNr[i->op] <= s)
468 const Storage *reg = &i->src(s).rep()->reg;
// For non-GPR files the id is derived from the offset, scaled down by the
// value size.
470 unsigned int id = (reg->file == FILE_GPR) ?
472 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
475 case 0: code[0] |= id << 9; break;
476 case 1: code[0] |= id << 16; break;
477 case 2: code[1] |= id << 14; break;
// Operand layout for the long (8-byte) form; sets the source file bits with
// NV50_OP_ENC_LONG.
485 // - long instruction
486 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
489 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
491 assert(i->encSize == 8);
499 setSrcFileBits(i, NV50_OP_ENC_LONG);
// At most one source may use indirect addressing: source 0 is checked
// first, then source 1, and the remaining sources are asserted to be direct.
504 if (i->getIndirect(0, 0)) {
505 assert(!i->srcExists(1) || !i->getIndirect(1, 0));
506 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
508 } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
509 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
// Alternate long (8-byte) operand layout; sets the source file bits with
// NV50_OP_ENC_LONG_ALT.
516 // like default form, but 2nd source in slot 2, and no 3rd source
518 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
520 assert(i->encSize == 8);
528 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
// Only one of the two sources may be indirect.
532 if (i->getIndirect(0, 0)) {
533 assert(!i->getIndirect(1, 0));
// Short (4-byte) operand layout; sets the source file bits with
// NV50_OP_ENC_SHORT.
540 // default short form (rr, ar, rc, gr)
542 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
// Requires a 4-byte encoding with bit 0 of code[0] clear, a destination,
// and no predicate.
544 assert(i->encSize == 4 && !(code[0] & 1));
545 assert(i->defExists(0));
546 assert(!i->getPredicate());
550 setSrcFileBits(i, NV50_OP_ENC_SHORT);
// Operand layout for instructions with an immediate source; asserts an
// 8-byte encoding and sets the source file bits with NV50_OP_ENC_IMM.
555 // usual immediate form
556 // - 1 to 3 sources where second is immediate (rir, gir)
557 // - no address or predicate possible
559 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
561 assert(i->encSize == 8);
564 assert(i->defExists(0) && i->srcExists(0));
568 setSrcFileBits(i, NV50_OP_ENC_IMM);
569 if (Target::operationSrcNr[i->op] > 1) {
572 // If there is another source, it has to be the same as the dest reg.
// Encodes the access type of a load/store at absolute bit position pos.
// emitLOAD() and emitSTORE() call it for local and global memory.
// Encodings: 32-bit types 0x6, B128 0x5, 64-bit types 0x4, S16 0x3,
// U16 0x2, S8 0x1, U8 0x0. Other types trip the assert.
579 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
584 case TYPE_F32: // fall through
585 case TYPE_S32: // fall through
586 case TYPE_U32: enc = 0x6; break;
587 case TYPE_B128: enc = 0x5; break;
588 case TYPE_F64: // fall through
589 case TYPE_S64: // fall through
590 case TYPE_U64: enc = 0x4; break;
591 case TYPE_S16: enc = 0x3; break;
592 case TYPE_U16: enc = 0x2; break;
593 case TYPE_S8: enc = 0x1; break;
594 case TYPE_U8: enc = 0x0; break;
597 assert(!"invalid load/store type");
600 code[pos / 32] |= enc << (pos % 32);
// Encodes the access type of a load in bits 14-15 of code[1]: U16 = 0x4000,
// S16 = 0x8000, U32 = 0xc000. emitLOAD() calls it for shared and constant
// memory.
604 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
608 case TYPE_U16: code[1] |= 0x4000; break;
609 case TYPE_S16: code[1] |= 0x8000; break;
612 case TYPE_U32: code[1] |= 0xc000; break;
// Emits a load. The opcode words are chosen by the register file of
// source 0 (shader input, shared, constant, local or global memory), and
// the address is encoded at the end.
620 CodeEmitterNV50::emitLOAD(const Instruction *i)
622 DataFile sf = i->src(0).getFile();
623 int32_t offset = i->getSrc(0)->reg.data.offset;
// Shader inputs: geometry programs with an indirect source get a dedicated
// opcode; otherwise the opcode depends on whether source 0 is indirect.
626 case FILE_SHADER_INPUT:
627 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
628 code[0] = 0x11800001;
630 // use 'mov' where we can
631 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
632 code[1] = 0x00200000 | (i->lanes << 14);
633 if (typeSizeof(i->dType) == 4)
634 code[1] |= 0x04000000;
// Shared memory: chipsets >= 0x84 accept offsets up to 0x3fff elements.
// NOTE(review): the second encoding (limit 0x1f elements) is presumably the
// path for older chipsets - confirm.
636 case FILE_MEMORY_SHARED:
637 if (targ->getChipset() >= 0x84) {
638 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
639 code[0] = 0x10000001;
640 code[1] = 0x40000000;
642 if (typeSizeof(i->dType) == 4)
643 code[1] |= 0x04000000;
645 emitLoadStoreSizeCS(i->sType);
647 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
648 code[0] = 0x10000001;
649 code[1] = 0x00200000 | (i->lanes << 14);
650 emitLoadStoreSizeCS(i->sType);
// Constant memory: the fileIndex of the source goes to bit 22 of code[1].
653 case FILE_MEMORY_CONST:
654 code[0] = 0x10000001;
655 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
656 if (typeSizeof(i->dType) == 4)
657 code[1] |= 0x04000000;
658 emitLoadStoreSizeCS(i->sType);
660 case FILE_MEMORY_LOCAL:
661 code[0] = 0xd0000001;
662 code[1] = 0x40000000;
// Global memory: the fileIndex of the source goes to bit 16 of code[0].
664 case FILE_MEMORY_GLOBAL:
665 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
666 code[1] = 0x80000000;
669 assert(!"invalid load source file");
// Local and global loads carry their access type at bit 21 of code[1].
672 if (sf == FILE_MEMORY_LOCAL ||
673 sf == FILE_MEMORY_GLOBAL)
674 emitLoadStoreSizeLG(i->sType, 21 + 32);
// Global loads take the id of the indirect (address) source at bit 9.
681 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
682 srcId(*i->src(0).getIndirect(0), 9);
// The 16-bit offset goes to bit 9; it is element-scaled (adj) for every
// file except local memory.
685 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
// Emits a store. Source 0 describes the destination location and its
// register file selects the opcode; source 1 is the value being stored.
690 CodeEmitterNV50::emitSTORE(const Instruction *i)
692 DataFile f = i->getSrc(0)->reg.file;
693 int32_t offset = i->getSrc(0)->reg.data.offset;
// Shader outputs: the offset in units of 4 bytes goes to bit 9 of code[0],
// the value register id to bit 14 of code[1].
696 case FILE_SHADER_OUTPUT:
697 code[0] = 0x00000001 | ((offset >> 2) << 9);
698 code[1] = 0x80c00000;
699 srcId(i->src(1), 32 + 14);
701 case FILE_MEMORY_GLOBAL:
702 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
703 code[1] = 0xa0000000;
704 emitLoadStoreSizeLG(i->dType, 21 + 32);
707 case FILE_MEMORY_LOCAL:
708 code[0] = 0xd0000001;
709 code[1] = 0x60000000;
710 emitLoadStoreSizeLG(i->dType, 21 + 32);
// Shared memory: the offset is scaled by the access size (1, 2 or 4 bytes)
// before it is placed at bit 9 of code[0].
713 case FILE_MEMORY_SHARED:
714 code[0] = 0x00000001;
715 code[1] = 0xe0000000;
716 switch (typeSizeof(i->dType)) {
718 code[0] |= offset << 9;
719 code[1] |= 0x00400000;
722 code[0] |= (offset >> 1) << 9;
725 code[0] |= (offset >> 2) << 9;
726 code[1] |= 0x04200000;
732 srcId(i->src(1), 32 + 14);
735 assert(!"invalid store destination file");
// Global stores take the id of the indirect (address) source at bit 9.
739 if (f == FILE_MEMORY_GLOBAL)
740 srcId(*i->src(0).getIndirect(0), 9);
// Local stores encode the unscaled 16-bit offset at bit 9.
744 if (f == FILE_MEMORY_LOCAL)
745 srcAddr16(i->src(0), false, 9);
// Emits a move. Either the source or the destination must be a GPR. The
// opcode is chosen from the source and destination register files.
751 CodeEmitterNV50::emitMOV(const Instruction *i)
753 DataFile sf = i->getSrc(0)->reg.file;
754 DataFile df = i->getDef(0)->reg.file;
756 assert(sf == FILE_GPR || df == FILE_GPR);
// Read from a flags register.
758 if (sf == FILE_FLAGS) {
759 assert(i->flagsSrc >= 0);
760 code[0] = 0x00000001;
761 code[1] = 0x20000000;
// Read from an address register; the selector is the register id plus one.
765 if (sf == FILE_ADDRESS) {
766 code[0] = 0x00000001;
767 code[1] = 0x40000000;
769 setARegBits(SDATA(i->src(0)).id + 1);
// Write to a flags register.
772 if (df == FILE_FLAGS) {
773 assert(i->flagsDef >= 0);
774 code[0] = 0x00000001;
775 code[1] = 0xa0000000;
// Immediate source.
780 if (sf == FILE_IMMEDIATE) {
781 code[0] = 0x10008001;
782 code[1] = 0x00000003;
// Generic move: short (4-byte) or long form. The long form distinguishes
// 2-byte types from the rest and carries the lane mask at bit 14 of code[1].
785 if (i->encSize == 4) {
786 code[0] = 0x10008000;
788 code[0] = 0x10000001;
789 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
790 code[1] |= (i->lanes << 14);
// Moves to a shader output require the 8-byte encoding.
796 if (df == FILE_SHADER_OUTPUT) {
797 assert(i->encSize == 8);
// Maps the system value referenced by ref to the number that emitRDSV()
// places in the instruction: PHYSID 0, CLOCK 1, VERTEX_STRIDE 3,
// SAMPLE_INDEX 8. Other system values trip the assert.
802 static inline uint8_t getSRegEncoding(const ValueRef &ref)
804 switch (SDATA(ref).sv.sv) {
805 case SV_PHYSID: return 0;
806 case SV_CLOCK: return 1;
807 case SV_VERTEX_STRIDE: return 3;
808 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
809 case SV_SAMPLE_INDEX: return 8;
811 assert(!"no sreg for system value");
// Emits a system-value read: the number returned by getSRegEncoding() for
// source 0 is placed at bit 14 of code[1].
817 CodeEmitterNV50::emitRDSV(const Instruction *i)
819 code[0] = 0x00000001;
820 code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
// Emits a no-op as two fixed code words.
826 CodeEmitterNV50::emitNOP()
828 code[0] = 0xf0000001;
829 code[1] = 0xe0000000;
// Emits a quad operation. lane is placed at bit 16 of code[0]; quOp is
// split across both words.
833 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
835 code[0] = 0xc0000000 | (lane << 16);
836 code[1] = 0x80000000;
// Low two bits of quOp go to bits 20-21 of code[0]; the upper six bits land
// in bits 22-27 of code[1].
838 code[0] |= (quOp & 0x03) << 20;
839 code[1] |= (quOp & 0xfc) << 20;
// With a single source, source 0 is also encoded at bit 14 of code[1].
843 if (!i->srcExists(1))
844 srcId(i->src(0), 32 + 14);
847 /* NOTE: This returns the base address of a vertex inside the primitive.
848 * src0 is an immediate, the index (not offset) of the vertex
849 * inside the primitive. XXX: signed or unsigned ?
850 * src1 (may be NULL) should use whatever units the hardware requires
851 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
// Emits a primitive-fetch. Source 0 supplies the value prim, which is
// placed at bit 9 of code[0] in every variant.
854 CodeEmitterNV50::emitPFETCH(const Instruction *i)
856 const uint32_t prim = i->src(0).get()->reg.data.u32;
// Destination is an address register: its id plus one goes to bit 2 of
// code[0], and no second source is allowed.
859 if (i->def(0).getFile() == FILE_ADDRESS) {
861 code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
862 code[1] = 0xc0200000;
863 code[0] |= prim << 9;
864 assert(!i->srcExists(1));
// Otherwise, with a second source, that source provides the address
// register selector (id plus one).
866 if (i->srcExists(1)) {
867 // ld b32 $rX a[$aX+base]
868 code[0] = 0x00000001;
869 code[1] = 0x04200000 | (0xf << 14);
871 code[0] |= prim << 9;
872 setARegBits(SDATA(i->src(1)).id + 1);
// Remaining variant, used when neither case above applies.
875 code[0] = 0x10000001;
876 code[1] = 0x04200000 | (0xf << 14);
878 code[0] |= prim << 9;
// Fixup callback that emitINTERP() registers through addInterp(). For an
// entry with the default sample mode and a non-flat interpolation mode, it
// sets one bit when force_persample_interp is true and clears it otherwise:
// either bit 16 of code[loc + 1] or bit 24 of code[loc + 0].
// NOTE(review): presumably encSize == 8 selects the code[loc + 1] variant -
// confirm.
884 interpApply(const InterpEntry *entry, uint32_t *code,
885 bool force_persample_interp, bool flatshade)
887 int ipa = entry->ipa;
// The reg field of the entry holds the encoding size passed to addInterp().
888 int encSize = entry->reg;
889 int loc = entry->loc;
891 if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
892 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
893 if (force_persample_interp) {
895 code[loc + 1] |= 1 << 16;
897 code[loc + 0] |= 1 << 24;
900 code[loc + 1] &= ~(1 << 16);
902 code[loc + 0] &= ~(1 << 24);
// Emits an interpolation instruction. The input address of source 0 is
// encoded with srcAddr8() at bit 16 of code[0]; mode bits depend on the
// interpolation mode, the sample mode, the opcode and the encoding size.
908 CodeEmitterNV50::emitINTERP(const Instruction *i)
910 code[0] = 0x80000000;
913 srcAddr8(i->src(0), 16);
915 if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
918 if (i->op == OP_PINTERP) {
922 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
926 if (i->encSize == 8) {
927 if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
// Bits 24-25 of code[0] are copied into bits 16-17 of code[1] and then
// cleared in code[0].
930 code[1] = (code[0] & (3 << 24)) >> (24 - 16);
931 code[0] &= ~0x03000000;
// Register interpApply so the mode bits can be patched afterwards.
936 addInterp(i->ipa, i->encSize, interpApply);
// Emits a min/max. F64 has its own opcode, where code[1] distinguishes
// OP_MIN from the other operation; the remaining types share a base opcode
// plus per-type bits.
940 CodeEmitterNV50::emitMINMAX(const Instruction *i)
942 if (i->dType == TYPE_F64) {
943 code[0] = 0xe0000000;
944 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
946 code[0] = 0x30000000;
947 code[1] = 0x80000000;
949 code[1] |= 0x20000000;
952 case TYPE_F32: code[0] |= 0x80000000; break;
953 case TYPE_S32: code[1] |= 0x8c000000; break;
954 case TYPE_U32: code[1] |= 0x84000000; break;
955 case TYPE_S16: code[1] |= 0x80000000; break;
956 case TYPE_U16: break;
// Source modifiers in code[1]: abs of source 0 / 1 at bits 20 / 19, neg of
// source 0 / 1 at bits 26 / 27.
963 code[1] |= i->src(0).mod.abs() << 20;
964 code[1] |= i->src(0).mod.neg() << 26;
965 code[1] |= i->src(1).mod.abs() << 19;
966 code[1] |= i->src(1).mod.neg() << 27;
// Emits a 32-bit float multiply-add. neg_mul is the combined sign of the
// product (XOR of the negate modifiers of sources 0 and 1); neg_add is the
// negate modifier of the addend (source 2).
972 CodeEmitterNV50::emitFMAD(const Instruction *i)
974 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
975 const int neg_add = i->src(2).mod.neg();
977 code[0] = 0xe0000000;
// Immediate form: negate bits live at bits 15 and 22 of code[0].
979 if (i->src(1).getFile() == FILE_IMMEDIATE) {
982 code[0] |= neg_mul << 15;
983 code[0] |= neg_add << 22;
// Short form: same negate bit positions as the immediate form.
987 if (i->encSize == 4) {
989 code[0] |= neg_mul << 15;
990 code[0] |= neg_add << 22;
// Long form: negate bits live at bits 26 and 27 of code[1].
994 code[1] = neg_mul << 26;
995 code[1] |= neg_add << 27;
// Emits a double-precision multiply-add. Only the 8-byte encoding without
// saturation is accepted. The sign of the product goes to bit 26 and the
// sign of the addend to bit 27 of code[1].
1003 CodeEmitterNV50::emitDMAD(const Instruction *i)
1005 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1006 const int neg_add = i->src(2).mod.neg();
1008 assert(i->encSize == 8);
1009 assert(!i->saturate);
1011 code[1] = 0x40000000;
1012 code[0] = 0xe0000000;
1014 code[1] |= neg_mul << 26;
1015 code[1] |= neg_add << 27;
// Emits a 32-bit float add or subtract. OP_SUB is expressed by flipping the
// negate bit of source 1. abs modifiers are not accepted.
1023 CodeEmitterNV50::emitFADD(const Instruction *i)
1025 const int neg0 = i->src(0).mod.neg();
1026 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1028 code[0] = 0xb0000000;
1030 assert(!(i->src(0).mod | i->src(1).mod).abs());
// Immediate form: negate bits at bits 15 and 22 of code[0].
1032 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1035 code[0] |= neg0 << 15;
1036 code[0] |= neg1 << 22;
// Long form: negate bits at bits 26 and 27 of code[1].
1040 if (i->encSize == 8) {
1043 code[1] |= neg0 << 26;
1044 code[1] |= neg1 << 27;
// Remaining form: negate bits at bits 15 and 22 of code[0].
1049 code[0] |= neg0 << 15;
1050 code[0] |= neg1 << 22;
// Emits a double-precision add or subtract. OP_SUB flips the negate bit of
// source 1. Requires the 8-byte encoding, no saturation and no abs
// modifiers. Negate bits go to bits 26 and 27 of code[1].
1057 CodeEmitterNV50::emitDADD(const Instruction *i)
1059 const int neg0 = i->src(0).mod.neg();
1060 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1062 assert(!(i->src(0).mod | i->src(1).mod).abs());
1063 assert(!i->saturate);
1064 assert(i->encSize == 8);
1066 code[1] = 0x60000000;
1067 code[0] = 0xe0000000;
1071 code[1] |= neg0 << 26;
1072 code[1] |= neg1 << 27;
// Emits an integer add or subtract. OP_SUB flips the negate bit of
// source 1.
1076 CodeEmitterNV50::emitUADD(const Instruction *i)
1078 const int neg0 = i->src(0).mod.neg();
1079 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1081 code[0] = 0x20008000;
1083 if (i->src(1).getFile() == FILE_IMMEDIATE) {
// Long form: code[1] distinguishes 2-byte types from the rest.
1087 if (i->encSize == 8) {
1088 code[0] = 0x20000000;
1089 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
// At most one operand may be negated: source 0 uses bit 28, source 1
// bit 22 of code[0].
1094 assert(!(neg0 && neg1));
1095 code[0] |= neg0 << 28;
1096 code[0] |= neg1 << 22;
// Add with carry: both negate bits are set together, and the flags register
// id goes to bit 12 of code[1], so no predicate can be present.
1098 if (i->flagsSrc >= 0) {
1099 // addc == sub | subr
1100 assert(!(code[0] & 0x10400000) && !i->getPredicate());
1101 code[0] |= 0x10400000;
1102 srcId(i->src(i->flagsSrc), 32 + 12);
// Emits an operation that writes an address register. s is the index of
// the source whose 16-bit value is placed at bit 9 of code[0]: source 0 for
// OP_MOV, source 1 otherwise.
1107 CodeEmitterNV50::emitAADD(const Instruction *i)
1109 const int s = (i->op == OP_MOV) ? 0 : 1;
1111 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1112 code[1] = 0x20000000;
// The destination address register is encoded as id plus one at bit 2.
1114 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
// In the non-MOV case, source 0 is the address register being added to.
1118 if (s && i->srcExists(0))
1119 setARegBits(SDATA(i->src(0)).id + 1);
// Emits an integer multiply. In the 8-byte encoding code[1] is 0xc000 for
// S16 sources and 0 otherwise; an immediate source 1 and the shorter
// encoding are handled separately.
1123 CodeEmitterNV50::emitIMUL(const Instruction *i)
1125 code[0] = 0x40000000;
1127 if (i->encSize == 8) {
1128 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1129 if (i->src(1).getFile() == FILE_IMMEDIATE)
1134 if (i->sType == TYPE_S16)
// Emits a 32-bit float multiply. neg is the combined sign of the product
// (XOR of both source modifiers, then the negate bit).
1141 CodeEmitterNV50::emitFMUL(const Instruction *i)
1143 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1145 code[0] = 0xc0000000;
1147 if (i->src(1).getFile() == FILE_IMMEDIATE) {
// Long form: ROUND_Z is encoded as 0xc000 in code[1].
1155 if (i->encSize == 8) {
1156 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
// NOTE(review): presumably set only when neg is non-zero - confirm.
1158 code[1] |= 0x08000000;
// Emits a double-precision multiply. Requires the 8-byte encoding and no
// saturation. The rounding mode is encoded with roundMode_CVT().
1172 CodeEmitterNV50::emitDMUL(const Instruction *i)
1174 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1176 assert(!i->saturate);
1177 assert(i->encSize == 8);
1179 code[1] = 0x80000000;
1180 code[0] = 0xe0000000;
// NOTE(review): presumably set only when neg is non-zero - confirm.
1183 code[1] |= 0x08000000;
1185 roundMode_CVT(i->rnd);
// Emits an integer multiply-add.
1191 CodeEmitterNV50::emitIMAD(const Instruction *i)
1193 code[0] = 0x60000000;
// Signed source types select 0x40000000 (saturating) or 0x20000000 in
// code[1]; the other assignment sets code[1] to 0.
1194 if (isSignedType(i->sType))
1195 code[1] = i->saturate ? 0x40000000 : 0x20000000;
1197 code[1] = 0x00000000;
// neg1 is the sign of the product, neg2 the sign of the addend. They may
// not both be set; they go to bits 27 and 26 of code[1].
1199 int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1200 int neg2 = i->src(2).mod.neg();
1202 assert(!(neg1 & neg2));
1203 code[1] |= neg1 << 27;
1204 code[1] |= neg2 << 26;
1206 if (i->src(1).getFile() == FILE_IMMEDIATE)
// Carry-in: both negate bits are set together, and the flags register id
// goes to bit 12 of code[1], so no predicate can be present.
1211 if (i->flagsSrc >= 0) {
1212 // add with carry from $cX
1213 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1214 code[1] |= 0xc << 24;
1215 srcId(i->src(i->flagsSrc), 32 + 12);
// Emits an integer sum of absolute differences. In the 8-byte encoding the
// operand type is encoded in code[1]; in the other encoding it is folded
// into code[0].
1220 CodeEmitterNV50::emitISAD(const Instruction *i)
1222 if (i->encSize == 8) {
1223 code[0] = 0x50000000;
1225 case TYPE_U32: code[1] = 0x04000000; break;
1226 case TYPE_S32: code[1] = 0x0c000000; break;
1227 case TYPE_U16: code[1] = 0x00000000; break;
1228 case TYPE_S16: code[1] = 0x08000000; break;
// Second encoding: 0x8000 marks the 32-bit types and 0x100 the signed ones.
1236 case TYPE_U32: code[0] = 0x50008000; break;
1237 case TYPE_S32: code[0] = 0x50008100; break;
1238 case TYPE_U16: code[0] = 0x50000000; break;
1239 case TYPE_S16: code[0] = 0x50000100; break;
// Emits a comparison (set) instruction. Two opcode pairs are assigned.
// NOTE(review): presumably the 0xe0000000 pair is used for F64 sources -
// confirm.
1249 CodeEmitterNV50::emitSET(const Instruction *i)
1251 code[0] = 0x30000000;
1252 code[1] = 0x60000000;
1256 code[0] = 0xe0000000;
1257 code[1] = 0xe0000000;
// Per-type bits.
1259 case TYPE_F32: code[0] |= 0x80000000; break;
1260 case TYPE_S32: code[1] |= 0x0c000000; break;
1261 case TYPE_U32: code[1] |= 0x04000000; break;
1262 case TYPE_S16: code[1] |= 0x08000000; break;
1263 case TYPE_U16: break;
// The comparison condition goes to bit 14 of code[1].
1269 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
// Source modifiers in code[1]: neg of source 0 / 1 at bits 26 / 27, abs of
// source 0 / 1 at bits 20 / 19.
1271 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1272 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1273 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1274 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
// ORs the rounding mode into code[1]. Bits 17-18 select M (0x20000),
// P (0x40000) or Z (0x60000); bit 27 (0x08000000) marks the *I variants.
// Any other mode is asserted to be ROUND_N.
1280 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1283 case ROUND_NI: code[1] |= 0x08000000; break;
1284 case ROUND_M: code[1] |= 0x00020000; break;
1285 case ROUND_MI: code[1] |= 0x08020000; break;
1286 case ROUND_P: code[1] |= 0x00040000; break;
1287 case ROUND_PI: code[1] |= 0x08040000; break;
1288 case ROUND_Z: code[1] |= 0x00060000; break;
1289 case ROUND_ZI: code[1] |= 0x08060000; break;
1291 assert(rnd == ROUND_N);
// Emits a type conversion; also handles OP_CEIL / OP_FLOOR / OP_TRUNC and
// OP_ABS / OP_SAT / OP_NEG. code[1] is set from tables of constants, one
// table per type combination.
// NOTE(review): presumably the tables are selected by dType and indexed by
// sType - confirm.
1297 CodeEmitterNV50::emitCVT(const Instruction *i)
1299 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
// Float-to-float rounding uses the *I rounding modes, other conversions
// the plain ones.
1304 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1305 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1306 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1312 if (i->op == OP_NEG && i->dType == TYPE_U32)
1317 code[0] = 0xa0000000;
1322 case TYPE_F64: code[1] = 0xc4404000; break;
1323 case TYPE_S64: code[1] = 0x44414000; break;
1324 case TYPE_U64: code[1] = 0x44404000; break;
1325 case TYPE_F32: code[1] = 0xc4400000; break;
1326 case TYPE_S32: code[1] = 0x44410000; break;
1327 case TYPE_U32: code[1] = 0x44400000; break;
1335 case TYPE_F64: code[1] = 0x8c404000; break;
1336 case TYPE_F32: code[1] = 0x8c400000; break;
1344 case TYPE_F64: code[1] = 0x84404000; break;
1345 case TYPE_F32: code[1] = 0x84400000; break;
1353 case TYPE_F64: code[1] = 0xc0404000; break;
1354 case TYPE_S64: code[1] = 0x40414000; break;
1355 case TYPE_U64: code[1] = 0x40404000; break;
1356 case TYPE_F32: code[1] = 0xc4004000; break;
1357 case TYPE_S32: code[1] = 0x44014000; break;
1358 case TYPE_U32: code[1] = 0x44004000; break;
1359 case TYPE_F16: code[1] = 0xc4000000; break;
1360 case TYPE_U16: code[1] = 0x44000000; break;
1368 case TYPE_F64: code[1] = 0x88404000; break;
1369 case TYPE_F32: code[1] = 0x8c004000; break;
1370 case TYPE_S32: code[1] = 0x0c014000; break;
1371 case TYPE_U32: code[1] = 0x0c004000; break;
1372 case TYPE_F16: code[1] = 0x8c000000; break;
1373 case TYPE_S16: code[1] = 0x0c010000; break;
1374 case TYPE_U16: code[1] = 0x0c000000; break;
1375 case TYPE_S8: code[1] = 0x0c018000; break;
1376 case TYPE_U8: code[1] = 0x0c008000; break;
1384 case TYPE_F64: code[1] = 0x80404000; break;
1385 case TYPE_F32: code[1] = 0x84004000; break;
1386 case TYPE_S32: code[1] = 0x04014000; break;
1387 case TYPE_U32: code[1] = 0x04004000; break;
1388 case TYPE_F16: code[1] = 0x84000000; break;
1389 case TYPE_S16: code[1] = 0x04010000; break;
1390 case TYPE_U16: code[1] = 0x04000000; break;
1391 case TYPE_S8: code[1] = 0x04018000; break;
1392 case TYPE_U8: code[1] = 0x04008000; break;
// A 1-byte source type read from a 4-byte register sets bit 14 of code[1].
1406 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1407 code[1] |= 0x00004000;
// Operation bits in code[1]: abs at bit 20, saturate at bit 19, negate at
// bit 29.
1412 case OP_ABS: code[1] |= 1 << 20; break;
1413 case OP_SAT: code[1] |= 1 << 19; break;
1414 case OP_NEG: code[1] |= 1 << 29; break;
// A negate modifier on the source toggles the negate bit (XOR), so it
// cancels an OP_NEG; the abs modifier is ORed in.
1418 code[1] ^= i->src(0).mod.neg() << 29;
1419 code[1] |= i->src(0).mod.abs() << 20;
1423 assert(i->op != OP_ABS || !i->src(0).mod.neg());
// Emits a pre-operation: OP_PREEX2 selects 0xc0004000 in code[1], any
// other opcode 0xc0000000. The abs and neg modifiers of source 0 go to
// bits 20 and 26 of code[1].
1429 CodeEmitterNV50::emitPreOp(const Instruction *i)
1431 code[0] = 0xb0000000;
1432 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1434 code[1] |= i->src(0).mod.abs() << 20;
1435 code[1] |= i->src(0).mod.neg() << 26;
// Emits a special-function operation; subOp selects the function in the
// long form.
1441 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1443 code[0] = 0x90000000;
// Short form: only OP_RCP is allowed; abs / neg go to bits 15 / 22 of
// code[0].
1445 if (i->encSize == 4) {
1446 assert(i->op == OP_RCP);
1447 code[0] |= i->src(0).mod.abs() << 15;
1448 code[0] |= i->src(0).mod.neg() << 22;
// Long form: subOp at bit 29, abs / neg at bits 20 / 26 of code[1].
1451 code[1] = subOp << 29;
1452 code[1] |= i->src(0).mod.abs() << 20;
1453 code[1] |= i->src(0).mod.neg() << 26;
// Emits a bitwise NOT using opcode words 0xd0000000 / 0x0002c000.
// NOTE(review): presumably bit 26 of code[1] is set only for 32-bit types -
// confirm.
1459 CodeEmitterNV50::emitNOT(const Instruction *i)
1461 code[0] = 0xd0000000;
1462 code[1] = 0x0002c000;
1467 code[1] |= 0x04000000;
// Emits a bitwise AND / OR / XOR.
1477 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1479 code[0] = 0xd0000000;
// Immediate form: the operation is encoded in code[0] (OR 0x0100, XOR
// 0x8000, AND no extra bits).
1482 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1484 case OP_OR: code[0] |= 0x0100; break;
1485 case OP_XOR: code[0] |= 0x8000; break;
1487 assert(i->op == OP_AND);
1490 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
// Register form: the operation is encoded in bits 14-15 of code[1].
1496 case OP_AND: code[1] = 0x04000000; break;
1497 case OP_OR: code[1] = 0x04004000; break;
1498 case OP_XOR: code[1] = 0x04008000; break;
// NOT modifiers on either source are checked individually.
1503 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1505 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
// Emits an instruction that writes an address register with a shift amount
// shl: shl goes to bit 16 of code[0] and the destination address register
// id plus one to bit 2. Source file bits use the immediate form.
1513 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1515 code[0] = 0x00000001 | (shl << 16);
1516 code[1] = 0xc0000000;
1518 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1520 setSrcFileBits(i, NV50_OP_ENC_IMM);
// Emits a shift.
1526 CodeEmitterNV50::emitShift(const Instruction *i)
// Shifts into an address register are delegated to emitARL(); the shift
// amount must be an immediate and is truncated to 6 bits.
1528 if (i->def(0).getFile() == FILE_ADDRESS) {
1529 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1530 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
// General case: OP_SHR selects 0xe4000000 in code[1], any other opcode
// 0xc4000000; a signed OP_SHR is checked separately.
1532 code[0] = 0x30000001;
1533 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1534 if (i->op == OP_SHR && isSignedType(i->sType))
// Immediate shift amount: 7 bits at bit 16 of code[0]; destination id at
// bit 2, source 0 id at bit 9.
1537 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1539 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1540 defId(i->def(0), 2);
1541 srcId(i->src(0), 9);
// Emits an output instruction: OP_EMIT uses 0xf0000201 in code[0], any
// other opcode 0xf0000401.
1550 CodeEmitterNV50::emitOUT(const Instruction *i)
1552 code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1553 code[1] = 0xc0000000;
// Emits a texture instruction. Starts from 0xf0000001 / 0 and adjusts both
// words according to the texture opcode; the fallback case is asserted to
// be OP_TEX.
1559 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1561 code[0] = 0xf0000001;
1562 code[1] = 0x00000000;
1566 code[1] = 0x20000000;
1569 code[1] = 0x40000000;
1572 code[0] |= 0x01000000;
1575 code[0] |= 0x01000000;
1576 code[1] = 0x80000000;
1579 code[1] = 0x60020000;
1582 assert(i->op == OP_TEX);
// tex.r goes to bit 9 and tex.s to bit 17 of code[0].
1586 code[0] |= i->tex.r << 9;
1587 code[0] |= i->tex.s << 17;
// The argument count starts from the texture target and is adjusted for
// TXB / TXL / TXF and for shadow targets; count minus one goes to bit 22.
1589 int argc = i->tex.target.getArgCount();
1591 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1593 if (i->tex.target.isShadow())
1597 code[0] |= (argc - 1) << 22;
1599 if (i->tex.target.isCube()) {
1600 code[0] |= 0x08000000;
// Texel offsets: 4 bits each at bits 24, 20 and 16 of code[1].
1602 if (i->tex.useOffsets) {
1603 code[1] |= (i->tex.offset[0] & 0xf) << 24;
1604 code[1] |= (i->tex.offset[1] & 0xf) << 20;
1605 code[1] |= (i->tex.offset[2] & 0xf) << 16;
// The 4-bit mask is split: low two bits at bits 25-26 of code[0], high two
// bits at bits 14-15 of code[1].
1608 code[0] |= (i->tex.mask & 0x3) << 25;
1609 code[1] |= (i->tex.mask & 0xc) << 12;
1611 if (i->tex.liveOnly)
1614 defId(i->def(0), 2);
// Emits a texture query; only TXQ_DIMS is supported. tex.r and tex.s go to
// bits 9 and 17 of code[0], and the mask is split across both words as in
// emitTEX(). The destination id goes to bit 2.
1620 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1622 assert(i->tex.query == TXQ_DIMS);
1624 code[0] = 0xf0000001;
1625 code[1] = 0x60000000;
1627 code[0] |= i->tex.r << 9;
1628 code[0] |= i->tex.s << 17;
1630 code[0] |= (i->tex.mask & 0x3) << 25;
1631 code[1] |= (i->tex.mask & 0xc) << 12;
1633 defId(i->def(0), 2);
// Emits a texture-prepare instruction. code[0] combines a fixed opcode,
// the constant 3 at bit 22, tex.s at bit 17 and tex.r at bit 9. The mask is
// split across both words and the destination id goes to bit 2.
1639 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1641 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1642 code[1] = 0x60010000;
1644 code[0] |= (i->tex.mask & 0x3) << 25;
1645 code[1] |= (i->tex.mask & 0xc) << 12;
1646 defId(i->def(0), 2);
// Emits one instruction of an emulated PRERET sequence; the instruction
// subOp selects the step (NV50_IR_SUBOP_EMU_PRERET + 0, + 1 or + 2).
1652 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1654 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op
// Default: an unconditional branch.
1656 code[0] = 0x10000003; // bra
1657 code[1] = 0x00000780; // always
1660 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1662 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
// Remaining step: the call itself.
1666 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1667 code[0] = 0x20000003; // call
1668 code[1] = 0x00000000; // no predicate
// Two relocations are recorded for pos, one per code word.
1671 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1672 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
// Encodes a control-flow instruction. flowOp is the opcode value placed in
// the top four bits of word 0.
1676 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1678 const FlowInstruction *f = i->asFlow();
// Both flags start out false.
1679 bool hasPred = false;
1680 bool hasTarg = false;
// Base encoding: low bits 0x3 plus flowOp shifted to bit 28.
1682 code[0] = 0x00000003 | (flowOp << 28);
1683 code[1] = 0x00000000;
// Sub-ops at or above NV50_IR_SUBOP_EMU_PRERET are tested separately
// (presumably handed to the PRERET emulation path — TODO confirm).
1703 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
// Target position: for OP_CALL, either a builtin's offset from the target
// description or the callee function's binPos; otherwise the target basic
// block's binPos.
1718 if (f->op == OP_CALL) {
1720 pos = targNV50->getBuiltinOffset(f->target.builtin);
1722 pos = f->target.fn->binPos;
1725 pos = f->target.bb->binPos;
// Split pos across the two words: bits 2..17 go to word 0 at bit 11, bits
// 18..23 go to word 1 at bit 14.
1728 code[0] |= ((pos >> 2) & 0xffff) << 11;
1729 code[1] |= ((pos >> 18) & 0x003f) << 14;
1731 RelocEntry::Type relocTy;
// Builtin targets use TYPE_BUILTIN relocations, everything else TYPE_CODE.
1733 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
// One relocation per word, covering the fields written above.
1735 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1736 addReloc(relocTy, 1, pos, 0x000fc000, -4);
// Encodes a barrier instruction; the barrier id comes from source 0.
1741 CodeEmitterNV50::emitBAR(const Instruction *i)
// Source 0 is read as an immediate.
1743 ImmediateValue *barId = i->getSrc(0)->asImm();
// The barrier id is shifted to bit 21 of word 0; word 1 starts as 0x00004000.
1746 code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1747 code[1] = 0x00004000;
// NV50_IR_SUBOP_BAR_SYNC gets additional handling in the guarded statement.
1749 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
// Encodes an atomic memory operation. The IR sub-op is first translated to
// the numeric value used in the encoding, then merged into word 1.
1754 CodeEmitterNV50::emitATOM(const Instruction *i)
// IR atomic sub-op -> encoded sub-op value.
1758 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1759 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1760 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1761 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1762 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1763 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1764 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1765 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1766 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1767 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
// Any sub-op not listed above trips this assert in debug builds.
1769 assert(!"invalid subop");
// Base opcode; the encoded sub-op sits at bit 2 of word 1.
1772 code[0] = 0xd0000001;
1773 code[1] = 0xe0c00000 | (subOp << 2);
// Signed data types and compare-and-swap each get extra handling in the
// statements these conditions guard.
1774 if (isSignedType(i->dType))
1781 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
// The file index of source 0 is placed at bit 23 of word 0.
1785 code[0] |= i->getSrc(0)->reg.fileIndex << 23;
// Encode the indirect value of source 0 (dimension 0) via srcId at position 9.
1786 srcId(i->getIndirect(0, 0), 9);
// Encodes a single instruction at the current write position. It validates
// the instruction, dispatches to an op-specific emitter, checks that the
// encoding matches the declared size, and advances the output position.
1790 CodeEmitterNV50::emitInstruction(Instruction *insn)
// An encSize of 0 marks the instruction as unencodable; report and print it.
1792 if (!insn->encSize) {
1793 ERROR("skipping unencodable instruction: "); insn->print();
// Refuse to write past the configured output size limit.
1796 if (codeSize + insn->encSize > codeSizeLimit) {
1797 ERROR("code emitter output buffer too small\n");
// With NV50_IR_DEBUG_BASIC set on the owning program, log each instruction.
1801 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1802 INFO("EMIT: "); insn->print();
// Several ops choose their emitter from the data type: TYPE_F64 is tested
// first, then the remaining float types; one of them also tests for a
// destination in FILE_ADDRESS.
1834 if (insn->dType == TYPE_F64)
1836 else if (isFloatType(insn->dType))
1838 else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1844 if (insn->dType == TYPE_F64)
1846 else if (isFloatType(insn->dType))
1853 if (insn->dType == TYPE_F64)
1855 else if (isFloatType(insn->dType))
// Destinations in FILE_ADDRESS, and operands in FILE_FLAGS or FILE_ADDRESS,
// are tested separately from the generic case.
1891 if (insn->def(0).getFile() == FILE_ADDRESS)
1894 if (insn->def(0).getFile() == FILE_FLAGS ||
1895 insn->src(0).getFile() == FILE_FLAGS ||
1896 insn->src(0).getFile() == FILE_ADDRESS)
// Texture ops are handed over as TexInstruction.
1929 emitTEX(insn->asTex());
1932 emitTXQ(insn->asTex());
1935 emitTEXPREP(insn->asTex());
// Control-flow ops share emitFlow and differ only in the opcode value passed.
1942 emitFlow(insn, 0x0);
1945 emitFlow(insn, 0x1);
1948 emitFlow(insn, 0x2);
1951 emitFlow(insn, 0x3);
1954 emitFlow(insn, 0x4);
1957 emitFlow(insn, 0x5);
1960 emitFlow(insn, 0x6);
1963 emitFlow(insn, 0x7);
1966 emitFlow(insn, 0xa);
1969 emitFlow(insn, 0xd);
// The first quad-op call takes the lane and sub-op from the instruction.
// The other two use fixed lane values 4 and 5, and pick their second
// constant according to the negate modifier on source 0.
1972 emitQUADOP(insn, insn->lanes, insn->subOp);
1975 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1978 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
// Ops that earlier passes should have removed or lowered, and ops this
// emitter does not know, are reported as errors.
1989 ERROR("operation should have been eliminated\n");
2003 ERROR("operation should have been lowered\n");
2006 ERROR("unknown op: %u\n", insn->op);
// Join and exit are handled after the op-specific encoding, whether they
// arrive as a flag on the instruction or as a dedicated op.
2009 if (insn->join || insn->op == OP_JOIN)
2012 if (insn->exit || insn->op == OP_EXIT)
// Consistency check: bit 0 of word 0 must be set exactly when the
// instruction was sized at 8 bytes.
2015 assert((insn->encSize == 8) == (code[0] & 1));
// Advance the write pointer by encSize / 4 elements and the running size by
// encSize.
2017 code += insn->encSize / 4;
2018 codeSize += insn->encSize;
// Determines the smallest encoding size usable for instruction i. It starts
// from the op's minEncSize and applies a series of operand, modifier and
// op-specific checks, falling back to info.minEncSize at the end.
2023 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2025 const Target::OpInfo &info = targ->getOpInfo(i);
// Ops whose minimum size already exceeds 4, and all F64 operations, are
// decided up front.
2027 if (info.minEncSize > 4 || i->dType == TYPE_F64)
2030 // check constraints on dst and src operands
// Each destination is tested for a register id above 63 or a file other
// than FILE_GPR.
2031 for (int d = 0; i->defExists(d); ++d) {
2032 if (i->def(d).rep()->reg.data.id > 63 ||
2033 i->def(d).rep()->reg.file != FILE_GPR)
// Each source is examined by file, with FILE_SHADER_INPUT in fragment
// programs treated as a distinct case, and by register id (> 63).
2037 for (int s = 0; i->srcExists(s); ++s) {
2038 DataFile sf = i->src(s).getFile();
2040 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2042 if (i->src(s).rep()->reg.data.id > 63)
2046 // check modifiers & rounding
// Join/exit flags and a lane mask other than 0xf are tested, as is OP_MUL
// with a rounding mode other than ROUND_N.
2047 if (i->join || i->lanes != 0xf || i->exit)
2049 if (i->op == OP_MUL && i->rnd != ROUND_N)
2053 return 8; // TODO: short tex encoding
2055 // check constraints on short MAD
// For three-source ops, this tests for a missing destination, a non-float
// type, or a destination register id that differs from source 2's.
2056 if (info.srcNr >= 2 && i->srcExists(2)) {
2057 if (!i->defExists(0) || !isFloatType(i->dType) ||
2058 i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
// Nothing ruled out the op's minimum size.
2062 return info.minEncSize;
2065 // Change the encoding size of an instruction after BBs have been scheduled.
// An instruction that is already 8 bytes is tested first. Otherwise a
// neighbouring 4-byte instruction (next or prev) may be widened to 8 bytes
// as well, and the positions and sizes of the affected blocks are adjusted.
2067 makeInstructionLong(Instruction *insn)
2069 if (insn->encSize == 8)
2071 Function *fn = insn->bb->getFunction();
// Count (in n) the consecutive 4-byte instructions that follow insn.
2075 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
// Depending on that count, either the next instruction or, if it is 4 bytes,
// the previous one is widened to 8 bytes.
2079 insn->next->encSize = 8;
2081 if (insn->prev && insn->prev->encSize == 4) {
2083 insn->prev->encSize = 8;
// Walk the function's blocks from the last one back to insn's own block and
// shift each later block's position.
// NOTE(review): positions move by a constant 4 while the block size below
// grows by adj; if adj can be larger than 4 (e.g. when a neighbour is
// widened too), later blocks would end up misplaced — confirm whether this
// should be adj.
2087 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2088 fn->bbArray[i]->binPos += 4;
// The owning block grows by the total adjustment.
2091 insn->bb->binSize += adj;
// Tries to use insn as the carrier of an exit modifier. Several kinds of
// instruction are tested first (presumably rejected — TODO confirm); if
// control reaches the end, the instruction is converted to the long encoding.
2095 trySetExitModifier(Instruction *insn)
// DISCARD, QUADON and QUADPOP are singled out.
2097 if (insn->op == OP_DISCARD ||
2098 insn->op == OP_QUADON ||
2099 insn->op == OP_QUADPOP)
// So is any instruction with an immediate source.
2101 for (int s = 0; insn->srcExists(s); ++s)
2102 if (insn->src(s).getFile() == FILE_IMMEDIATE)
// Flow instructions are inspected for calls and for predication.
2104 if (insn->asFlow()) {
2105 if (insn->op == OP_CALL) // side effects !
2107 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
// Widen the instruction to 8 bytes.
2112 makeInstructionLong(insn);
// Attempts to drop the explicit OP_EXIT at the end of func's exit block by
// moving the exit onto the preceding instruction(s) through
// trySetExitModifier. On success the exit instruction is deleted and the
// epilogue shrinks by 8 bytes.
2117 replaceExitWithModifier(Function *func)
// The epilogue is the basic block behind the function's CFG exit node.
2119 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
// Only applies when the epilogue ends in OP_EXIT.
2121 if (!epilogue->getExit() ||
2122 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
// If the epilogue holds more than the exit itself, the instruction just
// before the exit is the candidate.
2125 if (epilogue->getEntry()->op != OP_EXIT) {
2126 Instruction *insn = epilogue->getExit()->prev;
2127 if (!insn || !trySetExitModifier(insn))
// Otherwise every block with an edge into the exit node is visited and its
// last instruction is tried.
2131 for (Graph::EdgeIterator ei = func->cfgExit->incident();
2132 !ei.end(); ei.next()) {
2133 BasicBlock *bb = BasicBlock::get(ei.getNode());
2134 Instruction *i = bb->getExit();
2136 if (!i || !trySetExitModifier(i))
// Account for the removed exit instruction, then delete it.
2140 epilogue->binSize -= 8;
2142 delete_Instruction(func->getProgram(), epilogue->getExit());
// Runs the base-class emission preparation for func, then applies the
// exit-modifier replacement to it.
2146 CodeEmitterNV50::prepareEmission(Function *func)
2148 CodeEmitter::prepareEmission(func);
2150 replaceExitWithModifier(func);
// Constructs an emitter bound to the given NV50 target. The target is passed
// to the CodeEmitter base and also kept in targNV50 with its concrete type.
2153 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
2154 CodeEmitter(target), targNV50(target)
2156 targ = target; // specialized
// Start with zero bytes emitted and a zero size limit.
2158 codeSize = codeSizeLimit = 0;
// Creates a new CodeEmitterNV50 for this target and sets its program type
// to the requested one. The emitter is heap-allocated with new.
2163 TargetNV50::getCodeEmitter(Program::Type type)
2165 CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
2166 emit->setProgramType(type);
2170 } // namespace nv50_ir