2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_info.h"
30 #include "tgsi/tgsi_strings.h"
31 #include "util/u_hash_table.h"
32 #include "toy_helpers.h"
35 /* map TGSI opcode to GEN opcode 1-to-1 */
/*
 * Table indexed by TGSI opcode (designated initializers, TGSI_OPCODE_LAST
 * slots).  aos_simple() reads each entry through the fields .opcode,
 * .num_dst and .num_src; soa_scalar_replicate() reads .opcode only.
 *
 * The three positional values presumably initialize opcode, num_dst and
 * num_src in that order -- the struct declaration is not visible in this
 * view, TODO confirm.
 *
 * Several TGSI opcodes share one target opcode (e.g. MIN/MAX -> SEL,
 * SUB -> ADD, ABS/IABS/INEG -> MOV).  aos_simple() tells them apart by
 * adding a conditional modifier or a source modifier.
 */
40 } aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
41 [TGSI_OPCODE_ARL] = { BRW_OPCODE_RNDD, 1, 1 },
42 [TGSI_OPCODE_MOV] = { BRW_OPCODE_MOV, 1, 1 },
43 [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 },
44 [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 },
45 [TGSI_OPCODE_MUL] = { BRW_OPCODE_MUL, 1, 2 },
46 [TGSI_OPCODE_ADD] = { BRW_OPCODE_ADD, 1, 2 },
47 [TGSI_OPCODE_DP3] = { BRW_OPCODE_DP3, 1, 2 },
48 [TGSI_OPCODE_DP4] = { BRW_OPCODE_DP4, 1, 2 },
49 [TGSI_OPCODE_MIN] = { BRW_OPCODE_SEL, 1, 2 },
50 [TGSI_OPCODE_MAX] = { BRW_OPCODE_SEL, 1, 2 },
51 /* a later pass will move src[2] to accumulator */
52 [TGSI_OPCODE_MAD] = { BRW_OPCODE_MAC, 1, 3 },
53 [TGSI_OPCODE_SUB] = { BRW_OPCODE_ADD, 1, 2 },
54 [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 },
55 [TGSI_OPCODE_FRC] = { BRW_OPCODE_FRC, 1, 1 },
56 [TGSI_OPCODE_FLR] = { BRW_OPCODE_RNDD, 1, 1 },
57 [TGSI_OPCODE_ROUND] = { BRW_OPCODE_RNDE, 1, 1 },
58 [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 },
59 [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 },
60 [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 },
61 [TGSI_OPCODE_ABS] = { BRW_OPCODE_MOV, 1, 1 },
62 [TGSI_OPCODE_DPH] = { BRW_OPCODE_DPH, 1, 2 },
63 [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 },
64 [TGSI_OPCODE_KILP] = { TOY_OPCODE_KIL, 0, 0 },
65 [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 },
66 [TGSI_OPCODE_ARR] = { BRW_OPCODE_RNDZ, 1, 1 },
67 [TGSI_OPCODE_DP2] = { BRW_OPCODE_DP2, 1, 2 },
68 [TGSI_OPCODE_IF] = { BRW_OPCODE_IF, 0, 1 },
69 [TGSI_OPCODE_UIF] = { BRW_OPCODE_IF, 0, 1 },
70 [TGSI_OPCODE_ELSE] = { BRW_OPCODE_ELSE, 0, 0 },
71 [TGSI_OPCODE_ENDIF] = { BRW_OPCODE_ENDIF, 0, 0 },
72 [TGSI_OPCODE_I2F] = { BRW_OPCODE_MOV, 1, 1 },
73 [TGSI_OPCODE_NOT] = { BRW_OPCODE_NOT, 1, 1 },
74 [TGSI_OPCODE_TRUNC] = { BRW_OPCODE_RNDZ, 1, 1 },
75 [TGSI_OPCODE_SHL] = { BRW_OPCODE_SHL, 1, 2 },
76 [TGSI_OPCODE_AND] = { BRW_OPCODE_AND, 1, 2 },
77 [TGSI_OPCODE_OR] = { BRW_OPCODE_OR, 1, 2 },
78 [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
79 [TGSI_OPCODE_XOR] = { BRW_OPCODE_XOR, 1, 2 },
80 [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 },
81 [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 },
82 [TGSI_OPCODE_NOP] = { BRW_OPCODE_NOP, 0, 0 },
83 [TGSI_OPCODE_KIL] = { TOY_OPCODE_KIL, 0, 1 },
84 [TGSI_OPCODE_END] = { BRW_OPCODE_NOP, 0, 0 },
85 [TGSI_OPCODE_F2I] = { BRW_OPCODE_MOV, 1, 1 },
86 [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
87 [TGSI_OPCODE_IMAX] = { BRW_OPCODE_SEL, 1, 2 },
88 [TGSI_OPCODE_IMIN] = { BRW_OPCODE_SEL, 1, 2 },
89 [TGSI_OPCODE_INEG] = { BRW_OPCODE_MOV, 1, 1 },
90 [TGSI_OPCODE_ISHR] = { BRW_OPCODE_ASR, 1, 2 },
91 [TGSI_OPCODE_F2U] = { BRW_OPCODE_MOV, 1, 1 },
92 [TGSI_OPCODE_U2F] = { BRW_OPCODE_MOV, 1, 1 },
93 [TGSI_OPCODE_UADD] = { BRW_OPCODE_ADD, 1, 2 },
94 [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
95 /* a later pass will move src[2] to accumulator */
96 [TGSI_OPCODE_UMAD] = { BRW_OPCODE_MAC, 1, 3 },
97 [TGSI_OPCODE_UMAX] = { BRW_OPCODE_SEL, 1, 2 },
98 [TGSI_OPCODE_UMIN] = { BRW_OPCODE_SEL, 1, 2 },
99 [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
100 [TGSI_OPCODE_UMUL] = { BRW_OPCODE_MUL, 1, 2 },
101 [TGSI_OPCODE_USHR] = { BRW_OPCODE_SHR, 1, 2 },
102 [TGSI_OPCODE_UARL] = { BRW_OPCODE_MOV, 1, 1 },
103 [TGSI_OPCODE_IABS] = { BRW_OPCODE_MOV, 1, 1 },
/*
 * Translate a TGSI instruction that has a direct counterpart in
 * aos_simple_opcode_map.
 *
 * The target opcode is looked up by TGSI opcode, and the instruction's
 * destination/source register counts are asserted against the table.  The
 * switch then patches the operands for TGSI opcodes that share a target
 * opcode: a conditional modifier for MIN/MAX and the IF variants, a negated
 * src[1] for SUB, an absolute-value src[0] for ABS/IABS, a negated src[0]
 * for INEG, and an X-only swizzle for RCP/RSQ/EX2/LG2/COS/SIN/POW.
 * Finally the conditional modifier and the first num_src sources are stored
 * into the emitted instruction.
 *
 * NOTE(review): several lines of this function (remaining parameters, some
 * case labels, break/return statements, the instruction allocation) are
 * not visible in this view.
 */
107 aos_simple(struct toy_compiler *tc,
108 const struct tgsi_full_instruction *tgsi_inst,
112 struct toy_inst *inst;
114 int cond_modifier = BRW_CONDITIONAL_NONE;
115 int num_dst = tgsi_inst->Instruction.NumDstRegs;
116 int num_src = tgsi_inst->Instruction.NumSrcRegs;
/* look up the target opcode and sanity-check the operand counts */
119 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
120 assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
121 assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
/* presumably the failure path for an opcode without a table entry -- the
 * guarding condition is not visible here */
123 assert(!"invalid aos_simple() call");
127 /* no need to emit nop */
128 if (opcode == BRW_OPCODE_NOP)
135 inst->opcode = opcode;
137 switch (tgsi_inst->Instruction.Opcode) {
/* MIN variants: SEL with the "less" conditional modifier */
138 case TGSI_OPCODE_MIN:
139 case TGSI_OPCODE_IMIN:
140 case TGSI_OPCODE_UMIN:
141 cond_modifier = BRW_CONDITIONAL_L;
/* MAX variants: SEL with the "greater or equal" conditional modifier */
143 case TGSI_OPCODE_MAX:
144 case TGSI_OPCODE_IMAX:
145 case TGSI_OPCODE_UMAX:
146 cond_modifier = BRW_CONDITIONAL_GE;
/* SUB is emitted as ADD with the second source negated */
148 case TGSI_OPCODE_SUB:
149 src[1] = tsrc_negate(src[1]);
/* ABS/IABS are emitted as MOV with an absolute-value source modifier */
151 case TGSI_OPCODE_ABS:
152 case TGSI_OPCODE_IABS:
153 src[0] = tsrc_absolute(src[0]);
/* presumably the TGSI_OPCODE_IF case (label not visible): compare the X
 * channel of a float source against 0.0f with "not equal" */
156 cond_modifier = BRW_CONDITIONAL_NEQ;
158 assert(src[0].type == TOY_TYPE_F);
159 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
160 src[1] = tsrc_imm_f(0.0f);
/* UIF: same as above, but the source is unsigned and compared against 0 */
162 case TGSI_OPCODE_UIF:
163 cond_modifier = BRW_CONDITIONAL_NEQ;
165 assert(src[0].type == TOY_TYPE_UD);
166 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
167 src[1] = tsrc_imm_d(0);
/* INEG is emitted as MOV with a negated source */
169 case TGSI_OPCODE_INEG:
170 src[0] = tsrc_negate(src[0]);
/* these opcodes read only the X channel of their source */
172 case TGSI_OPCODE_RCP:
173 case TGSI_OPCODE_RSQ:
174 case TGSI_OPCODE_EX2:
175 case TGSI_OPCODE_LG2:
176 case TGSI_OPCODE_COS:
177 case TGSI_OPCODE_SIN:
178 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
/* POW reads only the X channel of both sources */
180 case TGSI_OPCODE_POW:
181 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
182 src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
186 inst->cond_modifier = cond_modifier;
189 assert(num_dst == 1);
/* copy the (possibly patched) sources into the emitted instruction */
193 assert(num_src <= Elements(inst->src));
194 for (i = 0; i < num_src; i++)
195 inst->src[i] = src[i];
/*
 * Translate the set-on-condition opcodes (SLT/SGE/SEQ/SGT/SLE/SNE and the
 * integer and unsigned variants listed in the switch).
 *
 * The TGSI opcode selects the comparison condition.  The emitted sequence
 * is:
 *   MOV dst, zero
 *   CMP null, src[0], src[1], cond
 *   MOV dst, one        (predicated)
 * where zero/one are immediates chosen by dst[0].type.
 *
 * NOTE(review): the case labels of the type switch, the break statements
 * and the remaining parameters are not visible in this view.
 */
199 aos_set_on_cond(struct toy_compiler *tc,
200 const struct tgsi_full_instruction *tgsi_inst,
204 struct toy_inst *inst;
206 struct toy_src zero, one;
/* pick the comparison condition from the TGSI opcode */
208 switch (tgsi_inst->Instruction.Opcode) {
209 case TGSI_OPCODE_SLT:
210 case TGSI_OPCODE_ISLT:
211 case TGSI_OPCODE_USLT:
212 cond = BRW_CONDITIONAL_L;
214 case TGSI_OPCODE_SGE:
215 case TGSI_OPCODE_ISGE:
216 case TGSI_OPCODE_USGE:
217 cond = BRW_CONDITIONAL_GE;
219 case TGSI_OPCODE_SEQ:
220 case TGSI_OPCODE_USEQ:
221 cond = BRW_CONDITIONAL_EQ;
223 case TGSI_OPCODE_SGT:
224 cond = BRW_CONDITIONAL_G;
226 case TGSI_OPCODE_SLE:
227 cond = BRW_CONDITIONAL_LE;
229 case TGSI_OPCODE_SNE:
230 case TGSI_OPCODE_USNE:
231 cond = BRW_CONDITIONAL_NEQ;
234 assert(!"invalid aos_set_on_cond() call");
238 /* note that for integer versions, all bits are set */
239 switch (dst[0].type) {
/* float immediates: false is 0.0f, true is 1.0f */
242 zero = tsrc_imm_f(0.0f);
243 one = tsrc_imm_f(1.0f);
/* signed immediates: true is -1 */
246 zero = tsrc_imm_d(0);
247 one = tsrc_imm_d(-1);
/* unsigned immediates: true is ~0 */
250 zero = tsrc_imm_ud(0);
251 one = tsrc_imm_ud(~0);
/* write "false" first, then overwrite with "true" under the predicate */
255 tc_MOV(tc, dst[0], zero);
256 tc_CMP(tc, tdst_null(), src[0], src[1], cond);
257 inst = tc_MOV(tc, dst[0], one);
258 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
/*
 * Translate CMP and UCMP: choose between src[1] and src[2] based on src[0].
 *
 *   CMP:  the test is src[0] <  0.0f
 *   UCMP: the test is src[0] != 0
 *
 * A CMP writing only the flag is emitted first, followed by a SEL of
 * src[1]/src[2] that is predicated on that flag.
 *
 * UCMP previously reused the "less than zero" test.  Its zero immediate is
 * unsigned, and an unsigned "less than zero" can never be true, so the
 * condition never passed.  UCMP now uses "not equal to zero".
 *
 * NOTE(review): the remaining parameters, the declaration of `zero` and the
 * break/default lines of the switch are not visible in this view.
 */
262 aos_compare(struct toy_compiler *tc,
263 const struct tgsi_full_instruction *tgsi_inst,
267 struct toy_inst *inst;
/* the immediate zero must match the type of the condition operand */
270 switch (tgsi_inst->Instruction.Opcode) {
271 case TGSI_OPCODE_CMP:
272 zero = tsrc_imm_f(0.0f);
274 case TGSI_OPCODE_UCMP:
275 zero = tsrc_imm_ud(0);
278 assert(!"invalid aos_compare() call");
/* UCMP tests for non-zero; CMP tests for negative */
282 tc_CMP(tc, tdst_null(), src[0], zero, (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) ? BRW_CONDITIONAL_NEQ : BRW_CONDITIONAL_L);
283 inst = tc_SEL(tc, dst[0], src[1], src[2], BRW_CONDITIONAL_NONE);
284 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
/*
 * Translate SSG/ISSG (sign of the source: -1, 0 or +1).
 *
 * Emitted sequence:
 *   MOV dst, zero
 *   CMP null, src[0], zero, G   ; MOV dst, one      (predicated)
 *   CMP null, src[0], zero, L   ; MOV dst, neg_one  (predicated)
 *
 * NOTE(review): in the ISSG case no assignment to `one` is visible in this
 * view (the line between the `zero` and `neg_one` assignments is missing);
 * confirm that `one` is set before it is used below.
 */
288 aos_set_sign(struct toy_compiler *tc,
289 const struct tgsi_full_instruction *tgsi_inst,
293 struct toy_inst *inst;
294 struct toy_src zero, one, neg_one;
296 switch (tgsi_inst->Instruction.Opcode) {
/* float variant */
297 case TGSI_OPCODE_SSG:
298 zero = tsrc_imm_f(0.0f);
299 one = tsrc_imm_f(1.0f);
300 neg_one = tsrc_imm_f(-1.0f);
/* signed integer variant */
302 case TGSI_OPCODE_ISSG:
303 zero = tsrc_imm_d(0);
305 neg_one = tsrc_imm_d(-1);
308 assert(!"invalid aos_set_sign() call");
/* default result is zero */
312 tc_MOV(tc, dst[0], zero);
/* positive source: overwrite with one */
314 tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_G);
315 inst = tc_MOV(tc, dst[0], one);
316 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
/* negative source: overwrite with minus one */
318 tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L);
319 inst = tc_MOV(tc, dst[0], neg_one);
320 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
/*
 * Translate the TGSI texturing opcodes into the matching TOY_OPCODE_TGSI_*
 * pseudo opcode.
 *
 * The emitted instruction gets the mapped opcode, the TGSI texture target
 * (inst->tex.target) and a copy of all source operands.  Exactly one
 * destination register is required.  If the TGSI instruction carries any
 * texture offsets, translation is flagged as failed through tc_fail().
 *
 * NOTE(review): the remaining parameters, the break/default lines and the
 * allocation of `inst` are not visible in this view.
 */
324 aos_tex(struct toy_compiler *tc,
325 const struct tgsi_full_instruction *tgsi_inst,
329 struct toy_inst *inst;
330 enum toy_opcode opcode;
/* one-to-one mapping from TGSI texture opcode to toy pseudo opcode */
333 switch (tgsi_inst->Instruction.Opcode) {
334 case TGSI_OPCODE_TEX:
335 opcode = TOY_OPCODE_TGSI_TEX;
337 case TGSI_OPCODE_TXD:
338 opcode = TOY_OPCODE_TGSI_TXD;
340 case TGSI_OPCODE_TXP:
341 opcode = TOY_OPCODE_TGSI_TXP;
343 case TGSI_OPCODE_TXB:
344 opcode = TOY_OPCODE_TGSI_TXB;
346 case TGSI_OPCODE_TXL:
347 opcode = TOY_OPCODE_TGSI_TXL;
349 case TGSI_OPCODE_TXF:
350 opcode = TOY_OPCODE_TGSI_TXF;
352 case TGSI_OPCODE_TXQ:
353 opcode = TOY_OPCODE_TGSI_TXQ;
355 case TGSI_OPCODE_TXQ_LZ:
356 opcode = TOY_OPCODE_TGSI_TXQ_LZ;
358 case TGSI_OPCODE_TEX2:
359 opcode = TOY_OPCODE_TGSI_TEX2;
361 case TGSI_OPCODE_TXB2:
362 opcode = TOY_OPCODE_TGSI_TXB2;
364 case TGSI_OPCODE_TXL2:
365 opcode = TOY_OPCODE_TGSI_TXL2;
368 assert(!"unsupported texturing opcode");
/* the instruction must carry a texture token */
373 assert(tgsi_inst->Instruction.Texture);
376 inst->opcode = opcode;
377 inst->tex.target = tgsi_inst->Texture.Texture;
379 assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
380 assert(tgsi_inst->Instruction.NumDstRegs == 1);
/* pass every source operand through unchanged */
383 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
384 inst->src[i] = src[i];
/* texture offsets are not supported: fail once per offset present */
386 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
387 tc_fail(tc, "texelFetchOffset unsupported");
/*
 * Translate the TGSI SAMPLE* family (plus GATHER4, SVIEWINFO, SAMPLE_POS
 * and SAMPLE_INFO) into the matching TOY_OPCODE_TGSI_* pseudo opcode and
 * copy all source operands into the emitted instruction.
 *
 * The function begins with assert(!"sampling untested"), so it always
 * trips when assertions are enabled.
 *
 * NOTE(review): the remaining parameters, the break/default lines and the
 * allocation of `inst` are not visible in this view.
 */
391 aos_sample(struct toy_compiler *tc,
392 const struct tgsi_full_instruction *tgsi_inst,
396 struct toy_inst *inst;
397 enum toy_opcode opcode;
400 assert(!"sampling untested");
/* one-to-one mapping from TGSI opcode to toy pseudo opcode */
402 switch (tgsi_inst->Instruction.Opcode) {
403 case TGSI_OPCODE_SAMPLE:
404 opcode = TOY_OPCODE_TGSI_SAMPLE;
406 case TGSI_OPCODE_SAMPLE_I:
407 opcode = TOY_OPCODE_TGSI_SAMPLE_I;
409 case TGSI_OPCODE_SAMPLE_I_MS:
410 opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
412 case TGSI_OPCODE_SAMPLE_B:
413 opcode = TOY_OPCODE_TGSI_SAMPLE_B;
415 case TGSI_OPCODE_SAMPLE_C:
416 opcode = TOY_OPCODE_TGSI_SAMPLE_C;
418 case TGSI_OPCODE_SAMPLE_C_LZ:
419 opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
421 case TGSI_OPCODE_SAMPLE_D:
422 opcode = TOY_OPCODE_TGSI_SAMPLE_D;
424 case TGSI_OPCODE_SAMPLE_L:
425 opcode = TOY_OPCODE_TGSI_SAMPLE_L;
427 case TGSI_OPCODE_GATHER4:
428 opcode = TOY_OPCODE_TGSI_GATHER4;
430 case TGSI_OPCODE_SVIEWINFO:
431 opcode = TOY_OPCODE_TGSI_SVIEWINFO;
433 case TGSI_OPCODE_SAMPLE_POS:
434 opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
436 case TGSI_OPCODE_SAMPLE_INFO:
437 opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
440 assert(!"unsupported sampling opcode");
446 inst->opcode = opcode;
448 assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
449 assert(tgsi_inst->Instruction.NumDstRegs == 1);
/* pass every source operand through unchanged */
452 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
453 inst->src[i] = src[i];
/*
 * Translate TGSI LIT (lighting coefficients).
 *
 * Visible behaviour: dst.x and dst.w are set to 1.0f.  If neither Y nor Z
 * is in the destination writemask, nothing more is needed.  Otherwise
 * dst.y and dst.z default to 0.0f and are then conditionally overwritten
 * by two predicated instructions guarded by a CMP on src[0].x.
 *
 * NOTE(review): several lines are missing from this view (the early
 * return, the tail of the CMP, and the opcodes of the two predicated
 * instructions), so the exact condition and operations cannot be confirmed
 * here.
 */
457 aos_LIT(struct toy_compiler *tc,
458 const struct tgsi_full_instruction *tgsi_inst,
462 struct toy_inst *inst;
/* X and W are always 1.0 */
464 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
/* nothing else to compute when Y and Z are both masked out */
466 if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
/* default Y and Z to 0.0 */
469 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
/* flag-only compare on src[0].x; the following writes are predicated on it */
471 tc_CMP(tc, tdst_null(),
472 tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
/* predicated write of dst.y from src[0].x */
477 tdst_writemask(dst[0], TOY_WRITEMASK_Y),
478 tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
479 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
481 /* clamp W to (-128, 128)? */
/* predicated write of dst.z from src[0].y and src[0].w */
483 tdst_writemask(dst[0], TOY_WRITEMASK_Z),
484 tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
485 tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
486 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
/*
 * Translate TGSI EXP.  Only src[0].x is read.
 *
 *   dst.x = bit pattern ((RNDD(src.x) + 127) << 23), written as an integer
 *           (only when X is in the writemask); presumably the float
 *           2^floor(src.x) -- TODO confirm
 *   dst.y = FRC(src.x)
 *   dst.z = EXP(src.x)
 *   dst.w = 1.0f
 *
 * NOTE(review): the remaining parameters and the declaration line of `tmp`
 * are not fully visible in this view.
 */
490 aos_EXP(struct toy_compiler *tc,
491 const struct tgsi_full_instruction *tgsi_inst,
495 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
497 if (dst[0].writemask & TOY_WRITEMASK_X) {
/* integer-typed temporary, X channel only */
499 tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
501 tc_RNDD(tc, tmp, src0);
503 /* construct the floating point number manually */
/* add 127 and shift left by 23 bits, storing into dst.x as an integer */
504 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
505 tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
506 tsrc_from(tmp), tsrc_imm_d(23));
509 tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
510 tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
511 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
/*
 * Translate TGSI LOG.  Only src[0].x is read.
 *
 * When X or Y is in the writemask, the source bits are treated as an
 * integer:
 *   dst.x = (|bits| >> 23) - 127
 *   dst.y = (bits & ((1 << 23) - 1)) | (127 << 23), written as an integer
 * presumably the unbiased exponent and the mantissa rescaled to [1, 2) --
 * TODO confirm.  Then:
 *   dst.z = LOG(src.x)
 *   dst.w = 1.0f
 *
 * NOTE(review): the remaining parameters and the declaration of `tmp` are
 * not visible in this view.
 */
515 aos_LOG(struct toy_compiler *tc,
516 const struct tgsi_full_instruction *tgsi_inst,
520 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
522 if (dst[0].writemask & TOY_WRITEMASK_XY) {
/* integer-typed temporary, X channel only */
525 tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
/* shift the absolute integer view right by 23 and subtract 127 */
528 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
529 tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
530 tsrc_from(tmp), tsrc_imm_d(-127));
/* keep the low 23 bits and OR in 127 << 23 */
533 tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
534 tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
535 tsrc_from(tmp), tsrc_imm_d(127 << 23));
538 tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
539 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
/*
 * Translate TGSI DST (distance vector).  Each instruction is restricted to
 * one destination channel through the writemask:
 *   dst.x = 1.0f
 *   dst.y = src[0] * src[1]
 *   dst.z = src[0]
 *   dst.w = src[1]
 * The sources are passed without an explicit swizzle.
 */
543 aos_DST(struct toy_compiler *tc,
544 const struct tgsi_full_instruction *tgsi_inst,
548 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
549 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
550 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
551 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
/*
 * Translate TGSI LRP (linear interpolation) using one temporary:
 *   tmp = 1.0f - src[0]
 *   tmp = tmp * src[2]
 *   dst = MAC(src[0], src[1], tmp)
 * which presumably evaluates src[0] * src[1] + (1 - src[0]) * src[2].
 */
555 aos_LRP(struct toy_compiler *tc,
556 const struct tgsi_full_instruction *tgsi_inst,
560 struct toy_dst tmp = tc_alloc_tmp(tc);
562 tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
563 tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
564 tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
/*
 * Translate TGSI CND: compare src[2] against 0.5f with "greater than",
 * then emit a SEL of src[0]/src[1] predicated on that comparison.
 *
 * The function begins with assert(!"CND untested"), so it always trips
 * when assertions are enabled.
 */
568 aos_CND(struct toy_compiler *tc,
569 const struct tgsi_full_instruction *tgsi_inst,
573 struct toy_inst *inst;
575 assert(!"CND untested");
577 tc_CMP(tc, tdst_null(), src[2], tsrc_imm_f(0.5f), BRW_CONDITIONAL_G);
578 inst = tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_NONE);
579 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
/*
 * Translate TGSI DP2A: a 2-component dot product of src[0] and src[1] into
 * a temporary, then dst = tmp.x + src[2].
 *
 * The function begins with assert(!"DP2A untested"), so it always trips
 * when assertions are enabled.
 */
583 aos_DP2A(struct toy_compiler *tc,
584 const struct tgsi_full_instruction *tgsi_inst,
588 struct toy_dst tmp = tc_alloc_tmp(tc);
590 assert(!"DP2A untested");
592 tc_DP2(tc, tmp, src[0], src[1]);
593 tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
/*
 * Translate TGSI CLAMP with two conditional selects:
 *   dst = SEL.ge(src[0], src[1])
 *   dst = SEL.l(src[2], dst)
 * presumably max(src[0], src[1]) followed by min(src[2], dst), which clamps
 * src[0] to [src[1], src[2]].
 *
 * The function begins with assert(!"CLAMP untested"), so it always trips
 * when assertions are enabled.
 */
597 aos_CLAMP(struct toy_compiler *tc,
598 const struct tgsi_full_instruction *tgsi_inst,
602 assert(!"CLAMP untested");
604 tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_GE);
605 tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), BRW_CONDITIONAL_L);
/*
 * Translate TGSI XPD (cross product) using swizzled operands:
 *   tmp.xyz = src[0].zxy * src[1].yzx
 *   dst.xyz = MAC(src[0].yzx, src[1].zxy, -tmp)
 * which presumably evaluates src[0].yzx * src[1].zxy - tmp.
 * dst.w is then written by a final MOV.
 *
 * NOTE(review): the source operand of the final MOV is on a line that is
 * not visible in this view.
 */
609 aos_XPD(struct toy_compiler *tc,
610 const struct tgsi_full_instruction *tgsi_inst,
614 struct toy_dst tmp = tc_alloc_tmp(tc);
/* second term of each cross-product component */
616 tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
617 tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
618 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
619 tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
620 TOY_SWIZZLE_X, TOY_SWIZZLE_W));
/* first term, combined with the negated second term */
622 tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
623 tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
624 TOY_SWIZZLE_X, TOY_SWIZZLE_W),
625 tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
626 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
627 tsrc_negate(tsrc_from(tmp)));
629 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
/*
 * Translate TGSI PK2H.  src[0].x and src[0].y are viewed as unsigned
 * integers and combined as
 *   dst = src.x | (src.y << 16)
 * with the destination also written as unsigned.
 *
 * NOTE(review): no float-to-half conversion is visible here; the operands
 * are only retyped.  Confirm whether a conversion is expected.  The
 * function begins with assert(!"PK2H untested"), so it always trips when
 * assertions are enabled.
 */
634 aos_PK2H(struct toy_compiler *tc,
635 const struct tgsi_full_instruction *tgsi_inst,
639 const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
640 const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
641 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
643 assert(!"PK2H untested");
/* move the second half into the upper 16 bits, then merge */
645 tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
646 tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
/*
 * Translate TGSI SFL: the destination is unconditionally set to 0.0f.
 *
 * The function begins with assert(!"SFL untested"), so it always trips
 * when assertions are enabled.
 */
650 aos_SFL(struct toy_compiler *tc,
651 const struct tgsi_full_instruction *tgsi_inst,
655 assert(!"SFL untested");
657 tc_MOV(tc, dst[0], tsrc_imm_f(0.0f));
/*
 * Translate TGSI STR: the destination is unconditionally set to 1.0f.
 *
 * The function begins with assert(!"STR untested"), so it always trips
 * when assertions are enabled.
 */
661 aos_STR(struct toy_compiler *tc,
662 const struct tgsi_full_instruction *tgsi_inst,
666 assert(!"STR untested");
668 tc_MOV(tc, dst[0], tsrc_imm_f(1.0f));
/*
 * Translate TGSI UP2H.  The source is viewed as unsigned and split into
 * 16-bit halves:
 *   dst.xz = src & 0xffff
 *   dst.yw = src >> 16
 *
 * NOTE(review): the source is used without a swizzle, so each destination
 * channel reads its own source channel, and no half-to-float conversion is
 * visible; confirm both against the intended semantics.  The function
 * begins with assert(!"UP2H untested"), so it always trips when assertions
 * are enabled.
 */
672 aos_UP2H(struct toy_compiler *tc,
673 const struct tgsi_full_instruction *tgsi_inst,
677 assert(!"UP2H untested");
679 tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
680 tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
681 tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
682 tsrc_ud(src[0]), tsrc_imm_ud(16));
/*
 * Translate TGSI SCS (sine/cosine):
 *   dst.x = COS(src[0])
 *   dst.y = SIN(src[0])
 *   dst.z = 0.0f
 *   dst.w = 1.0f
 *
 * NOTE(review): src[0] is passed without an X swizzle, whereas aos_simple()
 * restricts COS/SIN to the X channel; confirm which is intended.  The
 * function begins with assert(!"SCS untested"), so it always trips when
 * assertions are enabled.
 */
686 aos_SCS(struct toy_compiler *tc,
687 const struct tgsi_full_instruction *tgsi_inst,
691 assert(!"SCS untested");
693 tc_add1(tc, TOY_OPCODE_COS,
694 tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
696 tc_add1(tc, TOY_OPCODE_SIN,
697 tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
699 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
700 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
/*
 * Translate TGSI NRM (3-component normalize):
 *   tmp     = DP3(src[0], src[0])
 *   tmp     = INV(tmp)
 *   dst.xyz = src[0] * tmp
 *   dst.w   = 1.0f
 *
 * NOTE(review): the scale factor is 1 / dot, not 1 / sqrt(dot).  A
 * unit-length result would need the reciprocal square root; confirm the
 * intended semantics.  The function begins with assert(!"NRM untested"),
 * so it always trips when assertions are enabled.
 */
704 aos_NRM(struct toy_compiler *tc,
705 const struct tgsi_full_instruction *tgsi_inst,
709 struct toy_dst tmp = tc_alloc_tmp(tc);
711 assert(!"NRM untested");
713 tc_DP3(tc, tmp, src[0], src[0]);
714 tc_INV(tc, tmp, tsrc_from(tmp));
715 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
716 src[0], tsrc_from(tmp));
718 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
/*
 * Translate TGSI DIV as a multiplication by the reciprocal:
 *   tmp = INV(src[1])
 *   dst = src[0] * tmp
 *
 * The function begins with assert(!"DIV untested"), so it always trips
 * when assertions are enabled.
 */
722 aos_DIV(struct toy_compiler *tc,
723 const struct tgsi_full_instruction *tgsi_inst,
727 struct toy_dst tmp = tc_alloc_tmp(tc);
729 assert(!"DIV untested");
731 tc_INV(tc, tmp, src[1]);
732 tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
/* Translate TGSI BRK: emit a single operand-less BRW_OPCODE_BREAK. */
736 aos_BRK(struct toy_compiler *tc,
737 const struct tgsi_full_instruction *tgsi_inst,
741 tc_add0(tc, BRW_OPCODE_BREAK);
/*
 * Translate TGSI CEIL using round-down on the negated source:
 *   tmp = RNDD(-src[0])
 *   dst = -tmp
 * presumably ceil(x) == -floor(-x), with RNDD acting as floor.
 */
745 aos_CEIL(struct toy_compiler *tc,
746 const struct tgsi_full_instruction *tgsi_inst,
750 struct toy_dst tmp = tc_alloc_tmp(tc);
752 tc_RNDD(tc, tmp, tsrc_negate(src[0]));
753 tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
/*
 * Translate TGSI SAD (sum of absolute difference):
 *   tmp = src[0] - src[1]
 *   dst = |tmp| + src[2]
 *
 * The function begins with assert(!"SAD untested"), so it always trips
 * when assertions are enabled.
 */
757 aos_SAD(struct toy_compiler *tc,
758 const struct tgsi_full_instruction *tgsi_inst,
762 struct toy_dst tmp = tc_alloc_tmp(tc);
764 assert(!"SAD untested");
766 tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
767 tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
/* Translate TGSI CONT: emit a single operand-less BRW_OPCODE_CONTINUE. */
771 aos_CONT(struct toy_compiler *tc,
772 const struct tgsi_full_instruction *tgsi_inst,
776 tc_add0(tc, BRW_OPCODE_CONTINUE);
/*
 * Translate TGSI BGNLOOP: emit an operand-less BRW_OPCODE_DO and keep a
 * pointer to the new instruction.
 *
 * NOTE(review): the statement that follows the "marker" comment (the use
 * of `inst`) is not visible in this view.
 */
780 aos_BGNLOOP(struct toy_compiler *tc,
781 const struct tgsi_full_instruction *tgsi_inst,
785 struct toy_inst *inst;
787 inst = tc_add0(tc, BRW_OPCODE_DO);
788 /* this is just a marker */
/* Translate TGSI ENDLOOP: emit a single operand-less BRW_OPCODE_WHILE. */
793 aos_ENDLOOP(struct toy_compiler *tc,
794 const struct tgsi_full_instruction *tgsi_inst,
798 tc_add0(tc, BRW_OPCODE_WHILE);
/*
 * Translate TGSI NRM4 (4-component normalize):
 *   tmp = DP4(src[0], src[0])
 *   tmp = INV(tmp)
 *   dst = src[0].x * tmp
 *
 * NOTE(review): the scale factor is 1 / dot, not 1 / sqrt(dot), and only
 * the X channel of src[0] is multiplied, so every destination channel gets
 * the same value.  Confirm the intended semantics.  The function begins
 * with assert(!"NRM4 untested"), so it always trips when assertions are
 * enabled.
 */
802 aos_NRM4(struct toy_compiler *tc,
803 const struct tgsi_full_instruction *tgsi_inst,
807 struct toy_dst tmp = tc_alloc_tmp(tc);
809 assert(!"NRM4 untested");
811 tc_DP4(tc, tmp, src[0], src[0]);
812 tc_INV(tc, tmp, tsrc_from(tmp));
813 tc_MUL(tc, dst[0], tsrc_swizzle1(src[0], TOY_SWIZZLE_X), tsrc_from(tmp));
/*
 * Handler for TGSI opcodes that cannot be translated: log a warning that
 * names the opcode, then mark the compilation as failed through tc_fail().
 * No instruction is emitted.
 */
817 aos_unsupported(struct toy_compiler *tc,
818 const struct tgsi_full_instruction *tgsi_inst,
822 const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode);
824 ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name);
826 tc_fail(tc, "unsupported TGSI instruction");
/*
 * Dispatch table indexed by TGSI opcode; each slot holds the translate
 * callback for that opcode.  soa_passthrough() and soa_per_channel() look
 * their callback up here.
 *
 * Opcodes that are not handled point at aos_unsupported(), which fails the
 * compilation instead of emitting code.  Slots written with a bare number
 * (e.g. [22], [32], [76]) are routed there as well; presumably these are
 * opcode values without a named TGSI_OPCODE_* constant -- TODO confirm.
 */
829 static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
830 [TGSI_OPCODE_ARL] = aos_simple,
831 [TGSI_OPCODE_MOV] = aos_simple,
832 [TGSI_OPCODE_LIT] = aos_LIT,
833 [TGSI_OPCODE_RCP] = aos_simple,
834 [TGSI_OPCODE_RSQ] = aos_simple,
835 [TGSI_OPCODE_EXP] = aos_EXP,
836 [TGSI_OPCODE_LOG] = aos_LOG,
837 [TGSI_OPCODE_MUL] = aos_simple,
838 [TGSI_OPCODE_ADD] = aos_simple,
839 [TGSI_OPCODE_DP3] = aos_simple,
840 [TGSI_OPCODE_DP4] = aos_simple,
841 [TGSI_OPCODE_DST] = aos_DST,
842 [TGSI_OPCODE_MIN] = aos_simple,
843 [TGSI_OPCODE_MAX] = aos_simple,
844 [TGSI_OPCODE_SLT] = aos_set_on_cond,
845 [TGSI_OPCODE_SGE] = aos_set_on_cond,
846 [TGSI_OPCODE_MAD] = aos_simple,
847 [TGSI_OPCODE_SUB] = aos_simple,
848 [TGSI_OPCODE_LRP] = aos_LRP,
849 [TGSI_OPCODE_CND] = aos_CND,
850 [TGSI_OPCODE_SQRT] = aos_simple,
851 [TGSI_OPCODE_DP2A] = aos_DP2A,
852 [22] = aos_unsupported,
853 [23] = aos_unsupported,
854 [TGSI_OPCODE_FRC] = aos_simple,
855 [TGSI_OPCODE_CLAMP] = aos_CLAMP,
856 [TGSI_OPCODE_FLR] = aos_simple,
857 [TGSI_OPCODE_ROUND] = aos_simple,
858 [TGSI_OPCODE_EX2] = aos_simple,
859 [TGSI_OPCODE_LG2] = aos_simple,
860 [TGSI_OPCODE_POW] = aos_simple,
861 [TGSI_OPCODE_XPD] = aos_XPD,
862 [32] = aos_unsupported,
863 [TGSI_OPCODE_ABS] = aos_simple,
864 [TGSI_OPCODE_RCC] = aos_unsupported,
865 [TGSI_OPCODE_DPH] = aos_simple,
866 [TGSI_OPCODE_COS] = aos_simple,
867 [TGSI_OPCODE_DDX] = aos_unsupported,
868 [TGSI_OPCODE_DDY] = aos_unsupported,
869 [TGSI_OPCODE_KILP] = aos_simple,
870 [TGSI_OPCODE_PK2H] = aos_PK2H,
871 [TGSI_OPCODE_PK2US] = aos_unsupported,
872 [TGSI_OPCODE_PK4B] = aos_unsupported,
873 [TGSI_OPCODE_PK4UB] = aos_unsupported,
874 [TGSI_OPCODE_RFL] = aos_unsupported,
875 [TGSI_OPCODE_SEQ] = aos_set_on_cond,
876 [TGSI_OPCODE_SFL] = aos_SFL,
877 [TGSI_OPCODE_SGT] = aos_set_on_cond,
878 [TGSI_OPCODE_SIN] = aos_simple,
879 [TGSI_OPCODE_SLE] = aos_set_on_cond,
880 [TGSI_OPCODE_SNE] = aos_set_on_cond,
881 [TGSI_OPCODE_STR] = aos_STR,
882 [TGSI_OPCODE_TEX] = aos_tex,
883 [TGSI_OPCODE_TXD] = aos_tex,
884 [TGSI_OPCODE_TXP] = aos_tex,
885 [TGSI_OPCODE_UP2H] = aos_UP2H,
886 [TGSI_OPCODE_UP2US] = aos_unsupported,
887 [TGSI_OPCODE_UP4B] = aos_unsupported,
888 [TGSI_OPCODE_UP4UB] = aos_unsupported,
889 [TGSI_OPCODE_X2D] = aos_unsupported,
890 [TGSI_OPCODE_ARA] = aos_unsupported,
891 [TGSI_OPCODE_ARR] = aos_simple,
892 [TGSI_OPCODE_BRA] = aos_unsupported,
893 [TGSI_OPCODE_CAL] = aos_unsupported,
894 [TGSI_OPCODE_RET] = aos_unsupported,
895 [TGSI_OPCODE_SSG] = aos_set_sign,
896 [TGSI_OPCODE_CMP] = aos_compare,
897 [TGSI_OPCODE_SCS] = aos_SCS,
898 [TGSI_OPCODE_TXB] = aos_tex,
899 [TGSI_OPCODE_NRM] = aos_NRM,
900 [TGSI_OPCODE_DIV] = aos_DIV,
901 [TGSI_OPCODE_DP2] = aos_simple,
902 [TGSI_OPCODE_TXL] = aos_tex,
903 [TGSI_OPCODE_BRK] = aos_BRK,
904 [TGSI_OPCODE_IF] = aos_simple,
905 [TGSI_OPCODE_UIF] = aos_simple,
906 [76] = aos_unsupported,
907 [TGSI_OPCODE_ELSE] = aos_simple,
908 [TGSI_OPCODE_ENDIF] = aos_simple,
909 [79] = aos_unsupported,
910 [80] = aos_unsupported,
911 [TGSI_OPCODE_PUSHA] = aos_unsupported,
912 [TGSI_OPCODE_POPA] = aos_unsupported,
913 [TGSI_OPCODE_CEIL] = aos_CEIL,
914 [TGSI_OPCODE_I2F] = aos_simple,
915 [TGSI_OPCODE_NOT] = aos_simple,
916 [TGSI_OPCODE_TRUNC] = aos_simple,
917 [TGSI_OPCODE_SHL] = aos_simple,
918 [88] = aos_unsupported,
919 [TGSI_OPCODE_AND] = aos_simple,
920 [TGSI_OPCODE_OR] = aos_simple,
921 [TGSI_OPCODE_MOD] = aos_simple,
922 [TGSI_OPCODE_XOR] = aos_simple,
923 [TGSI_OPCODE_SAD] = aos_SAD,
924 [TGSI_OPCODE_TXF] = aos_tex,
925 [TGSI_OPCODE_TXQ] = aos_tex,
926 [TGSI_OPCODE_CONT] = aos_CONT,
927 [TGSI_OPCODE_EMIT] = aos_simple,
928 [TGSI_OPCODE_ENDPRIM] = aos_simple,
929 [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP,
930 [TGSI_OPCODE_BGNSUB] = aos_unsupported,
931 [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP,
932 [TGSI_OPCODE_ENDSUB] = aos_unsupported,
933 [TGSI_OPCODE_TXQ_LZ] = aos_tex,
934 [104] = aos_unsupported,
935 [105] = aos_unsupported,
936 [106] = aos_unsupported,
937 [TGSI_OPCODE_NOP] = aos_simple,
938 [108] = aos_unsupported,
939 [109] = aos_unsupported,
940 [110] = aos_unsupported,
941 [111] = aos_unsupported,
942 [TGSI_OPCODE_NRM4] = aos_NRM4,
943 [TGSI_OPCODE_CALLNZ] = aos_unsupported,
944 [TGSI_OPCODE_BREAKC] = aos_unsupported,
945 [TGSI_OPCODE_KIL] = aos_simple,
946 [TGSI_OPCODE_END] = aos_simple,
947 [118] = aos_unsupported,
948 [TGSI_OPCODE_F2I] = aos_simple,
949 [TGSI_OPCODE_IDIV] = aos_simple,
950 [TGSI_OPCODE_IMAX] = aos_simple,
951 [TGSI_OPCODE_IMIN] = aos_simple,
952 [TGSI_OPCODE_INEG] = aos_simple,
953 [TGSI_OPCODE_ISGE] = aos_set_on_cond,
954 [TGSI_OPCODE_ISHR] = aos_simple,
955 [TGSI_OPCODE_ISLT] = aos_set_on_cond,
956 [TGSI_OPCODE_F2U] = aos_simple,
957 [TGSI_OPCODE_U2F] = aos_simple,
958 [TGSI_OPCODE_UADD] = aos_simple,
959 [TGSI_OPCODE_UDIV] = aos_simple,
960 [TGSI_OPCODE_UMAD] = aos_simple,
961 [TGSI_OPCODE_UMAX] = aos_simple,
962 [TGSI_OPCODE_UMIN] = aos_simple,
963 [TGSI_OPCODE_UMOD] = aos_simple,
964 [TGSI_OPCODE_UMUL] = aos_simple,
965 [TGSI_OPCODE_USEQ] = aos_set_on_cond,
966 [TGSI_OPCODE_USGE] = aos_set_on_cond,
967 [TGSI_OPCODE_USHR] = aos_simple,
968 [TGSI_OPCODE_USLT] = aos_set_on_cond,
969 [TGSI_OPCODE_USNE] = aos_set_on_cond,
970 [TGSI_OPCODE_SWITCH] = aos_unsupported,
971 [TGSI_OPCODE_CASE] = aos_unsupported,
972 [TGSI_OPCODE_DEFAULT] = aos_unsupported,
973 [TGSI_OPCODE_ENDSWITCH] = aos_unsupported,
974 [TGSI_OPCODE_SAMPLE] = aos_sample,
975 [TGSI_OPCODE_SAMPLE_I] = aos_sample,
976 [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample,
977 [TGSI_OPCODE_SAMPLE_B] = aos_sample,
978 [TGSI_OPCODE_SAMPLE_C] = aos_sample,
979 [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample,
980 [TGSI_OPCODE_SAMPLE_D] = aos_sample,
981 [TGSI_OPCODE_SAMPLE_L] = aos_sample,
982 [TGSI_OPCODE_GATHER4] = aos_sample,
983 [TGSI_OPCODE_SVIEWINFO] = aos_sample,
984 [TGSI_OPCODE_SAMPLE_POS] = aos_sample,
985 [TGSI_OPCODE_SAMPLE_INFO] = aos_sample,
986 [TGSI_OPCODE_UARL] = aos_simple,
987 [TGSI_OPCODE_UCMP] = aos_compare,
988 [TGSI_OPCODE_IABS] = aos_simple,
989 [TGSI_OPCODE_ISSG] = aos_set_sign,
990 [TGSI_OPCODE_LOAD] = aos_unsupported,
991 [TGSI_OPCODE_STORE] = aos_unsupported,
992 [TGSI_OPCODE_MFENCE] = aos_unsupported,
993 [TGSI_OPCODE_LFENCE] = aos_unsupported,
994 [TGSI_OPCODE_SFENCE] = aos_unsupported,
995 [TGSI_OPCODE_BARRIER] = aos_unsupported,
996 [TGSI_OPCODE_ATOMUADD] = aos_unsupported,
997 [TGSI_OPCODE_ATOMXCHG] = aos_unsupported,
998 [TGSI_OPCODE_ATOMCAS] = aos_unsupported,
999 [TGSI_OPCODE_ATOMAND] = aos_unsupported,
1000 [TGSI_OPCODE_ATOMOR] = aos_unsupported,
1001 [TGSI_OPCODE_ATOMXOR] = aos_unsupported,
1002 [TGSI_OPCODE_ATOMUMIN] = aos_unsupported,
1003 [TGSI_OPCODE_ATOMUMAX] = aos_unsupported,
1004 [TGSI_OPCODE_ATOMIMIN] = aos_unsupported,
1005 [TGSI_OPCODE_ATOMIMAX] = aos_unsupported,
1006 [TGSI_OPCODE_TEX2] = aos_tex,
1007 [TGSI_OPCODE_TXB2] = aos_tex,
1008 [TGSI_OPCODE_TXL2] = aos_tex,
/*
 * SoA translator that forwards to the AoS translator unchanged: look up
 * the callback for this TGSI opcode in aos_translate_table and call it
 * with the same compiler, instruction, destinations and sources.
 */
1012 soa_passthrough(struct toy_compiler *tc,
1013 const struct tgsi_full_instruction *tgsi_inst,
1014 struct toy_dst *dst_,
1015 struct toy_src *src_)
1017 const toy_tgsi_translate translate =
1018 aos_translate_table[tgsi_inst->Instruction.Opcode];
1020 translate(tc, tgsi_inst, dst_, src_);
/*
 * SoA translator for opcodes that work on each channel independently.
 *
 * Every destination and source operand is transposed into four per-channel
 * operands.  The AoS translator for the opcode is then called four times,
 * once per channel, each time with that channel's operands.
 */
1024 soa_per_channel(struct toy_compiler *tc,
1025 const struct tgsi_full_instruction *tgsi_inst,
1026 struct toy_dst *dst_,
1027 struct toy_src *src_)
/* [register][channel] views of the operands */
1029 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
1030 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1033 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
1034 tdst_transpose(dst_[i], dst[i]);
1035 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1036 tsrc_transpose(src_[i], src[i]);
1038 /* emit the same instruction four times for the four channels */
1039 for (ch = 0; ch < 4; ch++) {
1040 struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
1041 struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
/* gather this channel's slice of every operand */
1043 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
1044 aos_dst[i] = dst[i][ch];
1045 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1046 aos_src[i] = src[i][ch];
1048 aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
1049 tgsi_inst, aos_dst, aos_src);
/*
 * SoA translator for scalar opcodes whose single result is replicated to
 * all four destination channels (RCP, RSQ, SQRT, EX2, LG2, COS, SIN, POW).
 *
 * The operation is emitted once into a temporary, using the target opcode
 * from aos_simple_opcode_map and the srcx[] operands.  The temporary is
 * then copied to each of the four transposed destination channels.
 * Exactly one destination register is required.
 *
 * NOTE(review): the statement that fills srcx[] inside the first loop is
 * not visible in this view; per the existing comment it presumably takes
 * the X channel of each transposed source.  The loop-local `tmp[4]` array
 * shadows the function-level `tmp` destination.
 */
1054 soa_scalar_replicate(struct toy_compiler *tc,
1055 const struct tgsi_full_instruction *tgsi_inst,
1056 struct toy_dst *dst_,
1057 struct toy_src *src_)
1059 struct toy_dst dst0[4], tmp;
1060 struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
1063 assert(tgsi_inst->Instruction.NumDstRegs == 1);
1065 tdst_transpose(dst_[0], dst0);
1066 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1067 struct toy_src tmp[4];
1069 tsrc_transpose(src_[i], tmp);
1070 /* only the X channels */
/* temporary that receives the scalar result */
1074 tmp = tc_alloc_tmp(tc);
1076 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
1079 switch (tgsi_inst->Instruction.Opcode) {
/* one-source opcodes */
1080 case TGSI_OPCODE_RCP:
1081 case TGSI_OPCODE_RSQ:
1082 case TGSI_OPCODE_SQRT:
1083 case TGSI_OPCODE_EX2:
1084 case TGSI_OPCODE_LG2:
1085 case TGSI_OPCODE_COS:
1086 case TGSI_OPCODE_SIN:
1087 tc_add1(tc, opcode, tmp, srcx[0]);
/* two-source opcode */
1089 case TGSI_OPCODE_POW:
1090 tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
1093 assert(!"invalid soa_scalar_replicate() call");
1097 /* replicate the result */
1098 for (i = 0; i < 4; i++)
1099 tc_MOV(tc, dst0[i], tsrc_from(tmp));
/*
 * SoA translator for the dot-product opcodes (DP2, DP2A, DP3, DPH, DP4).
 *
 * The sources are transposed into per-channel operands (src[reg][channel]).
 * The product of the highest channel is computed first, with MUL, or with
 * MAC when an addend is folded in: src[2].x for DP2A, src[1].w for DPH.
 * The lower channels are then accumulated with MAC, using the temporary as
 * the third operand.  The result is copied to all four transposed
 * destination channels.
 *
 * NOTE(review): the break/default lines of the switch are not visible in
 * this view.
 */
1103 soa_dot_product(struct toy_compiler *tc,
1104 const struct tgsi_full_instruction *tgsi_inst,
1105 struct toy_dst *dst_,
1106 struct toy_src *src_)
1108 struct toy_dst dst0[4], tmp;
1109 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1112 tdst_transpose(dst_[0], dst0);
1113 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1114 tsrc_transpose(src_[i], src[i]);
/* accumulator for the running sum */
1116 tmp = tc_alloc_tmp(tc);
1118 switch (tgsi_inst->Instruction.Opcode) {
/* y*y, then + x*x */
1119 case TGSI_OPCODE_DP2:
1120 tc_MUL(tc, tmp, src[0][1], src[1][1]);
1121 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
/* y*y with src[2].x as addend, then + x*x */
1123 case TGSI_OPCODE_DP2A:
1124 tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
1125 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
/* z*z, then + y*y, then + x*x */
1127 case TGSI_OPCODE_DP3:
1128 tc_MUL(tc, tmp, src[0][2], src[1][2]);
1129 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1130 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
/* z*z with src[1].w as addend, then + y*y, then + x*x */
1132 case TGSI_OPCODE_DPH:
1133 tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
1134 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1135 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
/* w*w, then + z*z, then + y*y, then + x*x */
1137 case TGSI_OPCODE_DP4:
1138 tc_MUL(tc, tmp, src[0][3], src[1][3]);
1139 tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
1140 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1141 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1144 assert(!"invalid soa_dot_product() call");
/* replicate the scalar result to every destination channel */
1148 for (i = 0; i < 4; i++)
1149 tc_MOV(tc, dst0[i], tsrc_from(tmp));
/*
 * SoA translator for the partial-derivative opcodes: emit TOY_OPCODE_DDX
 * when the TGSI opcode is DDX and TOY_OPCODE_DDY otherwise.  The first
 * destination and source are passed through untransposed.
 *
 * NOTE(review): the `else` line between the two calls is not visible in
 * this view.
 */
1153 soa_partial_derivative(struct toy_compiler *tc,
1154 const struct tgsi_full_instruction *tgsi_inst,
1155 struct toy_dst *dst_,
1156 struct toy_src *src_)
1158 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX)
1159 tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
1161 tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
/*
 * SoA translator for IF/UIF.  The condition source must be a single
 * replicated channel (asserted).  Its first transposed channel is compared
 * with "not equal" against 0.0f for TGSI_OPCODE_IF and against integer 0
 * otherwise.
 *
 * NOTE(review): the `else` line between the two tc_IF() calls is not
 * visible in this view.
 */
1165 soa_if(struct toy_compiler *tc,
1166 const struct tgsi_full_instruction *tgsi_inst,
1167 struct toy_dst *dst_,
1168 struct toy_src *src_)
1170 struct toy_src src0[4];
1172 assert(tsrc_is_swizzle1(src_[0]));
1173 tsrc_transpose(src_[0], src0);
1175 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
1176 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_NEQ);
1178 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), BRW_CONDITIONAL_NEQ);
/*
 * SoA translator for LIT, working on transposed per-channel operands.
 *
 * Unconditional part:
 *   dst.x = 1.0f
 *   dst.y = src.x
 *   dst.z = POW(src.y, src.w)
 *   dst.w = 1.0f
 * Then, where src.x < 0.0f, dst.y and dst.z are overwritten with 0.0f by
 * predicated MOVs.
 *
 * NOTE(review): the comparison is "less than", so src.x == 0 keeps the POW
 * result in dst.z; confirm this matches the intended LIT semantics.
 */
1182 soa_LIT(struct toy_compiler *tc,
1183 const struct tgsi_full_instruction *tgsi_inst,
1184 struct toy_dst *dst_,
1185 struct toy_src *src_)
1187 struct toy_inst *inst;
1188 struct toy_dst dst0[4];
1189 struct toy_src src0[4];
1191 tdst_transpose(dst_[0], dst0);
1192 tsrc_transpose(src_[0], src0);
1194 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1195 tc_MOV(tc, dst0[1], src0[0]);
1196 tc_POW(tc, dst0[2], src0[1], src0[3]);
1197 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1200 * POW is calculated first because math with pred_ctrl is broken here.
/* zero Y and Z where src.x is negative */
1203 tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_L);
1204 inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
1205 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
1206 inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1207 inst->pred_ctrl = BRW_PREDICATE_NORMAL;
1211 soa_EXP(struct toy_compiler *tc,
1212 const struct tgsi_full_instruction *tgsi_inst,
1213 struct toy_dst *dst_,
1214 struct toy_src *src_)
1216 struct toy_dst dst0[4];
1217 struct toy_src src0[4];
1219 assert(!"SoA EXP untested");
1221 tdst_transpose(dst_[0], dst0);
1222 tsrc_transpose(src_[0], src0);
1224 if (!tdst_is_null(dst0[0])) {
1225 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1227 tc_RNDD(tc, tmp, src0[0]);
1229 /* construct the floating point number manually */
1230 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
1231 tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
1234 tc_FRC(tc, dst0[1], src0[0]);
1235 tc_EXP(tc, dst0[2], src0[0]);
1236 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1240 soa_LOG(struct toy_compiler *tc,
1241 const struct tgsi_full_instruction *tgsi_inst,
1242 struct toy_dst *dst_,
1243 struct toy_src *src_)
1245 struct toy_dst dst0[4];
1246 struct toy_src src0[4];
1248 assert(!"SoA LOG untested");
1250 tdst_transpose(dst_[0], dst0);
1251 tsrc_transpose(src_[0], src0);
1253 if (dst_[0].writemask & TOY_WRITEMASK_XY) {
1254 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1257 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
1258 tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
1261 tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
1262 tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
1265 tc_LOG(tc, dst0[2], src0[0]);
1266 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1270 soa_DST(struct toy_compiler *tc,
1271 const struct tgsi_full_instruction *tgsi_inst,
1272 struct toy_dst *dst_,
1273 struct toy_src *src_)
1275 struct toy_dst dst0[4];
1276 struct toy_src src[2][4];
1278 tdst_transpose(dst_[0], dst0);
1279 tsrc_transpose(src_[0], src[0]);
1280 tsrc_transpose(src_[1], src[1]);
1282 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1283 tc_MUL(tc, dst0[1], src[0][1], src[1][1]);
1284 tc_MOV(tc, dst0[2], src[0][2]);
1285 tc_MOV(tc, dst0[3], src[1][3]);
1289 soa_XPD(struct toy_compiler *tc,
1290 const struct tgsi_full_instruction *tgsi_inst,
1291 struct toy_dst *dst_,
1292 struct toy_src *src_)
1294 struct toy_dst dst0[4];
1295 struct toy_src src[2][4];
1297 tdst_transpose(dst_[0], dst0);
1298 tsrc_transpose(src_[0], src[0]);
1299 tsrc_transpose(src_[1], src[1]);
1301 /* dst.x = src0.y * src1.z - src1.y * src0.z */
1302 tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
1303 tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
1305 /* dst.y = src0.z * src1.x - src1.z * src0.x */
1306 tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
1307 tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
1309 /* dst.z = src0.x * src1.y - src1.x * src0.y */
1310 tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
1311 tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
1313 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1317 soa_PK2H(struct toy_compiler *tc,
1318 const struct tgsi_full_instruction *tgsi_inst,
1319 struct toy_dst *dst_,
1320 struct toy_src *src_)
1322 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
1323 struct toy_dst dst0[4];
1324 struct toy_src src0[4];
1327 assert(!"SoA PK2H untested");
1329 tdst_transpose(dst_[0], dst0);
1330 tsrc_transpose(src_[0], src0);
1332 tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
1333 tc_OR(tc, tmp, src0[0], tsrc_from(tmp));
1335 for (i = 0; i < 4; i++)
1336 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1340 soa_UP2H(struct toy_compiler *tc,
1341 const struct tgsi_full_instruction *tgsi_inst,
1342 struct toy_dst *dst_,
1343 struct toy_src *src_)
1345 struct toy_dst dst0[4];
1346 struct toy_src src0[4];
1348 assert(!"SoA UP2H untested");
1350 tdst_transpose(dst_[0], dst0);
1351 tsrc_transpose(src_[0], src0);
1353 tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
1354 tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
1355 tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
1356 tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
1361 soa_SCS(struct toy_compiler *tc,
1362 const struct tgsi_full_instruction *tgsi_inst,
1363 struct toy_dst *dst_,
1364 struct toy_src *src_)
1366 struct toy_dst dst0[4];
1367 struct toy_src src0[4];
1369 tdst_transpose(dst_[0], dst0);
1370 tsrc_transpose(src_[0], src0);
1372 tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]);
1373 tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]);
1374 tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1375 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1379 soa_NRM(struct toy_compiler *tc,
1380 const struct tgsi_full_instruction *tgsi_inst,
1381 struct toy_dst *dst_,
1382 struct toy_src *src_)
1384 const struct toy_dst tmp = tc_alloc_tmp(tc);
1385 struct toy_dst dst0[4];
1386 struct toy_src src0[4];
1388 assert(!"SoA NRM untested");
1390 tdst_transpose(dst_[0], dst0);
1391 tsrc_transpose(src_[0], src0);
1393 tc_MUL(tc, tmp, src0[2], src0[2]);
1394 tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
1395 tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
1396 tc_INV(tc, tmp, tsrc_from(tmp));
1398 tc_MUL(tc, dst0[0], src0[0], tsrc_from(tmp));
1399 tc_MUL(tc, dst0[1], src0[1], tsrc_from(tmp));
1400 tc_MUL(tc, dst0[2], src0[2], tsrc_from(tmp));
1401 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1405 soa_NRM4(struct toy_compiler *tc,
1406 const struct tgsi_full_instruction *tgsi_inst,
1407 struct toy_dst *dst_,
1408 struct toy_src *src_)
1410 const struct toy_dst tmp = tc_alloc_tmp(tc);
1411 struct toy_dst dst0[4];
1412 struct toy_src src0[4];
1415 assert(!"SoA NRM4 untested");
1417 tdst_transpose(dst_[0], dst0);
1418 tsrc_transpose(src_[0], src0);
1420 tc_MUL(tc, tmp, src0[3], src0[3]);
1421 tc_MAC(tc, tmp, src0[2], src0[2], tsrc_from(tmp));
1422 tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
1423 tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
1424 tc_INV(tc, tmp, tsrc_from(tmp));
1426 for (i = 0; i < 4; i++)
1427 tc_MUL(tc, dst0[i], src0[0], tsrc_from(tmp));
1431 soa_unsupported(struct toy_compiler *tc,
1432 const struct tgsi_full_instruction *tgsi_inst,
1433 struct toy_dst *dst_,
1434 struct toy_src *src_)
1436 const struct tgsi_opcode_info *info =
1437 tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode);
1439 ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
1442 tc_fail(tc, "unsupported TGSI instruction in SoA form");
/*
 * SoA translation dispatch table, indexed by TGSI opcode.  Each entry is the
 * translator invoked for that opcode:
 *  - soa_per_channel / soa_scalar_replicate / soa_dot_product: shared helpers
 *  - soa_passthrough: opcodes forwarded without SoA-specific lowering here
 *  - soa_unsupported: warns and fails the compilation
 *  - dedicated functions (soa_LIT, soa_EXP, ...) for the remaining opcodes
 * Entries keyed by a bare number cover opcode values that have no
 * TGSI_OPCODE_* name used in this table.
 */
1445 static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
1446 [TGSI_OPCODE_ARL] = soa_per_channel,
1447 [TGSI_OPCODE_MOV] = soa_per_channel,
1448 [TGSI_OPCODE_LIT] = soa_LIT,
1449 [TGSI_OPCODE_RCP] = soa_scalar_replicate,
1450 [TGSI_OPCODE_RSQ] = soa_scalar_replicate,
1451 [TGSI_OPCODE_EXP] = soa_EXP,
1452 [TGSI_OPCODE_LOG] = soa_LOG,
1453 [TGSI_OPCODE_MUL] = soa_per_channel,
1454 [TGSI_OPCODE_ADD] = soa_per_channel,
1455 [TGSI_OPCODE_DP3] = soa_dot_product,
1456 [TGSI_OPCODE_DP4] = soa_dot_product,
1457 [TGSI_OPCODE_DST] = soa_DST,
1458 [TGSI_OPCODE_MIN] = soa_per_channel,
1459 [TGSI_OPCODE_MAX] = soa_per_channel,
1460 [TGSI_OPCODE_SLT] = soa_per_channel,
1461 [TGSI_OPCODE_SGE] = soa_per_channel,
1462 [TGSI_OPCODE_MAD] = soa_per_channel,
1463 [TGSI_OPCODE_SUB] = soa_per_channel,
1464 [TGSI_OPCODE_LRP] = soa_per_channel,
1465 [TGSI_OPCODE_CND] = soa_per_channel,
1466 [TGSI_OPCODE_SQRT] = soa_scalar_replicate,
1467 [TGSI_OPCODE_DP2A] = soa_dot_product,
1468 [22] = soa_unsupported,
1469 [23] = soa_unsupported,
1470 [TGSI_OPCODE_FRC] = soa_per_channel,
1471 [TGSI_OPCODE_CLAMP] = soa_per_channel,
1472 [TGSI_OPCODE_FLR] = soa_per_channel,
1473 [TGSI_OPCODE_ROUND] = soa_per_channel,
1474 [TGSI_OPCODE_EX2] = soa_scalar_replicate,
1475 [TGSI_OPCODE_LG2] = soa_scalar_replicate,
1476 [TGSI_OPCODE_POW] = soa_scalar_replicate,
1477 [TGSI_OPCODE_XPD] = soa_XPD,
1478 [32] = soa_unsupported,
1479 [TGSI_OPCODE_ABS] = soa_per_channel,
1480 [TGSI_OPCODE_RCC] = soa_unsupported,
1481 [TGSI_OPCODE_DPH] = soa_dot_product,
1482 [TGSI_OPCODE_COS] = soa_scalar_replicate,
1483 [TGSI_OPCODE_DDX] = soa_partial_derivative,
1484 [TGSI_OPCODE_DDY] = soa_partial_derivative,
1485 [TGSI_OPCODE_KILP] = soa_passthrough,
1486 [TGSI_OPCODE_PK2H] = soa_PK2H,
1487 [TGSI_OPCODE_PK2US] = soa_unsupported,
1488 [TGSI_OPCODE_PK4B] = soa_unsupported,
1489 [TGSI_OPCODE_PK4UB] = soa_unsupported,
1490 [TGSI_OPCODE_RFL] = soa_unsupported,
1491 [TGSI_OPCODE_SEQ] = soa_per_channel,
1492 [TGSI_OPCODE_SFL] = soa_per_channel,
1493 [TGSI_OPCODE_SGT] = soa_per_channel,
1494 [TGSI_OPCODE_SIN] = soa_scalar_replicate,
1495 [TGSI_OPCODE_SLE] = soa_per_channel,
1496 [TGSI_OPCODE_SNE] = soa_per_channel,
1497 [TGSI_OPCODE_STR] = soa_per_channel,
1498 [TGSI_OPCODE_TEX] = soa_passthrough,
1499 [TGSI_OPCODE_TXD] = soa_passthrough,
1500 [TGSI_OPCODE_TXP] = soa_passthrough,
1501 [TGSI_OPCODE_UP2H] = soa_UP2H,
1502 [TGSI_OPCODE_UP2US] = soa_unsupported,
1503 [TGSI_OPCODE_UP4B] = soa_unsupported,
1504 [TGSI_OPCODE_UP4UB] = soa_unsupported,
1505 [TGSI_OPCODE_X2D] = soa_unsupported,
1506 [TGSI_OPCODE_ARA] = soa_unsupported,
1507 [TGSI_OPCODE_ARR] = soa_per_channel,
1508 [TGSI_OPCODE_BRA] = soa_unsupported,
1509 [TGSI_OPCODE_CAL] = soa_unsupported,
1510 [TGSI_OPCODE_RET] = soa_unsupported,
1511 [TGSI_OPCODE_SSG] = soa_per_channel,
1512 [TGSI_OPCODE_CMP] = soa_per_channel,
1513 [TGSI_OPCODE_SCS] = soa_SCS,
1514 [TGSI_OPCODE_TXB] = soa_passthrough,
1515 [TGSI_OPCODE_NRM] = soa_NRM,
1516 [TGSI_OPCODE_DIV] = soa_per_channel,
1517 [TGSI_OPCODE_DP2] = soa_dot_product,
1518 [TGSI_OPCODE_TXL] = soa_passthrough,
1519 [TGSI_OPCODE_BRK] = soa_passthrough,
1520 [TGSI_OPCODE_IF] = soa_if,
1521 [TGSI_OPCODE_UIF] = soa_if,
1522 [76] = soa_unsupported,
1523 [TGSI_OPCODE_ELSE] = soa_passthrough,
1524 [TGSI_OPCODE_ENDIF] = soa_passthrough,
1525 [79] = soa_unsupported,
1526 [80] = soa_unsupported,
1527 [TGSI_OPCODE_PUSHA] = soa_unsupported,
1528 [TGSI_OPCODE_POPA] = soa_unsupported,
1529 [TGSI_OPCODE_CEIL] = soa_per_channel,
1530 [TGSI_OPCODE_I2F] = soa_per_channel,
1531 [TGSI_OPCODE_NOT] = soa_per_channel,
1532 [TGSI_OPCODE_TRUNC] = soa_per_channel,
1533 [TGSI_OPCODE_SHL] = soa_per_channel,
1534 [88] = soa_unsupported,
1535 [TGSI_OPCODE_AND] = soa_per_channel,
1536 [TGSI_OPCODE_OR] = soa_per_channel,
1537 [TGSI_OPCODE_MOD] = soa_per_channel,
1538 [TGSI_OPCODE_XOR] = soa_per_channel,
1539 [TGSI_OPCODE_SAD] = soa_per_channel,
1540 [TGSI_OPCODE_TXF] = soa_passthrough,
1541 [TGSI_OPCODE_TXQ] = soa_passthrough,
1542 [TGSI_OPCODE_CONT] = soa_passthrough,
1543 [TGSI_OPCODE_EMIT] = soa_unsupported,
1544 [TGSI_OPCODE_ENDPRIM] = soa_unsupported,
1545 [TGSI_OPCODE_BGNLOOP] = soa_passthrough,
1546 [TGSI_OPCODE_BGNSUB] = soa_unsupported,
1547 [TGSI_OPCODE_ENDLOOP] = soa_passthrough,
1548 [TGSI_OPCODE_ENDSUB] = soa_unsupported,
1549 [TGSI_OPCODE_TXQ_LZ] = soa_passthrough,
1550 [104] = soa_unsupported,
1551 [105] = soa_unsupported,
1552 [106] = soa_unsupported,
1553 [TGSI_OPCODE_NOP] = soa_passthrough,
1554 [108] = soa_unsupported,
1555 [109] = soa_unsupported,
1556 [110] = soa_unsupported,
1557 [111] = soa_unsupported,
1558 [TGSI_OPCODE_NRM4] = soa_NRM4,
1559 [TGSI_OPCODE_CALLNZ] = soa_unsupported,
1560 [TGSI_OPCODE_BREAKC] = soa_unsupported,
1561 [TGSI_OPCODE_KIL] = soa_passthrough,
1562 [TGSI_OPCODE_END] = soa_passthrough,
1563 [118] = soa_unsupported,
1564 [TGSI_OPCODE_F2I] = soa_per_channel,
1565 [TGSI_OPCODE_IDIV] = soa_per_channel,
1566 [TGSI_OPCODE_IMAX] = soa_per_channel,
1567 [TGSI_OPCODE_IMIN] = soa_per_channel,
1568 [TGSI_OPCODE_INEG] = soa_per_channel,
1569 [TGSI_OPCODE_ISGE] = soa_per_channel,
1570 [TGSI_OPCODE_ISHR] = soa_per_channel,
1571 [TGSI_OPCODE_ISLT] = soa_per_channel,
1572 [TGSI_OPCODE_F2U] = soa_per_channel,
1573 [TGSI_OPCODE_U2F] = soa_per_channel,
1574 [TGSI_OPCODE_UADD] = soa_per_channel,
1575 [TGSI_OPCODE_UDIV] = soa_per_channel,
1576 [TGSI_OPCODE_UMAD] = soa_per_channel,
1577 [TGSI_OPCODE_UMAX] = soa_per_channel,
1578 [TGSI_OPCODE_UMIN] = soa_per_channel,
1579 [TGSI_OPCODE_UMOD] = soa_per_channel,
1580 [TGSI_OPCODE_UMUL] = soa_per_channel,
1581 [TGSI_OPCODE_USEQ] = soa_per_channel,
1582 [TGSI_OPCODE_USGE] = soa_per_channel,
1583 [TGSI_OPCODE_USHR] = soa_per_channel,
1584 [TGSI_OPCODE_USLT] = soa_per_channel,
1585 [TGSI_OPCODE_USNE] = soa_per_channel,
1586 [TGSI_OPCODE_SWITCH] = soa_unsupported,
1587 [TGSI_OPCODE_CASE] = soa_unsupported,
1588 [TGSI_OPCODE_DEFAULT] = soa_unsupported,
1589 [TGSI_OPCODE_ENDSWITCH] = soa_unsupported,
1590 [TGSI_OPCODE_SAMPLE] = soa_passthrough,
1591 [TGSI_OPCODE_SAMPLE_I] = soa_passthrough,
1592 [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough,
1593 [TGSI_OPCODE_SAMPLE_B] = soa_passthrough,
1594 [TGSI_OPCODE_SAMPLE_C] = soa_passthrough,
1595 [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough,
1596 [TGSI_OPCODE_SAMPLE_D] = soa_passthrough,
1597 [TGSI_OPCODE_SAMPLE_L] = soa_passthrough,
1598 [TGSI_OPCODE_GATHER4] = soa_passthrough,
1599 [TGSI_OPCODE_SVIEWINFO] = soa_passthrough,
1600 [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough,
1601 [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough,
1602 [TGSI_OPCODE_UARL] = soa_per_channel,
1603 [TGSI_OPCODE_UCMP] = soa_per_channel,
1604 [TGSI_OPCODE_IABS] = soa_per_channel,
1605 [TGSI_OPCODE_ISSG] = soa_per_channel,
1606 [TGSI_OPCODE_LOAD] = soa_unsupported,
1607 [TGSI_OPCODE_STORE] = soa_unsupported,
1608 [TGSI_OPCODE_MFENCE] = soa_unsupported,
1609 [TGSI_OPCODE_LFENCE] = soa_unsupported,
1610 [TGSI_OPCODE_SFENCE] = soa_unsupported,
1611 [TGSI_OPCODE_BARRIER] = soa_unsupported,
1612 [TGSI_OPCODE_ATOMUADD] = soa_unsupported,
1613 [TGSI_OPCODE_ATOMXCHG] = soa_unsupported,
1614 [TGSI_OPCODE_ATOMCAS] = soa_unsupported,
1615 [TGSI_OPCODE_ATOMAND] = soa_unsupported,
1616 [TGSI_OPCODE_ATOMOR] = soa_unsupported,
1617 [TGSI_OPCODE_ATOMXOR] = soa_unsupported,
1618 [TGSI_OPCODE_ATOMUMIN] = soa_unsupported,
1619 [TGSI_OPCODE_ATOMUMAX] = soa_unsupported,
1620 [TGSI_OPCODE_ATOMIMIN] = soa_unsupported,
1621 [TGSI_OPCODE_ATOMIMAX] = soa_unsupported,
1622 [TGSI_OPCODE_TEX2] = soa_passthrough,
1623 [TGSI_OPCODE_TXB2] = soa_passthrough,
1624 [TGSI_OPCODE_TXL2] = soa_passthrough,
1628 ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
1630 return (d->Register.Indirect ||
1631 (d->Register.Dimension && d->Dimension.Indirect));
1635 ra_dst_index(const struct tgsi_full_dst_register *d)
1637 assert(!d->Register.Indirect);
1638 return d->Register.Index;
/*
 * Return the dimension index of a destination.  For a dimensioned register
 * this is d->Dimension.Index, and the dimension must not be indirect
 * (asserted).
 * NOTE(review): the branch for non-dimensioned registers is not visible in
 * this copy -- presumably it returns 0; confirm against the full source.
 */
1642 ra_dst_dimension(const struct tgsi_full_dst_register *d)
1644 if (d->Register.Dimension) {
1645 assert(!d->Dimension.Indirect);
1646 return d->Dimension.Index;
1654 ra_is_src_indirect(const struct tgsi_full_src_register *s)
1656 return (s->Register.Indirect ||
1657 (s->Register.Dimension && s->Dimension.Indirect));
1661 ra_src_index(const struct tgsi_full_src_register *s)
1663 assert(!s->Register.Indirect);
1664 return s->Register.Index;
/*
 * Return the dimension index of a source.  For a dimensioned register this
 * is s->Dimension.Index, and the dimension must not be indirect (asserted).
 * NOTE(review): the branch for non-dimensioned registers is not visible in
 * this copy -- presumably it returns 0; confirm against the full source.
 */
1668 ra_src_dimension(const struct tgsi_full_src_register *s)
1670 if (s->Register.Dimension) {
1671 assert(!s->Dimension.Indirect);
1672 return s->Dimension.Index;
/*
 * Maps the TGSI opcode's inferred operand type to a toy type.  is_dst
 * selects between tgsi_opcode_infer_dst_type() and
 * tgsi_opcode_infer_src_type().  UNSIGNED, SIGNED and FLOAT each have their
 * own case; UNTYPED, VOID and DOUBLE are grouped together, and anything else
 * trips the "unsupported TGSI type" assertion.
 * NOTE(review): the toy type returned by each case is not visible in this
 * copy; confirm against the full source.
 */
1680 * Infer the type of either the sources or the destination.
1682 static enum toy_type
1683 ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
1685 enum tgsi_opcode_type type;
1688 type = tgsi_opcode_infer_dst_type(tgsi_opcode);
1690 type = tgsi_opcode_infer_src_type(tgsi_opcode);
1693 case TGSI_TYPE_UNSIGNED:
1695 case TGSI_TYPE_SIGNED:
1697 case TGSI_TYPE_FLOAT:
1699 case TGSI_TYPE_UNTYPED:
1700 case TGSI_TYPE_VOID:
1701 case TGSI_TYPE_DOUBLE:
1703 assert(!"unsupported TGSI type");
/*
 * `operand` indexes tgsi_inst->Dst[] when is_dst is true and
 * tgsi_inst->Src[] otherwise.
 *
 * The type is chosen in two steps:
 *  1. by opcode: MOV looks at the files of Dst[0] and Src[0] (ADDRESS on
 *     either side, or a non-indirect IMMEDIATE source whose recorded type
 *     is read from tgsi->imm_data.types); UCMP treats source operand 0
 *     specially; every other opcode defers to ra_infer_opcode_type().
 *  2. by the operand's register file: SAMPLER, RESOURCE and SAMPLER_VIEW
 *     share one case, and ADDRESS asserts that the type is TOY_TYPE_D.
 * NOTE(review): the assignments made in several of these branches are not
 * visible in this copy; confirm against the full source.
 */
1709 * Return the type of an operand of the specified instruction.
1711 static enum toy_type
1712 ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
1713 int operand, bool is_dst)
1716 enum tgsi_file_type file;
1718 /* we need to look at both src and dst for MOV */
1719 /* XXX it should not be this complex */
1720 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
1721 const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
1722 const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
1724 if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
1727 else if (src_file == TGSI_FILE_IMMEDIATE &&
1728 !tgsi_inst->Src[0].Register.Indirect) {
1729 const int src_idx = tgsi_inst->Src[0].Register.Index;
1730 type = tgsi->imm_data.types[src_idx];
1733 /* this is the best we can do */
1739 else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
1740 if (!is_dst && operand == 0)
1748 type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
1752 tgsi_inst->Dst[operand].Register.File :
1753 tgsi_inst->Src[operand].Register.File;
1755 case TGSI_FILE_SAMPLER:
1756 case TGSI_FILE_RESOURCE:
1757 case TGSI_FILE_SAMPLER_VIEW:
1760 case TGSI_FILE_ADDRESS:
1761 assert(type == TOY_TYPE_D);
1771 * Allocate a VRF register.
1774 ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
1776 const int count = (tgsi->aos) ? 1 : 4;
1777 return tc_alloc_vrf(tgsi->tc, count);
/*
 * Packs (file, dim, index) into a single integer and returns it as a
 * pointer-sized key: file in bits 28 and up, dim starting at bit 16, index
 * in the low 16 bits.  The assertions bound file to 4 bits, dim to 12 bits
 * and index to 16 bits so that the three fields cannot overlap.
 * NOTE(review): the declaration of `key` is not visible in this copy; it is
 * passed to intptr_to_pointer(), so presumably it is an intptr_t -- confirm.
 */
1781 * Construct the key for VRF mapping look-up.
1784 ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
1788 /* this is ugly... */
1789 assert(file < 1 << 4);
1790 assert(dim < 1 << 12);
1791 assert(index < 1 << 16);
1792 key = (file << 28) | (dim << 16) | index;
1794 return intptr_to_pointer(key);
/*
 * Looks the (file, dim, index) key up in tgsi->reg_mapping.  A stored value
 * is converted back to a VRF number with pointer_to_intptr(); otherwise a
 * register is allocated with ra_alloc_reg() and recorded in the table under
 * the same key.
 * NOTE(review): how *is_new is set, and the test that selects between the
 * two paths, are not visible in this copy -- presumably *is_new reports
 * whether the register was just allocated; confirm.  Callers in this file
 * pass NULL when they do not need it.
 */
1798 * Map a TGSI register to a VRF register.
1801 ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
1802 int dim, int index, bool *is_new)
1807 key = ra_get_map_key(file, dim, index);
1810 * because we allocate vrf from 1 and on, val is never NULL as long as the
1813 val = util_hash_table_get(tgsi->reg_mapping, key);
1815 vrf = pointer_to_intptr(val);
1821 vrf = (intptr_t) ra_alloc_reg(tgsi, file);
1823 /* add to the mapping */
1824 val = intptr_to_pointer(vrf);
1825 util_hash_table_set(tgsi->reg_mapping, key, val);
/*
 * Checks destination dst_index of tgsi_inst against every source of the
 * same instruction.  Indirect destinations are handled up front.  Sources in
 * a different register file are skipped; indirect sources in the same file
 * are handled conservatively without comparing indices; otherwise the
 * source aliases when both its dimension and its index equal the
 * destination's.
 * NOTE(review): the return statements are not visible in this copy --
 * presumably the indirect cases and the match return true and the fall-
 * through returns false; confirm against the full source.
 */
1835 * Return true if the destination aliases any of the sources.
1838 ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
1840 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1843 /* we need a scratch register for indirect dst anyway */
1844 if (ra_dst_is_indirect(d))
1847 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1848 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
1850 if (s->Register.File != d->Register.File)
1854 * we can go on to check dimension and index respectively, but
1855 * keep it simple for now
1857 if (ra_is_src_indirect(s))
1859 if (ra_src_dimension(s) == ra_dst_dimension(d) &&
1860 ra_src_index(s) == ra_dst_index(d))
/*
 * The destination's register file is classified first: NULL has its own
 * case, OUTPUT/TEMPORARY/ADDRESS/PREDICATE share one, and any other file
 * trips the "unhandled dst file" assertion.
 *
 * For VRF-backed destinations a scratch register is requested when the
 * destination is indirect, aliases a source, or the instruction saturates.
 * A scratch destination gets a freshly allocated register; otherwise the
 * TGSI register is mapped with ra_map_reg().  The result is a linear VRF
 * destination carrying the operand's type and the TGSI write mask, and
 * *is_scratch is set to whether a scratch register was used so the caller
 * can copy the result to the real destination afterwards.
 * NOTE(review): the per-file assignments and the conditions guarding the
 * two allocation paths are not visible in this copy; confirm.
 */
1868 * Return the toy register for a TGSI destination operand.
1870 static struct toy_dst
1871 ra_get_dst(struct toy_tgsi *tgsi,
1872 const struct tgsi_full_instruction *tgsi_inst, int dst_index,
1875 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1876 bool need_vrf = false;
1879 switch (d->Register.File) {
1880 case TGSI_FILE_NULL:
1883 case TGSI_FILE_OUTPUT:
1884 case TGSI_FILE_TEMPORARY:
1885 case TGSI_FILE_ADDRESS:
1886 case TGSI_FILE_PREDICATE:
1890 assert(!"unhandled dst file");
1896 /* XXX we do not always need a scratch given the conditions... */
1897 const bool need_scratch =
1898 (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
1899 tgsi_inst->Instruction.Saturate);
1900 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
1904 vrf = ra_alloc_reg(tgsi, d->Register.File);
1907 vrf = ra_map_reg(tgsi, d->Register.File,
1908 ra_dst_dimension(d), ra_dst_index(d), NULL);
1912 *is_scratch = need_scratch;
1914 dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1915 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
/*
 * Build a linear VRF source of the given type for TGSI source `s`, backed by
 * VRF register `vrf` (scaled by TOY_REG_WIDTH).  The swizzle and the
 * absolute/negate modifiers are copied from the TGSI register.
 * NOTE(review): one argument line of the tsrc_full() call is not visible in
 * this copy; confirm the full argument list against the full source.
 */
1921 static struct toy_src
1922 ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
1923 enum toy_type type, int vrf)
1925 return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1927 s->Register.SwizzleX, s->Register.SwizzleY,
1928 s->Register.SwizzleZ, s->Register.SwizzleW,
1929 s->Register.Absolute, s->Register.Negate,
1930 vrf * TOY_REG_WIDTH);
1934 init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
1935 enum tgsi_file_type file, int index,
1936 const struct tgsi_ind_register *indirect,
1937 const struct tgsi_dimension *dimension,
1938 const struct tgsi_ind_register *dim_indirect)
1943 /* src[0]: TGSI file */
1944 inst->src[num_src++] = tsrc_imm_d(file);
1946 /* src[1]: TGSI dimension */
1947 inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
1949 /* src[2]: TGSI dimension indirection */
1951 const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
1952 dim_indirect->Index, NULL);
1954 src = tsrc(TOY_FILE_VRF, vrf, 0);
1955 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1958 src = tsrc_imm_d(0);
1961 inst->src[num_src++] = src;
1963 /* src[3]: TGSI index */
1964 inst->src[num_src++] = tsrc_imm_d(index);
1966 /* src[4]: TGSI index indirection */
1968 const int vrf = ra_map_reg(tgsi, indirect->File, 0,
1969 indirect->Index, NULL);
1971 src = tsrc(TOY_FILE_VRF, vrf, 0);
1972 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1975 src = tsrc_imm_d(0);
1978 inst->src[num_src++] = src;
/*
 * Return the toy register for an indirectly addressed TGSI source operand.
 *
 * The source file is classified first: NULL has its own case, the
 * SAMPLER/RESOURCE/SAMPLER_VIEW group another, the remaining readable files
 * (CONSTANT, INPUT, SYSTEM_VALUE, TEMPORARY, ADDRESS, IMMEDIATE, PREDICATE)
 * a third, and anything else trips the "unhandled src file" assertion.
 *
 * One path requires a non-dimensioned, index-indirect register (both
 * asserted) and uses the VRF mapped to the indirection register itself; the
 * other allocates a fresh VRF.  The source is built from that VRF, and a
 * TOY_OPCODE_TGSI_INDIRECT_FETCH writing all four channels of it is
 * emitted, with its sources describing the TGSI register via
 * init_tgsi_reg().
 * NOTE(review): the conditions selecting between these paths (need_vrf /
 * is_resource) are not visible in this copy; confirm against the full
 * source.
 */
1983 static struct toy_src
1984 ra_get_src_indirect(struct toy_tgsi *tgsi,
1985 const struct tgsi_full_instruction *tgsi_inst,
1988 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1989 bool need_vrf = false, is_resource = false;
1992 switch (s->Register.File) {
1993 case TGSI_FILE_NULL:
1996 case TGSI_FILE_SAMPLER:
1997 case TGSI_FILE_RESOURCE:
1998 case TGSI_FILE_SAMPLER_VIEW:
2001 case TGSI_FILE_CONSTANT:
2002 case TGSI_FILE_INPUT:
2003 case TGSI_FILE_SYSTEM_VALUE:
2004 case TGSI_FILE_TEMPORARY:
2005 case TGSI_FILE_ADDRESS:
2006 case TGSI_FILE_IMMEDIATE:
2007 case TGSI_FILE_PREDICATE:
2011 assert(!"unhandled src file");
2017 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
2021 assert(!s->Register.Dimension);
2022 assert(s->Register.Indirect);
2024 vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
2027 vrf = ra_alloc_reg(tgsi, s->Register.File);
2030 src = ra_get_src_for_vrf(s, type, vrf);
2032 /* emit indirect fetch */
2034 struct toy_inst *inst;
2036 inst = tc_add(tgsi->tc);
2037 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
2038 inst->dst = tdst_from(src);
2039 inst->dst.writemask = TOY_WRITEMASK_XYZW;
2041 init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
2042 (s->Register.Indirect) ? &s->Indirect : NULL,
2043 (s->Register.Dimension) ? &s->Dimension : NULL,
2044 (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
/*
 * Indirect sources are delegated to ra_get_src_indirect().  Direct sources
 * are handled per register file:
 *  - SAMPLER/RESOURCE/SAMPLER_VIEW become an integer immediate holding the
 *    register index (dimensions are not allowed, asserted);
 *  - IMMEDIATE reads the stored value with toy_tgsi_get_imm() and compares
 *    the four swizzled components; the visible immediate path builds an
 *    unsigned immediate from the X-swizzled component and copies the
 *    absolute/negate modifiers;
 *  - any unknown file trips the "unhandled src file" assertion.
 * VRF-backed sources are mapped with ra_map_reg().  For TEMPORARY, ADDRESS
 * and PREDICATE a MOV of zero to all four channels is emitted (see the
 * in-line comment on why registers are always initialized).
 * NOTE(review): the conditions around the immediate fast path and around
 * the zero-initialization (presumably only for newly mapped registers, given
 * the `is_new` out-parameter) are not visible in this copy; confirm.
 */
2052 * Return the toy register for a TGSI source operand.
2054 static struct toy_src
2055 ra_get_src(struct toy_tgsi *tgsi,
2056 const struct tgsi_full_instruction *tgsi_inst,
2059 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
2060 bool need_vrf = false;
2063 if (ra_is_src_indirect(s))
2064 return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
2066 switch (s->Register.File) {
2067 case TGSI_FILE_NULL:
2070 case TGSI_FILE_CONSTANT:
2071 case TGSI_FILE_INPUT:
2072 case TGSI_FILE_SYSTEM_VALUE:
2075 case TGSI_FILE_TEMPORARY:
2076 case TGSI_FILE_ADDRESS:
2077 case TGSI_FILE_PREDICATE:
2080 case TGSI_FILE_SAMPLER:
2081 case TGSI_FILE_RESOURCE:
2082 case TGSI_FILE_SAMPLER_VIEW:
2083 assert(!s->Register.Dimension);
2084 src = tsrc_imm_d(s->Register.Index);
2086 case TGSI_FILE_IMMEDIATE:
2088 const uint32_t *imm;
2089 enum toy_type imm_type;
2092 imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
2095 (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
2096 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
2097 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
2100 const enum toy_type type =
2101 ra_get_type(tgsi, tgsi_inst, src_index, false);
2103 /* ignore imm_type */
2104 src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
2106 src.absolute = s->Register.Absolute;
2107 src.negate = s->Register.Negate;
2115 assert(!"unhandled src file");
2121 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
2125 vrf = ra_map_reg(tgsi, s->Register.File,
2126 ra_src_dimension(s), ra_src_index(s), &is_new);
2128 src = ra_get_src_for_vrf(s, type, vrf);
2131 switch (s->Register.File) {
2132 case TGSI_FILE_TEMPORARY:
2133 case TGSI_FILE_ADDRESS:
2134 case TGSI_FILE_PREDICATE:
2136 struct toy_dst dst = tdst_from(src);
2137 dst.writemask = TOY_WRITEMASK_XYZW;
2140 * Always initialize registers. Otherwise, if the random value
2141 * ends up in a VUE, FS may fail to interpolate correctly.
2143 tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
2157 parse_instruction(struct toy_tgsi *tgsi,
2158 const struct tgsi_full_instruction *tgsi_inst)
2160 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
2161 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
2162 bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
2163 toy_tgsi_translate translate;
2166 /* convert TGSI registers to toy registers */
2167 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
2168 src[i] = ra_get_src(tgsi, tgsi_inst, i);
2169 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
2170 dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
2172 /* translate the instruction */
2173 translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
2174 translate(tgsi->tc, tgsi_inst, dst, src);
2176 /* write the result to the real destinations if needed */
2177 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2178 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2180 if (!dst_is_scratch[i])
2183 if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
2184 tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
2186 tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
2188 /* emit indirect store */
2189 if (ra_dst_is_indirect(d)) {
2190 struct toy_inst *inst;
2192 inst = tc_add(tgsi->tc);
2193 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
2196 init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
2197 (d->Register.Indirect) ? &d->Indirect : NULL,
2198 (d->Register.Dimension) ? &d->Dimension : NULL,
2199 (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
2202 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
2203 struct toy_dst real_dst;
2206 vrf = ra_map_reg(tgsi, d->Register.File,
2207 ra_dst_dimension(d), ra_dst_index(d), NULL);
2208 real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
2209 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
2212 tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
2215 struct toy_dst tdst[4];
2216 struct toy_src tsrc[4];
2219 tdst_transpose(real_dst, tdst);
2220 tsrc_transpose(tsrc_from(dst[i]), tsrc);
2222 for (j = 0; j < 4; j++)
2223 tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
2227 tgsi->tc->templ.saturate = false;
2230 switch (tgsi_inst->Instruction.Opcode) {
2231 case TGSI_OPCODE_KIL:
2232 case TGSI_OPCODE_KILP:
2233 tgsi->uses_kill = true;
2237 /* remember channels written */
2238 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2239 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2241 if (d->Register.File != TGSI_FILE_OUTPUT)
2243 for (i = 0; i < tgsi->num_outputs; i++) {
2244 if (tgsi->outputs[i].index == d->Register.Index) {
2245 tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask;
2253 decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2255 static const struct tgsi_declaration_interp default_interp = {
2256 TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
2258 const struct tgsi_declaration_interp *interp =
2259 (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp;
2262 if (decl->Range.Last >= Elements(tgsi->inputs)) {
2263 assert(!"invalid IN");
2267 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2268 const int slot = tgsi->num_inputs++;
2270 tgsi->inputs[slot].index = index;
2271 tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
2272 if (decl->Declaration.Semantic) {
2273 tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
2274 tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
2277 tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2278 tgsi->inputs[slot].semantic_index = index;
2280 tgsi->inputs[slot].interp = interp->Interpolate;
2281 tgsi->inputs[slot].centroid = interp->Centroid;
2286 decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2290 if (decl->Range.Last >= Elements(tgsi->outputs)) {
2291 assert(!"invalid OUT");
2295 assert(decl->Declaration.Semantic);
2297 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2298 const int slot = tgsi->num_outputs++;
2300 tgsi->outputs[slot].index = index;
2301 tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
2302 tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
2303 tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
2304 tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
2309 decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2313 if (decl->Range.Last >= Elements(tgsi->system_values)) {
2314 assert(!"invalid SV");
2318 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2319 const int slot = tgsi->num_system_values++;
2321 tgsi->system_values[slot].index = index;
2322 if (decl->Declaration.Semantic) {
2323 tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
2324 tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
2327 tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2328 tgsi->system_values[slot].semantic_index = index;
/*
 * The toy opcode is chosen from the register file: INPUT, CONSTANT,
 * SYSTEM_VALUE and IMMEDIATE each have a TOY_OPCODE_TGSI_* fetch opcode;
 * other files need no fetch.  The destination type defaults to float and,
 * for immediates, is replaced by the type recorded for that immediate.
 * The register is mapped to a VRF with ra_map_reg() and a two-source
 * instruction (dimension, index as integer immediates) is emitted to fill
 * it.
 * NOTE(review): the statement following "no need to fetch" is not visible
 * in this copy -- presumably an early return; confirm.
 */
2334 * Emit an instruction to fetch the value of a TGSI register.
2337 fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
2341 enum toy_opcode opcode;
2342 enum toy_type type = TOY_TYPE_F;
2345 case TGSI_FILE_INPUT:
2346 opcode = TOY_OPCODE_TGSI_IN;
2348 case TGSI_FILE_CONSTANT:
2349 opcode = TOY_OPCODE_TGSI_CONST;
2351 case TGSI_FILE_SYSTEM_VALUE:
2352 opcode = TOY_OPCODE_TGSI_SV;
2354 case TGSI_FILE_IMMEDIATE:
2355 opcode = TOY_OPCODE_TGSI_IMM;
2356 toy_tgsi_get_imm(tgsi, idx, &type);
2359 /* no need to fetch */
2364 vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
2365 dst = tdst(TOY_FILE_VRF, vrf, 0);
2366 dst = tdst_type(dst, type);
2368 tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
/*
 * Handle one TGSI declaration.  INPUT, OUTPUT and SYSTEM_VALUE declarations
 * are recorded with decl_add_in(), decl_add_out() and decl_add_sv().
 * IMMEDIATE declarations are unexpected here (asserted), the remaining known
 * files need no bookkeeping, and unknown files trip the "unhandled TGSI
 * file" assertion.  Afterwards fetch_source() is called for every register
 * in the declared range, using the declaration's 2D index as the dimension
 * when it has one and 0 otherwise.
 */
2372 parse_declaration(struct toy_tgsi *tgsi,
2373 const struct tgsi_full_declaration *decl)
2377 switch (decl->Declaration.File) {
2378 case TGSI_FILE_INPUT:
2379 decl_add_in(tgsi, decl);
2381 case TGSI_FILE_OUTPUT:
2382 decl_add_out(tgsi, decl);
2384 case TGSI_FILE_SYSTEM_VALUE:
2385 decl_add_sv(tgsi, decl);
2387 case TGSI_FILE_IMMEDIATE:
2388 /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
2389 assert(!"unexpected immediate declaration");
2391 case TGSI_FILE_NULL:
2392 case TGSI_FILE_CONSTANT:
2393 case TGSI_FILE_TEMPORARY:
2394 case TGSI_FILE_SAMPLER:
2395 case TGSI_FILE_PREDICATE:
2396 case TGSI_FILE_ADDRESS:
2397 case TGSI_FILE_RESOURCE:
2398 case TGSI_FILE_SAMPLER_VIEW:
2402 assert(!"unhandled TGSI file");
2406 /* fetch the registers now */
2407 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2408 const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
2409 fetch_source(tgsi, decl->Declaration.File, dim, i);
/*
 * Append one immediate (four 32-bit words from buf, plus its toy type) to
 * tgsi->imm_data and return its index.
 *
 * When the arrays are full they are grown: the new size starts at 16 or
 * twice the current size and is increased until it exceeds imm_data.cur.
 * Both the value array and the parallel type array are reallocated.
 * NOTE(review): the growth step inside the while loop and the handling of a
 * failed REALLOC are not visible in this copy; the caller treats some
 * return value as failure -- confirm which one, and confirm that a partial
 * reallocation failure does not leave imm_data pointing at freed memory.
 */
2414 add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
2416 /* reallocate the buffer if necessary */
2417 if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
2418 const int cur_size = tgsi->imm_data.size;
2420 enum toy_type *new_types;
2421 uint32_t (*new_buf)[4];
2423 new_size = (cur_size) ? cur_size << 1 : 16;
2424 while (new_size <= tgsi->imm_data.cur)
2427 new_buf = REALLOC(tgsi->imm_data.buf,
2428 cur_size * sizeof(new_buf[0]),
2429 new_size * sizeof(new_buf[0]));
2430 new_types = REALLOC(tgsi->imm_data.types,
2431 cur_size * sizeof(new_types[0]),
2432 new_size * sizeof(new_types[0]));
2433 if (!new_buf || !new_types) {
2441 tgsi->imm_data.buf = new_buf;
2442 tgsi->imm_data.types = new_types;
2443 tgsi->imm_data.size = new_size;
2446 tgsi->imm_data.types[tgsi->imm_data.cur] = type;
2447 memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
2448 buf, sizeof(tgsi->imm_data.buf[0]));
2450 return tgsi->imm_data.cur++;
/*
 * Handle one TGSI immediate token.
 *
 * The four channels of imm are converted to raw 32-bit words according to
 * imm->Immediate.DataType, stored with add_imm(), and the resulting
 * immediate register is then fetched with fetch_source().
 *
 * NOTE(review): the assignments to `type` and the condition guarding the
 * tc_fail() call are not visible in this excerpt; presumably tc_fail() is
 * reached only when add_imm() reports failure -- TODO confirm.
 */
2454 parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
2457 uint32_t imm_buf[4];
2460 switch (imm->Immediate.DataType) {
2461 case TGSI_IMM_FLOAT32:
/* floats are passed through fui() to obtain 32-bit words */
2463 imm_buf[0] = fui(imm->u[0].Float);
2464 imm_buf[1] = fui(imm->u[1].Float);
2465 imm_buf[2] = fui(imm->u[2].Float);
2466 imm_buf[3] = fui(imm->u[3].Float);
2468 case TGSI_IMM_INT32:
2470 imm_buf[0] = (uint32_t) imm->u[0].Int;
2471 imm_buf[1] = (uint32_t) imm->u[1].Int;
2472 imm_buf[2] = (uint32_t) imm->u[2].Int;
2473 imm_buf[3] = (uint32_t) imm->u[3].Int;
2475 case TGSI_IMM_UINT32:
2477 imm_buf[0] = imm->u[0].Uint;
2478 imm_buf[1] = imm->u[1].Uint;
2479 imm_buf[2] = imm->u[2].Uint;
2480 imm_buf[3] = imm->u[3].Uint;
/* unknown data type: assert, and zero the words so they are defined */
2483 assert(!"unhandled TGSI imm type");
2485 memset(imm_buf, 0, sizeof(imm_buf));
2489 idx = add_imm(tgsi, type, imm_buf);
2491 fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
2493 tc_fail(tgsi->tc, "failed to add TGSI imm");
/*
 * Handle one TGSI property token.
 *
 * The first data word of the property (prop->u[0].Data) is copied into the
 * tgsi->props field that corresponds to prop->Property.PropertyName.  An
 * assertion covers property names that are not handled (the label that
 * introduces it is not visible in this excerpt).
 */
2497 parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
2499 switch (prop->Property.PropertyName) {
2500 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
2501 tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
2503 case TGSI_PROPERTY_FS_COORD_ORIGIN:
2504 tgsi->props.fs_coord_origin = prop->u[0].Data;
2506 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2507 tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
2509 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2510 tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
2512 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2513 tgsi->props.fs_depth_layout = prop->u[0].Data;
2515 case TGSI_PROPERTY_GS_INPUT_PRIM:
2516 tgsi->props.gs_input_prim = prop->u[0].Data;
2518 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2519 tgsi->props.gs_output_prim = prop->u[0].Data;
2521 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2522 tgsi->props.gs_max_output_vertices = prop->u[0].Data;
2525 assert(!"unhandled TGSI property");
/*
 * Dispatch one parsed TGSI token to the handler for its type:
 * declarations to parse_declaration(), immediates to parse_immediate(),
 * instructions to parse_instruction() and properties to parse_property().
 * An assertion covers token types that are not handled (the label that
 * introduces it is not visible in this excerpt).
 */
2531 parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
2533 switch (token->Token.Type) {
2534 case TGSI_TOKEN_TYPE_DECLARATION:
2535 parse_declaration(tgsi, &token->FullDeclaration);
2537 case TGSI_TOKEN_TYPE_IMMEDIATE:
2538 parse_immediate(tgsi, &token->FullImmediate);
2540 case TGSI_TOKEN_TYPE_INSTRUCTION:
2541 parse_instruction(tgsi, &token->FullInstruction);
2543 case TGSI_TOKEN_TYPE_PROPERTY:
2544 parse_property(tgsi, &token->FullProperty);
2547 assert(!"unhandled TGSI token type");
/*
 * Hash-table iteration callback (passed to util_hash_table_foreach() by
 * toy_tgsi_dump()) that prints one register mapping.
 *
 * key holds the packed TGSI register signature and val the VRF number,
 * both stored as pointer-sized integers.  The mapping is printed either
 * with both the dimension and the index or with the index alone.
 *
 * NOTE(review): the condition choosing between the two formats and the
 * return statement are not visible in this excerpt.
 */
2552 static enum pipe_error
2553 dump_reg_mapping(void *key, void *val, void *data)
2555 int tgsi_file, tgsi_dim, tgsi_index;
2558 sig = (uint32_t) pointer_to_intptr(key);
2559 vrf = (uint32_t) pointer_to_intptr(val);
2561 /* see ra_get_map_key() */
/* bits 31:28 hold the file, bits 27:16 the dimension, bits 15:0 the index */
2562 tgsi_file = (sig >> 28) & 0xf;
2563 tgsi_dim = (sig >> 16) & 0xfff;
2564 tgsi_index = (sig >> 0) & 0xffff;
/* two-index form: file[dim][index] */
2567 ilo_printf(" v%d:\t%s[%d][%d]\n", vrf,
2568 tgsi_file_names[tgsi_file], tgsi_dim, tgsi_index);
/* one-index form: file[index] */
2571 ilo_printf(" v%d:\t%s[%d]\n", vrf,
2572 tgsi_file_names[tgsi_file], tgsi_index);
2579 * Dump the TGSI translator, currently only the register mapping.
/*
 * Walks every entry of tgsi->reg_mapping and hands it to
 * dump_reg_mapping(); no user data is passed to the callback.
 */
2582 toy_tgsi_dump(const struct toy_tgsi *tgsi)
2584 util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
2588 * Clean up the TGSI translator.
/*
 * Releases what the translator owns: the two immediate arrays grown by
 * add_imm() and the register-mapping hash table created by init_tgsi().
 */
2591 toy_tgsi_cleanup(struct toy_tgsi *tgsi)
2593 FREE(tgsi->imm_data.buf);
2594 FREE(tgsi->imm_data.types);
2596 util_hash_table_destroy(tgsi->reg_mapping);
/*
 * Hash callback for the register-mapping table: the key is itself an
 * integer stored in a pointer, so its value is used directly as the hash.
 */
2600 reg_mapping_hash(void *key)
2602 return (unsigned) pointer_to_intptr(key);
/*
 * Compare callback for the register-mapping table: returns 0 when the two
 * keys are the same pointer value and non-zero otherwise.
 * NOTE(review): presumably util_hash_table treats 0 as "equal" -- confirm.
 */
2606 reg_mapping_compare(void *key1, void *key2)
2608 return (key1 != key2);
2612 * Initialize the TGSI translator.
/*
 * Reset *tgsi to all zeroes, select the AoS or SoA translate table
 * according to aos, and create the register-mapping hash table.
 *
 * Returns true when the hash table was created, false otherwise.
 */
2615 init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
2617 memset(tgsi, 0, sizeof(*tgsi));
2621 tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;
2623 /* create a mapping of TGSI registers to VRF registers */
2625 util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
2627 return (tgsi->reg_mapping != NULL);
2631 * Translate TGSI tokens into toy instructions.
/*
 * Entry point of the translator: initialise *tgsi with init_tgsi(), then
 * iterate over tokens with the TGSI parser and feed every parsed token to
 * parse_token().
 *
 * If init_tgsi() fails, the failure is reported through tc_fail().
 * NOTE(review): what follows the tc_fail() call is not visible in this
 * excerpt; presumably the function bails out -- TODO confirm.
 */
2634 toy_compiler_translate_tgsi(struct toy_compiler *tc,
2635 const struct tgsi_token *tokens, bool aos,
2636 struct toy_tgsi *tgsi)
2638 struct tgsi_parse_context parse;
2640 if (!init_tgsi(tgsi, tc, aos)) {
2641 tc_fail(tc, "failed to initialize TGSI translator");
2645 tgsi_parse_init(&parse, tokens);
/* one token per iteration: parse it, then translate it */
2646 while (!tgsi_parse_end_of_tokens(&parse)) {
2647 tgsi_parse_token(&parse);
2648 parse_token(tgsi, &parse.FullToken);
2650 tgsi_parse_free(&parse);
2654 * Map the TGSI register to VRF register.
/*
 * Look up the VRF register that the TGSI register (file, dimension, index)
 * was mapped to.
 *
 * The register is packed into a key with ra_get_map_key() and looked up in
 * tgsi->reg_mapping.  Returns the stored value as an integer, or -1 when
 * the lookup yields NULL.
 */
2657 toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
2658 enum tgsi_file_type file, int dimension, int index)
2662 key = ra_get_map_key(file, dimension, index);
2664 val = util_hash_table_get(tgsi->reg_mapping, key);
/* a NULL result is reported as -1, so a stored value of 0 cannot be told apart from "missing" */
2666 return (val) ? pointer_to_intptr(val) : -1;