1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * TGSI to LLVM IR translation -- SoA.
33 * @author Jose Fonseca <jfonseca@vmware.com>
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61 #include "lp_bld_printf.h"
/* Iterate CHAN over every vector channel (0..NUM_CHANNELS-1, i.e. X,Y,Z,W). */
64 #define FOR_EACH_CHANNEL( CHAN )\
65 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
/* True if channel CHAN is set in instruction INST's dst[0] writemask. */
67 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
/* Guard the following statement on dst[0] writemask bit CHAN. */
70 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
71 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
/* Iterate CHAN over only the channels enabled in dst[0]'s writemask. */
73 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
74 FOR_EACH_CHANNEL( CHAN )\
75 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
/* Four channels per TGSI register (X, Y, Z, W). */
81 #define NUM_CHANNELS 4
/* Initial capacity of the parsed-instruction array (grown on demand). */
83 #define LP_MAX_INSTRUCTIONS 256
/* Build context used to emit all mask arithmetic. */
87 struct lp_build_context *bld;
/* Integer vector type matching bld->type; all masks use this type. */
91 LLVMTypeRef int_vec_type;
/* Saved condition masks for nested IF/ELSE/ENDIF. */
93 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
/* Current combined IF-condition mask (~0 when no conditional active). */
95 LLVMValueRef cond_mask;
/* Current (innermost) loop state: */
97 LLVMBasicBlockRef loop_block;
98 LLVMValueRef cont_mask;
99 LLVMValueRef break_mask;
100 LLVMValueRef break_var;
/* Per-loop saved copies of the four fields above; the nested struct's
 * opening line is not visible in this view -- see loop_stack below.
 */
102 LLVMBasicBlockRef loop_block;
103 LLVMValueRef cont_mask;
104 LLVMValueRef break_mask;
105 LLVMValueRef break_var;
106 } loop_stack[LP_MAX_TGSI_NESTING];
/* Current subroutine return mask (lanes still live after RET). */
109 LLVMValueRef ret_mask;
/* Saved return mask (and pc, not visible here) per CAL nesting level. */
112 LLVMValueRef ret_mask;
113 } call_stack[LP_MAX_TGSI_NESTING];
/* The final combined execution mask (cond & cont & break & ret). */
116 LLVMValueRef exec_mask;
/* Everything needed while translating one TGSI shader to LLVM IR (SoA). */
119 struct lp_build_tgsi_soa_context
/* Builder for the shader's main (float vector) data type. */
121 struct lp_build_context base;
123 /* Builder for vector integer masks and indices */
124 struct lp_build_context uint_bld;
126 /* Builder for scalar elements of shader's data type (float) */
127 struct lp_build_context elem_bld;
/* Pointer to the constant buffer (gathered/loaded in emit_fetch). */
129 LLVMValueRef consts_ptr;
/* Fragment position and shader input registers, provided by the caller. */
130 const LLVMValueRef *pos;
131 const LLVMValueRef (*inputs)[NUM_CHANNELS];
132 LLVMValueRef (*outputs)[NUM_CHANNELS];
/* Code generator for texture sampling (may be NULL -- see emit_tex). */
134 const struct lp_build_sampler_soa *sampler;
/* Per-register, per-channel storage for the small register files. */
136 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
137 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
138 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
139 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
141 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
142 * set in the indirect_files field.
143 * The temps[] array above is unused then.
145 LLVMValueRef temps_array;
147 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
148 * set in the indirect_files field.
149 * The outputs[] array above is unused then.
151 LLVMValueRef outputs_array;
/* Read-only scan results for the shader being translated. */
153 const struct tgsi_shader_info *info;
154 /** bitmask indicating which register files are accessed indirectly */
155 unsigned indirect_files;
/* Screen-level fragment mask and the TGSI execution mask. */
157 struct lp_build_mask_context *mask;
158 struct lp_exec_mask exec_mask;
/* Growable copy of the parsed instructions (needed for lookahead/calls). */
160 struct tgsi_full_instruction *instructions;
161 uint max_instructions;
/* Initialize an execution mask: empty cond/loop/call stacks and every
 * component mask set to ~0 (all lanes active).
 * NOTE(review): mask->bld is read below but its assignment from the 'bld'
 * parameter falls on a line not visible in this view -- confirm.
 */
164 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
167 mask->has_mask = FALSE;
168 mask->cond_stack_size = 0;
169 mask->loop_stack_size = 0;
170 mask->call_stack_size = 0;
/* All masks start fully enabled. */
172 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
173 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
174 LLVMConstAllOnes(mask->int_vec_type);
/* Recompute exec_mask by AND-ing together the active component masks
 * (cond, and -- when inside a loop -- cont/break, and -- when inside a
 * subroutine -- ret), then update the cheap has_mask flag.
 */
177 static void lp_exec_mask_update(struct lp_exec_mask *mask)
179 if (mask->loop_stack_size) {
180 /*for loops we need to update the entire mask at runtime */
182 assert(mask->break_mask);
183 tmp = LLVMBuildAnd(mask->bld->builder,
187 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
/* Not in a loop: the condition mask alone drives execution. */
192 mask->exec_mask = mask->cond_mask;
194 if (mask->call_stack_size) {
195 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
/* has_mask is TRUE whenever any masking construct is active. */
201 mask->has_mask = (mask->cond_stack_size > 0 ||
202 mask->loop_stack_size > 0 ||
203 mask->call_stack_size > 0);
/* Enter an IF: push the current cond_mask and AND in the new condition
 * value 'val' (an integer mask vector of int_vec_type).
 */
206 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
209 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
/* At top level the saved mask must still be all-ones. */
210 if (mask->cond_stack_size == 0) {
211 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
213 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
214 assert(LLVMTypeOf(val) == mask->int_vec_type);
215 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
219 lp_exec_mask_update(mask);
/* Handle ELSE: invert the current condition and re-AND with the mask that
 * was in effect before the matching IF (top of the cond stack).
 */
222 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
224 LLVMValueRef prev_mask;
225 LLVMValueRef inv_mask;
227 assert(mask->cond_stack_size);
228 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
229 if (mask->cond_stack_size == 1) {
230 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
233 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
235 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
238 lp_exec_mask_update(mask);
/* Handle ENDIF: restore the condition mask saved by the matching IF. */
241 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
243 assert(mask->cond_stack_size);
244 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
245 lp_exec_mask_update(mask);
/* Handle BGNLOOP: save the enclosing loop's state, allocate a stack slot
 * (break_var) so the break mask survives loop back-edges, start a new
 * basic block for the loop body, and reload the break mask inside it.
 */
248 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
250 if (mask->loop_stack_size == 0) {
251 assert(mask->loop_block == NULL);
252 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
253 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
254 assert(mask->break_var == NULL);
257 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
/* Push the current loop state for the enclosing loop (if any). */
259 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
260 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
261 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
262 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
263 ++mask->loop_stack_size;
/* break_mask lives in memory so stores in the body are seen on re-entry. */
265 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
266 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
/* Branch into a fresh block that is the loop's back-edge target. */
268 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
269 LLVMBuildBr(mask->bld->builder, mask->loop_block);
270 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
272 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
274 lp_exec_mask_update(mask);
/* Handle BRK: clear the break-mask bits of the lanes currently executing
 * (break_mask &= ~exec_mask), so they stay disabled until ENDLOOP.
 */
277 static void lp_exec_break(struct lp_exec_mask *mask)
279 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
283 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
285 exec_mask, "break_full");
287 lp_exec_mask_update(mask);
/* Handle CONT: disable the currently-executing lanes in cont_mask until
 * the end of this loop iteration (cont_mask &= ~exec_mask).
 */
290 static void lp_exec_continue(struct lp_exec_mask *mask)
292 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
296 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
300 lp_exec_mask_update(mask);
/* Handle ENDLOOP: loop back to the body while any lane is still active.
 * The whole exec_mask vector is bitcast to one wide integer and compared
 * against zero to get a single i1 loop condition.
 */
304 static void lp_exec_endloop(struct lp_exec_mask *mask)
306 LLVMBasicBlockRef endloop;
/* One integer wide enough to hold the entire mask vector. */
307 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
308 mask->bld->type.length);
311 assert(mask->break_mask);
314 * Restore the cont_mask, but don't pop
316 assert(mask->loop_stack_size);
317 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
318 lp_exec_mask_update(mask);
321 * Unlike the continue mask, the break_mask must be preserved across loop
324 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
326 /* i1cond = (mask == 0) */
327 i1cond = LLVMBuildICmp(
330 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
331 LLVMConstNull(reg_type), "");
333 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
335 LLVMBuildCondBr(mask->bld->builder,
336 i1cond, mask->loop_block, endloop);
338 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
/* Pop the enclosing loop's state. */
340 assert(mask->loop_stack_size);
341 --mask->loop_stack_size;
342 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
343 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
344 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
345 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
347 lp_exec_mask_update(mask);
350 /* stores val into an address pointed to by dst.
351 * mask->exec_mask is used to figure out which bits of val
352 * should be stored into the address
353 * (0 means don't store this bit, 1 means do store).
355 static void lp_exec_mask_store(struct lp_exec_mask *mask,
360 /* Mix the predicate and execution mask */
361 if (mask->has_mask) {
363 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
/* No predicate supplied: the execution mask alone selects lanes. */
365 pred = mask->exec_mask;
370 LLVMValueRef real_val, dst_val;
/* Masked path: read-modify-write with a per-lane select. */
372 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
373 real_val = lp_build_select(mask->bld,
377 LLVMBuildStore(mask->bld->builder, real_val, dst);
/* Unmasked path: plain store. */
379 LLVMBuildStore(mask->bld->builder, val, dst);
/* Handle CAL: push the return pc and the current ret_mask onto the call
 * stack.  NOTE(review): the redirect of *pc to the callee presumably
 * happens on a line not visible here -- confirm.
 */
382 static void lp_exec_mask_call(struct lp_exec_mask *mask,
386 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
387 mask->call_stack[mask->call_stack_size].pc = *pc;
388 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
389 mask->call_stack_size++;
/* Handle RET: inside a subroutine, disable the currently-executing lanes
 * in ret_mask (ret_mask &= ~exec_mask); a return from main() is handled
 * separately in the truncated branch below.
 */
393 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
395 LLVMValueRef exec_mask;
397 if (mask->call_stack_size == 0) {
398 /* returning from main() */
402 exec_mask = LLVMBuildNot(mask->bld->builder,
406 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
408 exec_mask, "ret_full");
410 lp_exec_mask_update(mask);
/* Handle BGNSUB: nothing to do -- call state is managed by CAL/ENDSUB. */
413 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
/* Handle ENDSUB: pop the call stack, restoring the caller's pc and
 * return mask.
 */
417 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
419 assert(mask->call_stack_size);
420 mask->call_stack_size--;
421 *pc = mask->call_stack[mask->call_stack_size].pc;
422 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
423 lp_exec_mask_update(mask);
428 * Return pointer to a temporary register channel (src or dest).
429 * Note that indirect addressing cannot be handled here.
430 * \param index which temporary register
431 * \param chan which channel of the temp register.
434 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
/* If temps live in the flat indirect array, index it as reg*4 + chan. */
439 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
440 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
441 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
/* Otherwise each channel has its own alloca. */
444 return bld->temps[index][chan];
449 * Return pointer to a output register channel (src or dest).
450 * Note that indirect addressing cannot be handled here.
451 * \param index which output register
452 * \param chan which channel of the output register.
455 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
/* Flat indirect output array: index as reg*4 + chan. */
460 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
461 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
462 return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
/* Otherwise each output channel has its own alloca. */
465 return bld->outputs[index][chan];
/* Gather: build a vector by loading one scalar per lane from
 * base_ptr[indexes[i]].
 */
473 * XXX the lp_build_gather() function should be capable of doing this
474 * with a little work.
477 build_gather(struct lp_build_tgsi_soa_context *bld,
478 LLVMValueRef base_ptr,
479 LLVMValueRef indexes)
481 LLVMValueRef res = bld->base.undef;
485 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
487 for (i = 0; i < bld->base.type.length; i++) {
488 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
489 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
491 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
492 &index, 1, "gather_ptr");
493 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
495 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
503 * Scatter/store vector.
506 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
507 LLVMValueRef base_ptr,
508 LLVMValueRef indexes,
510 struct lp_exec_mask *mask,
513 LLVMBuilderRef builder = bld->base.builder;
516 /* Mix the predicate and execution mask */
517 if (mask->has_mask) {
519 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
/* No predicate: use the execution mask directly. */
522 pred = mask->exec_mask;
527 * Loop over elements of index_vec, store scalar value.
529 for (i = 0; i < bld->base.type.length; i++) {
530 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
531 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
532 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
533 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
/* Per-lane predicate bit for this element (NULL if unmasked). */
534 LLVMValueRef scalar_pred = pred ?
535 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
/* Debug trace of each scatter (guard appears on a line not visible here). */
538 lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
539 ii, val, index, scalar_ptr);
/* Masked element: read-modify-write with a scalar select. */
542 LLVMValueRef real_val, dst_val;
543 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
544 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
545 LLVMBuildStore(builder, real_val, scalar_ptr);
/* Unmasked element: plain store. */
548 LLVMBuildStore(builder, val, scalar_ptr);
555 * Read the current value of the ADDR register, convert the floats to
556 * ints, add the base index and return the vector of offsets.
557 * The offsets will be used to index into the constant buffer or
558 * temporary register file.
561 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
562 unsigned reg_file, unsigned reg_index,
563 const struct tgsi_src_register *indirect_reg)
565 struct lp_build_context *uint_bld = &bld->uint_bld;
566 /* always use X component of address register */
567 unsigned swizzle = indirect_reg->SwizzleX;
570 LLVMValueRef max_index;
573 assert(bld->indirect_files & (1 << reg_file));
/* Splat the static register index across all lanes. */
575 base = lp_build_const_int_vec(uint_bld->type, reg_index);
/* Load the (float) ADDR register channel for each lane... */
578 rel = LLVMBuildLoad(bld->base.builder,
579 bld->addr[indirect_reg->Index][swizzle],
582 /* for indexing we want integers */
583 rel = LLVMBuildFPToSI(bld->base.builder,
585 uint_bld->vec_type, "");
587 index = lp_build_add(uint_bld, base, rel);
/* Clamp against the file's declared maximum to avoid out-of-bounds
 * access.  NOTE(review): negative results of the FPToSI wrap to huge
 * unsigned values; the unsigned min() also catches those -- verify.
 */
589 max_index = lp_build_const_int_vec(uint_bld->type,
590 bld->info->file_max[reg_file]);
592 assert(!uint_bld->type.sign);
593 index = lp_build_min(uint_bld, index, max_index);
/* emit_fetch: fetch one channel of a source operand as a vector, applying
 * the operand's swizzle, indirect addressing and sign mode.  (The function
 * header line itself is not visible in this view.)
 */
604 struct lp_build_tgsi_soa_context *bld,
605 const struct tgsi_full_instruction *inst,
607 const unsigned chan_index )
609 struct lp_build_context *uint_bld = &bld->uint_bld;
610 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
611 const unsigned swizzle =
612 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
614 LLVMValueRef indirect_index = NULL;
617 assert(0 && "invalid swizzle in emit_fetch()");
618 return bld->base.undef;
/* Resolve the per-lane indirect offset once, up front. */
621 if (reg->Register.Indirect) {
622 indirect_index = get_indirect_index(bld,
627 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
630 switch (reg->Register.File) {
631 case TGSI_FILE_CONSTANT:
632 if (reg->Register.Indirect) {
633 LLVMValueRef swizzle_vec =
634 lp_build_const_int_vec(uint_bld->type, swizzle);
635 LLVMValueRef index_vec; /* index into the const buffer */
637 /* index_vec = indirect_index * 4 + swizzle */
638 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
639 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
641 /* Gather values from the constant buffer */
642 res = build_gather(bld, bld->consts_ptr, index_vec);
/* Direct constant: load one scalar and broadcast to all lanes. */
645 LLVMValueRef index; /* index into the const buffer */
646 LLVMValueRef scalar, scalar_ptr;
648 index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
650 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
652 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
654 res = lp_build_broadcast_scalar(&bld->base, scalar);
658 case TGSI_FILE_IMMEDIATE:
659 res = bld->immediates[reg->Register.Index][swizzle];
663 case TGSI_FILE_INPUT:
664 res = bld->inputs[reg->Register.Index][swizzle];
668 case TGSI_FILE_TEMPORARY:
669 if (reg->Register.Indirect) {
670 LLVMValueRef swizzle_vec =
671 lp_build_const_int_vec(uint_bld->type, swizzle);
672 LLVMValueRef length_vec =
673 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
674 LLVMValueRef index_vec; /* index into the const buffer */
675 LLVMValueRef temps_array;
676 LLVMTypeRef float4_ptr_type;
678 /* index_vec = (indirect_index * 4 + swizzle) * length */
679 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
680 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
681 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
683 /* cast temps_array pointer to float* */
684 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
685 temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
686 float4_ptr_type, "");
688 /* Gather values from the temporary register array */
689 res = build_gather(bld, temps_array, index_vec);
/* Direct temporary: load the per-channel alloca. */
692 LLVMValueRef temp_ptr;
693 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
694 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
696 return bld->base.undef;
701 assert(0 && "invalid src register in emit_fetch()");
702 return bld->base.undef;
/* Apply the TGSI sign mode (abs / -abs / negate / keep). */
705 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
706 case TGSI_UTIL_SIGN_CLEAR:
707 res = lp_build_abs( &bld->base, res );
710 case TGSI_UTIL_SIGN_SET:
/* SIGN_SET = -|x|: abs here; the negate presumably follows on a line
 * not visible in this view -- confirm. */
711 res = lp_build_abs( &bld->base, res );
713 case TGSI_UTIL_SIGN_TOGGLE:
714 res = lp_build_negate( &bld->base, res );
717 case TGSI_UTIL_SIGN_KEEP:
726 * Register fetch with derivatives.
730 struct lp_build_tgsi_soa_context *bld,
731 const struct tgsi_full_instruction *inst,
733 const unsigned chan_index,
/* Fetch the operand, then optionally compute its screen-space
 * derivatives via the quad helpers.
 */
740 src = emit_fetch(bld, inst, index, chan_index);
745 /* TODO: use interpolation coeffs for inputs */
748 *ddx = lp_build_ddx(&bld->base, src);
751 *ddy = lp_build_ddy(&bld->base, src);
/* Fetch the instruction's predicate register (if any) as per-channel
 * integer masks, honoring the predicate swizzle and Negate flag.
 */
759 emit_fetch_predicate(
760 struct lp_build_tgsi_soa_context *bld,
761 const struct tgsi_full_instruction *inst,
765 unsigned char swizzles[4];
/* Cache of already-converted predicate channels, keyed by swizzle. */
766 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
/* No predication on this instruction: leave all channels unset. */
770 if (!inst->Instruction.Predicate) {
771 FOR_EACH_CHANNEL( chan ) {
777 swizzles[0] = inst->Predicate.SwizzleX;
778 swizzles[1] = inst->Predicate.SwizzleY;
779 swizzles[2] = inst->Predicate.SwizzleZ;
780 swizzles[3] = inst->Predicate.SwizzleW;
782 index = inst->Predicate.Index;
783 assert(index < LP_MAX_TGSI_PREDS);
785 FOR_EACH_CHANNEL( chan ) {
786 unsigned swizzle = swizzles[chan];
789 * Only fetch the predicate register channels that are actually listed
792 if (!unswizzled[swizzle]) {
793 value = LLVMBuildLoad(bld->base.builder,
794 bld->preds[index][swizzle], "");
797 * Convert the value to an integer mask.
799 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
800 * is needlessly causing two comparisons due to storing the intermediate
801 * result as float vector instead of an integer mask vector.
803 value = lp_build_compare(bld->base.builder,
808 if (inst->Predicate.Negate) {
809 value = LLVMBuildNot(bld->base.builder, value, "");
812 unswizzled[swizzle] = value;
/* Reuse the cached conversion for repeated swizzle components. */
814 value = unswizzled[swizzle];
/* emit_store: write one channel of a destination operand, applying
 * saturation, the writemask/predicate, and indirect addressing.  (The
 * function header line itself is not visible in this view.)
 */
827 struct lp_build_tgsi_soa_context *bld,
828 const struct tgsi_full_instruction *inst,
834 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
835 struct lp_build_context *uint_bld = &bld->uint_bld;
836 LLVMValueRef indirect_index = NULL;
/* Apply the instruction's saturate mode before storing. */
838 switch( inst->Instruction.Saturate ) {
842 case TGSI_SAT_ZERO_ONE:
843 value = lp_build_max(&bld->base, value, bld->base.zero);
844 value = lp_build_min(&bld->base, value, bld->base.one);
847 case TGSI_SAT_MINUS_PLUS_ONE:
848 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
849 value = lp_build_min(&bld->base, value, bld->base.one);
856 if (reg->Register.Indirect) {
857 indirect_index = get_indirect_index(bld,
862 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
865 switch( reg->Register.File ) {
866 case TGSI_FILE_OUTPUT:
867 if (reg->Register.Indirect) {
868 LLVMBuilderRef builder = bld->base.builder;
869 LLVMValueRef chan_vec =
870 lp_build_const_int_vec(uint_bld->type, chan_index);
871 LLVMValueRef length_vec =
872 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
873 LLVMValueRef index_vec; /* indexes into the temp registers */
874 LLVMValueRef outputs_array;
875 LLVMValueRef pixel_offsets;
876 LLVMTypeRef float_ptr_type;
879 /* build pixel offset vector: {0, 1, 2, 3, ...} */
880 pixel_offsets = uint_bld->undef;
881 for (i = 0; i < bld->base.type.length; i++) {
882 LLVMValueRef ii = lp_build_const_int32(i);
883 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
887 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
888 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
889 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
890 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
891 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
/* View the output array as a flat float* for scalar scatter. */
893 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
894 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
897 /* Scatter store values into temp registers */
898 emit_mask_scatter(bld, outputs_array, index_vec, value,
899 &bld->exec_mask, pred);
/* Direct output: masked store into the per-channel alloca. */
902 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
904 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
908 case TGSI_FILE_TEMPORARY:
909 if (reg->Register.Indirect) {
910 LLVMBuilderRef builder = bld->base.builder;
911 LLVMValueRef chan_vec =
912 lp_build_const_int_vec(uint_bld->type, chan_index);
913 LLVMValueRef length_vec =
914 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
915 LLVMValueRef index_vec; /* indexes into the temp registers */
916 LLVMValueRef temps_array;
917 LLVMValueRef pixel_offsets;
918 LLVMTypeRef float_ptr_type;
921 /* build pixel offset vector: {0, 1, 2, 3, ...} */
922 pixel_offsets = uint_bld->undef;
923 for (i = 0; i < bld->base.type.length; i++) {
924 LLVMValueRef ii = lp_build_const_int32(i);
925 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
929 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
930 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
931 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
932 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
933 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
935 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
936 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
939 /* Scatter store values into temp registers */
940 emit_mask_scatter(bld, temps_array, index_vec, value,
941 &bld->exec_mask, pred);
/* Direct temporary: masked store into the per-channel alloca. */
944 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
946 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
950 case TGSI_FILE_ADDRESS:
/* NOTE(review): 'reg->Indirect.Index' looks wrong for an ADDRESS
 * destination -- the other cases use reg->Register.Index, and later
 * upstream Mesa uses Register.Index here too.  Verify and fix. */
951 lp_exec_mask_store(&bld->exec_mask, pred, value,
952 bld->addr[reg->Indirect.Index][chan_index]);
955 case TGSI_FILE_PREDICATE:
956 lp_exec_mask_store(&bld->exec_mask, pred, value,
957 bld->preds[reg->Register.Index][chan_index]);
967 * High-level instruction translators.
/* Translate a TGSI texture instruction (TEX/TXP/TXB/TXL/TXD) into a call
 * through the pluggable sampler code generator.
 */
971 emit_tex( struct lp_build_tgsi_soa_context *bld,
972 const struct tgsi_full_instruction *inst,
973 enum lp_build_tex_modifier modifier,
977 LLVMValueRef lod_bias, explicit_lod;
978 LLVMValueRef oow = NULL;
979 LLVMValueRef coords[3];
/* No sampler generator: warn once and return undefs for all texels. */
986 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
987 for (i = 0; i < 4; i++) {
988 texel[i] = bld->base.undef;
/* Determine coordinate count from the texture target. */
993 switch (inst->Texture.Texture) {
994 case TGSI_TEXTURE_1D:
997 case TGSI_TEXTURE_2D:
998 case TGSI_TEXTURE_RECT:
1001 case TGSI_TEXTURE_SHADOW1D:
1002 case TGSI_TEXTURE_SHADOW2D:
1003 case TGSI_TEXTURE_SHADOWRECT:
1004 case TGSI_TEXTURE_3D:
1005 case TGSI_TEXTURE_CUBE:
/* LOD bias / explicit LOD both come from src0.w. */
1013 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1014 lod_bias = emit_fetch( bld, inst, 0, 3 );
1015 explicit_lod = NULL;
1017 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1019 explicit_lod = emit_fetch( bld, inst, 0, 3 );
1023 explicit_lod = NULL;
/* Projected texturing: divide coords by w (multiply by 1/w). */
1026 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1027 oow = emit_fetch( bld, inst, 0, 3 );
1028 oow = lp_build_rcp(&bld->base, oow);
1031 for (i = 0; i < num_coords; i++) {
1032 coords[i] = emit_fetch( bld, inst, 0, i );
1033 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1034 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1036 for (i = num_coords; i < 3; i++) {
1037 coords[i] = bld->base.undef;
/* TXD: explicit derivatives from src1/src2 (first element of each);
 * sampler index is then src3.  Otherwise derive ddx/ddy per quad and
 * the sampler index is src1.
 */
1040 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1041 LLVMTypeRef i32t = LLVMInt32Type();
1042 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
1043 for (i = 0; i < num_coords; i++) {
1044 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1045 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1046 ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
1047 ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
1049 unit = inst->Src[3].Register.Index;
1051 for (i = 0; i < num_coords; i++) {
1052 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1053 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1055 unit = inst->Src[1].Register.Index;
1057 for (i = num_coords; i < 3; i++) {
1058 ddx[i] = LLVMGetUndef(bld->base.elem_type);
1059 ddy[i] = LLVMGetUndef(bld->base.elem_type);
/* Delegate the actual texel fetch to the sampler code generator. */
1062 bld->sampler->emit_fetch_texel(bld->sampler,
1065 unit, num_coords, coords,
1067 lod_bias, explicit_lod,
/* Return whether the shader is about to end (within the next few
 * instructions, with no control flow or texturing in between).  Used to
 * skip the early-exit mask check after KIL/KILP near the shader's end.
 */
1072 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
/* Look ahead at most 5 instructions. */
1077 for (i = 0; i < 5; i++) {
1080 if (pc + i >= bld->info->num_instructions)
1083 opcode = bld->instructions[pc + i].Instruction.Opcode;
1085 if (opcode == TGSI_OPCODE_END)
/* Any of these makes an early exit worthwhile: bail out. */
1088 if (opcode == TGSI_OPCODE_TEX ||
1089 opcode == TGSI_OPCODE_TXP ||
1090 opcode == TGSI_OPCODE_TXD ||
1091 opcode == TGSI_OPCODE_TXB ||
1092 opcode == TGSI_OPCODE_TXL ||
1093 opcode == TGSI_OPCODE_TXF ||
1094 opcode == TGSI_OPCODE_TXQ ||
1095 opcode == TGSI_OPCODE_CAL ||
1096 opcode == TGSI_OPCODE_CALLNZ ||
1097 opcode == TGSI_OPCODE_IF ||
1098 opcode == TGSI_OPCODE_IFC ||
1099 opcode == TGSI_OPCODE_BGNLOOP ||
1100 opcode == TGSI_OPCODE_SWITCH)
1110 * Kill fragment if any of the src register values are negative.
1114 struct lp_build_tgsi_soa_context *bld,
1115 const struct tgsi_full_instruction *inst,
1118 const struct tgsi_full_src_register *reg = &inst->Src[0];
1119 LLVMValueRef terms[NUM_CHANNELS];
1121 unsigned chan_index;
1123 memset(&terms, 0, sizeof terms);
/* Fetch each referenced source channel at most once (by swizzle). */
1125 FOR_EACH_CHANNEL( chan_index ) {
1128 /* Unswizzle channel */
1129 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1131 /* Check if the component has not been already tested. */
1132 assert(swizzle < NUM_CHANNELS);
1133 if( !terms[swizzle] )
1134 /* TODO: change the comparison operator instead of setting the sign */
1135 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
/* AND together per-channel "keep" masks: lane survives iff term >= 0. */
1139 FOR_EACH_CHANNEL( chan_index ) {
1140 if(terms[chan_index]) {
1141 LLVMValueRef chan_mask;
1144 * If term < 0 then mask = 0 else mask = ~0.
1146 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1149 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
/* Fold the kill mask into the screen-level fragment mask. */
1156 lp_build_mask_update(bld->mask, mask);
/* Possibly terminate the shader early if all fragments are dead. */
1158 if (!near_end_of_shader(bld, pc))
1159 lp_build_mask_check(bld->mask);
1165 * Predicated fragment kill.
1166 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1167 * The only predication is the execution mask which will apply if
1168 * we're inside a loop or conditional.
1171 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1172 const struct tgsi_full_instruction *inst,
1177 /* For those channels which are "alive", disable fragment shader
1180 if (bld->exec_mask.has_mask) {
/* Kill only the lanes currently executing: mask = ~exec_mask. */
1181 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
/* No execution mask active: kill everything (all-zero mask). */
1184 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1188 lp_build_mask_update(bld->mask, mask);
/* Possibly terminate the shader early if all fragments are dead. */
1190 if (!near_end_of_shader(bld, pc))
1191 lp_build_mask_check(bld->mask);
1196 * Emit code which will dump the value of all the temporary registers
/* Debug aid: emits runtime printf calls that print every channel of every
 * temporary register for each lane.
 */
1200 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1202 LLVMBuilderRef builder = bld->base.builder;
1203 LLVMValueRef temp_ptr;
/* Constant lane indices 0..3 used to extract the four vector elements. */
1204 LLVMValueRef i0 = lp_build_const_int32(0);
1205 LLVMValueRef i1 = lp_build_const_int32(1);
1206 LLVMValueRef i2 = lp_build_const_int32(2);
1207 LLVMValueRef i3 = lp_build_const_int32(3);
1209 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1211 for (index = 0; index < n; index++) {
1212 LLVMValueRef idx = lp_build_const_int32(index);
1213 LLVMValueRef v[4][4], res;
1216 lp_build_printf(builder, "TEMP[%d]:\n", idx);
/* Load each channel and break it into its four lane values. */
1218 for (chan = 0; chan < 4; chan++) {
1219 temp_ptr = get_temp_ptr(bld, index, chan);
1220 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1221 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1222 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1223 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1224 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1227 lp_build_printf(builder, " X: %f %f %f %f\n",
1228 v[0][0], v[0][1], v[0][2], v[0][3]);
1229 lp_build_printf(builder, " Y: %f %f %f %f\n",
1230 v[1][0], v[1][1], v[1][2], v[1][3]);
1231 lp_build_printf(builder, " Z: %f %f %f %f\n",
1232 v[2][0], v[2][1], v[2][2], v[2][3]);
1233 lp_build_printf(builder, " W: %f %f %f %f\n",
1234 v[3][0], v[3][1], v[3][2], v[3][3]);
/* emit_declaration: allocate storage (allocas, or flat arrays when the
 * file is indirectly addressed) for one TGSI declaration.  (The function
 * header line itself is not visible in this view.)
 */
1242 struct lp_build_tgsi_soa_context *bld,
1243 const struct tgsi_full_declaration *decl)
1245 LLVMTypeRef vec_type = bld->base.vec_type;
1246 const unsigned first = decl->Range.First;
1247 const unsigned last = decl->Range.Last;
1250 for (idx = first; idx <= last; ++idx) {
1251 assert(last <= bld->info->file_max[decl->Declaration.File]);
1252 switch (decl->Declaration.File) {
1253 case TGSI_FILE_TEMPORARY:
1254 assert(idx < LP_MAX_TGSI_TEMPS);
1255 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1256 /* ignore 'first' - we want to index into a 0-based array */
1257 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1259 bld->temps_array = lp_build_array_alloca(bld->base.builder,
1260 vec_type, array_size,
/* Direct addressing: one alloca per register channel. */
1264 for (i = 0; i < NUM_CHANNELS; i++)
1265 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1270 case TGSI_FILE_OUTPUT:
1271 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1272 /* ignore 'first' - we want to index into a 0-based array */
1273 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1275 bld->outputs_array = lp_build_array_alloca(bld->base.builder,
1276 vec_type, array_size,
/* Direct addressing: one alloca per output channel. */
1280 for (i = 0; i < NUM_CHANNELS; i++)
1281 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1282 vec_type, "output");
1286 case TGSI_FILE_ADDRESS:
1287 assert(idx < LP_MAX_TGSI_ADDRS);
1288 for (i = 0; i < NUM_CHANNELS; i++)
1289 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1293 case TGSI_FILE_PREDICATE:
1294 assert(idx < LP_MAX_TGSI_PREDS);
1295 for (i = 0; i < NUM_CHANNELS; i++)
1296 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1297 vec_type, "predicate");
1301 /* don't need to declare other vars */
/* NOTE(review): this listing carries fused source-line numbers and has elided
 * lines (break statements and closing braces not shown); the code tokens below
 * are left byte-identical to the extracted original. */
1309 * Emit LLVM for one TGSI instruction.
1310 * \param return TRUE for success, FALSE otherwise
1314 struct lp_build_tgsi_soa_context *bld,
1315 const struct tgsi_full_instruction *inst,
1316 const struct tgsi_opcode_info *info,
1319 unsigned chan_index;
1320 LLVMValueRef src0, src1, src2;
1321 LLVMValueRef tmp0, tmp1, tmp2;
1322 LLVMValueRef tmp3 = NULL;
1323 LLVMValueRef tmp4 = NULL;
1324 LLVMValueRef tmp5 = NULL;
1325 LLVMValueRef tmp6 = NULL;
1326 LLVMValueRef tmp7 = NULL;
1328 LLVMValueRef dst0[NUM_CHANNELS];
1331 * Stores and write masks are handled in a general fashion after the long
1332 * instruction opcode switch statement.
1334 * Although not stricitly necessary, we avoid generating instructions for
1335 * channels which won't be stored, in cases where's that easy. For some
1336 * complex instructions, like texture sampling, it is more convenient to
1337 * assume a full writemask and then let LLVM optimization passes eliminate
/* Initialize every enabled destination channel to undef; each opcode case
 * below overwrites the channels it actually computes. */
1343 assert(info->num_dst <= 1);
1344 if (info->num_dst) {
1345 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1346 dst0[chan_index] = bld->base.undef;
1350 switch (inst->Instruction.Opcode) {
1351 case TGSI_OPCODE_ARL:
1352 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1353 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1354 tmp0 = lp_build_floor(&bld->base, tmp0);
1355 dst0[chan_index] = tmp0;
1359 case TGSI_OPCODE_MOV:
1360 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1361 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1365 case TGSI_OPCODE_LIT:
1366 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1367 dst0[CHAN_X] = bld->base.one;
1369 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1370 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1371 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1373 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1374 /* XMM[1] = SrcReg[0].yyyy */
1375 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1376 /* XMM[1] = max(XMM[1], 0) */
1377 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1378 /* XMM[2] = SrcReg[0].wwww */
1379 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1380 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1381 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1382 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1383 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1385 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1386 dst0[CHAN_W] = bld->base.one;
1390 case TGSI_OPCODE_RCP:
1391 /* TGSI_OPCODE_RECIP */
1392 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1393 res = lp_build_rcp(&bld->base, src0);
1394 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1395 dst0[chan_index] = res;
1399 case TGSI_OPCODE_RSQ:
1400 /* TGSI_OPCODE_RECIPSQRT */
1401 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1402 src0 = lp_build_abs(&bld->base, src0);
1403 res = lp_build_rsqrt(&bld->base, src0);
1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1405 dst0[chan_index] = res;
1409 case TGSI_OPCODE_EXP:
/* EXP computes up to three partial results (2^floor(x), frac(x), 2^x);
 * only the pointers for enabled channels are passed to the approx helper. */
1410 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1411 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1412 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1413 LLVMValueRef *p_exp2_int_part = NULL;
1414 LLVMValueRef *p_frac_part = NULL;
1415 LLVMValueRef *p_exp2 = NULL;
1417 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1419 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1420 p_exp2_int_part = &tmp0;
1421 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1422 p_frac_part = &tmp1;
1423 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1426 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1428 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1429 dst0[CHAN_X] = tmp0;
1430 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1431 dst0[CHAN_Y] = tmp1;
1432 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1433 dst0[CHAN_Z] = tmp2;
1436 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1437 dst0[CHAN_W] = bld->base.one;
1441 case TGSI_OPCODE_LOG:
1442 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1443 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1444 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1445 LLVMValueRef *p_floor_log2 = NULL;
1446 LLVMValueRef *p_exp = NULL;
1447 LLVMValueRef *p_log2 = NULL;
1449 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1450 src0 = lp_build_abs( &bld->base, src0 );
1452 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1453 p_floor_log2 = &tmp0;
1454 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1456 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1459 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1461 /* dst.x = floor(lg2(abs(src.x))) */
1462 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1463 dst0[CHAN_X] = tmp0;
1464 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1465 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1466 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1468 /* dst.z = lg2(abs(src.x)) */
1469 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1470 dst0[CHAN_Z] = tmp2;
1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1474 dst0[CHAN_W] = bld->base.one;
1478 case TGSI_OPCODE_MUL:
1479 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1480 src0 = emit_fetch( bld, inst, 0, chan_index );
1481 src1 = emit_fetch( bld, inst, 1, chan_index );
1482 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1486 case TGSI_OPCODE_ADD:
1487 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1488 src0 = emit_fetch( bld, inst, 0, chan_index );
1489 src1 = emit_fetch( bld, inst, 1, chan_index );
1490 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1494 case TGSI_OPCODE_DP3:
1495 /* TGSI_OPCODE_DOT3 */
1496 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1497 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1498 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1499 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1500 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1501 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1502 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1503 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1504 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1505 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1506 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1507 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1508 dst0[chan_index] = tmp0;
1512 case TGSI_OPCODE_DP4:
1513 /* TGSI_OPCODE_DOT4 */
1514 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1515 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1516 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1517 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1518 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1519 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1520 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1521 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1522 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1523 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1524 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1525 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1526 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1527 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1528 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1529 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1530 dst0[chan_index] = tmp0;
1534 case TGSI_OPCODE_DST:
1535 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1536 dst0[CHAN_X] = bld->base.one;
1538 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1539 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1540 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1541 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1543 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1544 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1546 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1547 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1551 case TGSI_OPCODE_MIN:
1552 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1553 src0 = emit_fetch( bld, inst, 0, chan_index );
1554 src1 = emit_fetch( bld, inst, 1, chan_index );
1555 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1559 case TGSI_OPCODE_MAX:
1560 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1561 src0 = emit_fetch( bld, inst, 0, chan_index );
1562 src1 = emit_fetch( bld, inst, 1, chan_index );
1563 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1567 case TGSI_OPCODE_SLT:
1568 /* TGSI_OPCODE_SETLT */
1569 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1570 src0 = emit_fetch( bld, inst, 0, chan_index );
1571 src1 = emit_fetch( bld, inst, 1, chan_index );
1572 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1573 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1577 case TGSI_OPCODE_SGE:
1578 /* TGSI_OPCODE_SETGE */
1579 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1580 src0 = emit_fetch( bld, inst, 0, chan_index );
1581 src1 = emit_fetch( bld, inst, 1, chan_index );
1582 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1583 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1587 case TGSI_OPCODE_MAD:
1588 /* TGSI_OPCODE_MADD */
1589 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1590 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1591 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1592 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1593 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1594 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1595 dst0[chan_index] = tmp0;
1599 case TGSI_OPCODE_SUB:
1600 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1601 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1602 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1603 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1607 case TGSI_OPCODE_LRP:
1608 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1609 src0 = emit_fetch( bld, inst, 0, chan_index );
1610 src1 = emit_fetch( bld, inst, 1, chan_index );
1611 src2 = emit_fetch( bld, inst, 2, chan_index );
1612 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1613 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1614 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1618 case TGSI_OPCODE_CND:
1619 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1620 src0 = emit_fetch( bld, inst, 0, chan_index );
1621 src1 = emit_fetch( bld, inst, 1, chan_index );
1622 src2 = emit_fetch( bld, inst, 2, chan_index );
1623 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1624 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1625 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1629 case TGSI_OPCODE_DP2A:
1630 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1631 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1632 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1633 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1634 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1635 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1636 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1637 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1638 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1639 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1640 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1644 case TGSI_OPCODE_FRC:
1645 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1646 src0 = emit_fetch( bld, inst, 0, chan_index );
1647 tmp0 = lp_build_floor(&bld->base, src0);
1648 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1649 dst0[chan_index] = tmp0;
1653 case TGSI_OPCODE_CLAMP:
1654 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1655 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1656 src1 = emit_fetch( bld, inst, 1, chan_index );
1657 src2 = emit_fetch( bld, inst, 2, chan_index );
1658 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1659 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1660 dst0[chan_index] = tmp0;
1664 case TGSI_OPCODE_FLR:
1665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1666 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1667 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1671 case TGSI_OPCODE_ROUND:
1672 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1673 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1674 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1678 case TGSI_OPCODE_EX2: {
1679 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1680 tmp0 = lp_build_exp2( &bld->base, tmp0);
1681 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1682 dst0[chan_index] = tmp0;
1687 case TGSI_OPCODE_LG2:
1688 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1689 tmp0 = lp_build_log2( &bld->base, tmp0);
1690 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1691 dst0[chan_index] = tmp0;
1695 case TGSI_OPCODE_POW:
1696 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1697 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1698 res = lp_build_pow( &bld->base, src0, src1 );
1699 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1700 dst0[chan_index] = res;
1704 case TGSI_OPCODE_XPD:
/* Cross product: fetch only the source channels needed by the enabled
 * destination channels, then combine pairwise multiplies/subtracts. */
1705 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1706 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1707 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1708 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1710 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1711 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1712 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1713 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1715 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1717 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1719 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1720 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1721 dst0[CHAN_X] = tmp2;
1723 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1724 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1725 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1726 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1728 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1729 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1730 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1731 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1732 dst0[CHAN_Y] = tmp3;
1734 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1735 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1736 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1737 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1738 dst0[CHAN_Z] = tmp5;
1740 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1741 dst0[CHAN_W] = bld->base.one;
1745 case TGSI_OPCODE_ABS:
1746 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1747 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1748 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1752 case TGSI_OPCODE_RCC:
1757 case TGSI_OPCODE_DPH:
1758 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1759 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1760 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1761 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1762 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1763 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1764 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1765 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1766 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1767 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1768 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1769 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1770 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1771 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1772 dst0[chan_index] = tmp0;
1776 case TGSI_OPCODE_COS:
1777 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1778 tmp0 = lp_build_cos( &bld->base, tmp0 );
1779 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1780 dst0[chan_index] = tmp0;
1784 case TGSI_OPCODE_DDX:
1785 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1786 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1790 case TGSI_OPCODE_DDY:
1791 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1792 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1796 case TGSI_OPCODE_KILP:
1797 /* predicated kill */
1798 emit_kilp( bld, inst, (*pc)-1 );
1801 case TGSI_OPCODE_KIL:
1802 /* conditional kill */
1803 emit_kil( bld, inst, (*pc)-1 );
1806 case TGSI_OPCODE_PK2H:
1810 case TGSI_OPCODE_PK2US:
1814 case TGSI_OPCODE_PK4B:
1818 case TGSI_OPCODE_PK4UB:
1822 case TGSI_OPCODE_RFL:
1826 case TGSI_OPCODE_SEQ:
1827 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1828 src0 = emit_fetch( bld, inst, 0, chan_index );
1829 src1 = emit_fetch( bld, inst, 1, chan_index );
1830 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1831 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1835 case TGSI_OPCODE_SFL:
1836 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1837 dst0[chan_index] = bld->base.zero;
1841 case TGSI_OPCODE_SGT:
1842 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1843 src0 = emit_fetch( bld, inst, 0, chan_index );
1844 src1 = emit_fetch( bld, inst, 1, chan_index );
1845 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1846 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1850 case TGSI_OPCODE_SIN:
1851 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1852 tmp0 = lp_build_sin( &bld->base, tmp0 );
1853 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1854 dst0[chan_index] = tmp0;
1858 case TGSI_OPCODE_SLE:
1859 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1860 src0 = emit_fetch( bld, inst, 0, chan_index );
1861 src1 = emit_fetch( bld, inst, 1, chan_index );
1862 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1863 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1867 case TGSI_OPCODE_SNE:
1868 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1869 src0 = emit_fetch( bld, inst, 0, chan_index );
1870 src1 = emit_fetch( bld, inst, 1, chan_index );
1871 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1872 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1876 case TGSI_OPCODE_STR:
1877 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1878 dst0[chan_index] = bld->base.one;
1882 case TGSI_OPCODE_TEX:
1883 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1886 case TGSI_OPCODE_TXD:
1887 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1890 case TGSI_OPCODE_UP2H:
1896 case TGSI_OPCODE_UP2US:
1902 case TGSI_OPCODE_UP4B:
1908 case TGSI_OPCODE_UP4UB:
1914 case TGSI_OPCODE_X2D:
1920 case TGSI_OPCODE_ARA:
1926 case TGSI_OPCODE_ARR:
1927 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1928 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1929 tmp0 = lp_build_round(&bld->base, tmp0);
1930 dst0[chan_index] = tmp0;
1934 case TGSI_OPCODE_BRA:
1940 case TGSI_OPCODE_CAL:
1941 lp_exec_mask_call(&bld->exec_mask,
1947 case TGSI_OPCODE_RET:
1948 lp_exec_mask_ret(&bld->exec_mask, pc);
1951 case TGSI_OPCODE_END:
1954 emit_dump_temps(bld);
1959 case TGSI_OPCODE_SSG:
1960 /* TGSI_OPCODE_SGN */
1961 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1962 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1963 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1967 case TGSI_OPCODE_CMP:
1968 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1969 src0 = emit_fetch( bld, inst, 0, chan_index );
1970 src1 = emit_fetch( bld, inst, 1, chan_index );
1971 src2 = emit_fetch( bld, inst, 2, chan_index );
1972 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1973 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1977 case TGSI_OPCODE_SCS:
1978 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1979 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1980 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1982 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1983 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1984 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1986 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1987 dst0[CHAN_Z] = bld->base.zero;
1989 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1990 dst0[CHAN_W] = bld->base.one;
1994 case TGSI_OPCODE_TXB:
1995 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
1998 case TGSI_OPCODE_NRM:
2000 case TGSI_OPCODE_NRM4:
2001 /* 3 or 4-component normalization */
2003 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2005 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2006 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2007 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2008 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2010 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2013 /* xmm0 = src.x * src.x */
2014 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2015 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2018 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2021 /* xmm0 = xmm0 + src.y * src.y */
2022 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2023 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2026 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2027 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2030 /* xmm0 = xmm0 + src.z * src.z */
2031 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2032 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2035 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2036 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2040 /* xmm0 = xmm0 + src.w * src.w */
2041 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2042 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2045 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2046 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2049 /* xmm1 = 1 / sqrt(xmm0) */
2050 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2052 /* dst.x = xmm1 * src.x */
2053 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2054 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2057 /* dst.y = xmm1 * src.y */
2058 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2059 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2062 /* dst.z = xmm1 * src.z */
2063 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2064 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2067 /* dst.w = xmm1 * src.w */
/* NOTE(review): the guard below tests CHAN_X, but the comment above and
 * the assignment target dst0[CHAN_W] — this likely should be
 * IS_DST0_CHANNEL_ENABLED(inst, CHAN_W); confirm against NRM4 semantics. */
2068 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2069 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2074 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2075 dst0[CHAN_W] = bld->base.one;
2080 case TGSI_OPCODE_DIV:
2086 case TGSI_OPCODE_DP2:
2087 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2088 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2089 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2090 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2091 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2092 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2093 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2094 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2095 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2099 case TGSI_OPCODE_TXL:
2100 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2103 case TGSI_OPCODE_TXP:
2104 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2107 case TGSI_OPCODE_BRK:
2108 lp_exec_break(&bld->exec_mask);
2111 case TGSI_OPCODE_IF:
2112 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2113 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2114 tmp0, bld->base.zero);
2115 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2118 case TGSI_OPCODE_BGNLOOP:
2119 lp_exec_bgnloop(&bld->exec_mask);
2122 case TGSI_OPCODE_BGNSUB:
2123 lp_exec_mask_bgnsub(&bld->exec_mask);
2126 case TGSI_OPCODE_ELSE:
2127 lp_exec_mask_cond_invert(&bld->exec_mask);
2130 case TGSI_OPCODE_ENDIF:
2131 lp_exec_mask_cond_pop(&bld->exec_mask);
2134 case TGSI_OPCODE_ENDLOOP:
2135 lp_exec_endloop(&bld->exec_mask);
2138 case TGSI_OPCODE_ENDSUB:
2139 lp_exec_mask_endsub(&bld->exec_mask, pc);
2142 case TGSI_OPCODE_PUSHA:
2148 case TGSI_OPCODE_POPA:
2154 case TGSI_OPCODE_CEIL:
2155 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2156 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2157 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2161 case TGSI_OPCODE_I2F:
2167 case TGSI_OPCODE_NOT:
2173 case TGSI_OPCODE_TRUNC:
2174 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2175 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2176 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2180 case TGSI_OPCODE_SHL:
2186 case TGSI_OPCODE_ISHR:
2192 case TGSI_OPCODE_AND:
2198 case TGSI_OPCODE_OR:
2204 case TGSI_OPCODE_MOD:
2210 case TGSI_OPCODE_XOR:
2216 case TGSI_OPCODE_SAD:
2222 case TGSI_OPCODE_TXF:
2228 case TGSI_OPCODE_TXQ:
2234 case TGSI_OPCODE_CONT:
2235 lp_exec_continue(&bld->exec_mask);
2238 case TGSI_OPCODE_EMIT:
2242 case TGSI_OPCODE_ENDPRIM:
2246 case TGSI_OPCODE_NOP:
/* Store phase: fetch per-channel predicate masks and write each enabled
 * destination channel through them. */
2254 LLVMValueRef pred[NUM_CHANNELS];
2256 emit_fetch_predicate( bld, inst, pred );
2258 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2259 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
/* lp_build_tgsi_soa: main entry point — translate a TGSI token stream into
 * LLVM IR in SoA form.  Walks the tokens once to emit declarations, buffer
 * instructions and collect immediates, then replays the buffered instructions
 * through emit_instruction().  If outputs are indirectly addressed, a second
 * token walk rewires bld.outputs to point into the flat output alloca array.
 * NOTE(review): listing is elided — the function's closing lines lie outside
 * this view; code tokens below are left byte-identical. */
2268 lp_build_tgsi_soa(LLVMBuilderRef builder,
2269 const struct tgsi_token *tokens,
2270 struct lp_type type,
2271 struct lp_build_mask_context *mask,
2272 LLVMValueRef consts_ptr,
2273 const LLVMValueRef *pos,
2274 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2275 LLVMValueRef (*outputs)[NUM_CHANNELS],
2276 struct lp_build_sampler_soa *sampler,
2277 const struct tgsi_shader_info *info)
2279 struct lp_build_tgsi_soa_context bld;
2280 struct tgsi_parse_context parse;
2281 uint num_immediates = 0;
2282 uint num_instructions = 0;
2286 struct lp_type res_type;
2288 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2289 memset(&res_type, 0, sizeof res_type);
2290 res_type.width = type.width;
2291 res_type.length = type.length;
2294 /* Setup build context */
2295 memset(&bld, 0, sizeof bld);
2296 lp_build_context_init(&bld.base, builder, type);
2297 lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
2298 lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
2301 bld.inputs = inputs;
2302 bld.outputs = outputs;
2303 bld.consts_ptr = consts_ptr;
2304 bld.sampler = sampler;
2306 bld.indirect_files = info->indirect_files;
2307 bld.instructions = (struct tgsi_full_instruction *)
2308 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2309 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2311 if (!bld.instructions) {
2315 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2317 tgsi_parse_init( &parse, tokens );
2319 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2320 tgsi_parse_token( &parse );
2322 switch( parse.FullToken.Token.Type ) {
2323 case TGSI_TOKEN_TYPE_DECLARATION:
2324 /* Inputs already interpolated */
2325 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2328 case TGSI_TOKEN_TYPE_INSTRUCTION:
2330 /* save expanded instruction */
/* Grow the saved-instruction buffer by LP_MAX_INSTRUCTIONS entries when
 * full; REALLOC result is checked before overwriting bld.instructions. */
2331 if (num_instructions == bld.max_instructions) {
2332 struct tgsi_full_instruction *instructions;
2333 instructions = REALLOC(bld.instructions,
2334 bld.max_instructions
2335 * sizeof(struct tgsi_full_instruction),
2336 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2337 * sizeof(struct tgsi_full_instruction));
2338 if (!instructions) {
2341 bld.instructions = instructions;
2342 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2345 memcpy(bld.instructions + num_instructions,
2346 &parse.FullToken.FullInstruction,
2347 sizeof(bld.instructions[0]));
2354 case TGSI_TOKEN_TYPE_IMMEDIATE:
2355 /* simply copy the immediate values into the next immediates[] slot */
2357 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2359 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2360 for( i = 0; i < size; ++i )
2361 bld.immediates[num_immediates][i] =
2362 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2363 for( i = size; i < 4; ++i )
2364 bld.immediates[num_immediates][i] = bld.base.undef;
2369 case TGSI_TOKEN_TYPE_PROPERTY:
2378 struct tgsi_full_instruction *instr = bld.instructions + pc;
2379 const struct tgsi_opcode_info *opcode_info =
2380 tgsi_get_opcode_info(instr->Instruction.Opcode);
2381 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2382 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2383 opcode_info->mnemonic);
2386 /* If we have indirect addressing in outputs we need to copy our alloca array
2387 * to the outputs slots specified by the called */
2388 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2389 tgsi_parse_init(&parse, tokens);
2390 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2391 tgsi_parse_token( &parse );
2393 switch( parse.FullToken.Token.Type ) {
2394 case TGSI_TOKEN_TYPE_DECLARATION: {
2395 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
2396 /* Inputs already interpolated */
2397 if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
/* idx's initializer below is immediately overwritten by the loop. */
2398 unsigned idx = decl->Range.Last;
2399 const unsigned first = decl->Range.First;
2400 const unsigned last = decl->Range.Last;
2401 for (idx = first; idx <= last; ++idx)
2402 for (i = 0; i < NUM_CHANNELS; i++)
2403 bld.outputs[idx][i] = get_output_ptr(&bld, idx, i);
/* Debug path: dump the TGSI tokens and the generated LLVM function. */
2412 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2413 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2414 debug_printf("11111111111111111111111111111 \n");
2415 tgsi_dump(tokens, 0);
2416 lp_debug_dump_value(function);
2417 debug_printf("2222222222222222222222222222 \n");
2419 tgsi_parse_free( &parse );
2422 LLVMModuleRef module = LLVMGetGlobalParent(
2423 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2424 LLVMDumpModule(module);
2428 FREE( bld.instructions );