1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * TGSI to LLVM IR translation -- SoA.
33 * @author Jose Fonseca <jfonseca@vmware.com>
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
68 #define DUMP_GS_EMITS 0
/*
 * Initialize execution-mask bookkeeping for a shader: no active mask yet,
 * empty cond/loop/call/switch stacks, all per-lane masks set to all-ones
 * (every lane enabled), plus an i32 "looplimiter" alloca seeded with
 * LP_MAX_TGSI_LOOP_ITERATIONS to bound runaway loops.
 * NOTE(review): this listing elides some original lines (gaps in the
 * embedded numbering) — e.g. the store of the limiter constant is only
 * partially visible here.
 */
70 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
72 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
73 LLVMBuilderRef builder = bld->gallivm->builder;
76 mask->has_mask = FALSE;
77 mask->ret_in_main = FALSE;
78 mask->cond_stack_size = 0;
79 mask->loop_stack_size = 0;
80 mask->call_stack_size = 0;
81 mask->switch_stack_size = 0;
/* All masks start as all-ones: every lane executes until masked off. */
83 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
84 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
85 mask->cond_mask = mask->switch_mask =
86 LLVMConstAllOnes(mask->int_vec_type);
/* Loop iteration safety counter, decremented per ENDLOOP (see lp_exec_endloop). */
88 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
92 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
/*
 * Recompute mask->exec_mask as the AND of the component masks that are
 * currently relevant (cond, break/cont inside loops, switch, ret), and
 * update mask->has_mask accordingly.
 * NOTE(review): several AND operands are on lines elided from this
 * listing (numbering gaps), so only the control structure is visible.
 */
96 static void lp_exec_mask_update(struct lp_exec_mask *mask)
98 LLVMBuilderRef builder = mask->bld->gallivm->builder;
100 if (mask->loop_stack_size) {
101 /*for loops we need to update the entire mask at runtime */
103 assert(mask->break_mask);
104 tmp = LLVMBuildAnd(builder,
108 mask->exec_mask = LLVMBuildAnd(builder,
/* No loop active: condition mask alone drives execution. */
113 mask->exec_mask = mask->cond_mask;
115 if (mask->switch_stack_size) {
116 mask->exec_mask = LLVMBuildAnd(builder,
/* ret inside a subroutine, or a ret in main(), also narrows the mask. */
122 if (mask->call_stack_size || mask->ret_in_main) {
123 mask->exec_mask = LLVMBuildAnd(builder,
129 mask->has_mask = (mask->cond_stack_size > 0 ||
130 mask->loop_stack_size > 0 ||
131 mask->call_stack_size > 0 ||
132 mask->switch_stack_size > 0 ||
/*
 * Enter an IF: push the current cond_mask onto the condition stack and
 * AND the new condition value into cond_mask, then refresh exec_mask.
 */
136 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
139 LLVMBuilderRef builder = mask->bld->gallivm->builder;
141 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
142 if (mask->cond_stack_size == 0) {
/* At top level the condition mask must still be the untouched all-ones value. */
143 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
145 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
146 assert(LLVMTypeOf(val) == mask->int_vec_type);
147 mask->cond_mask = LLVMBuildAnd(builder,
151 lp_exec_mask_update(mask);
/*
 * ELSE: invert the current condition relative to the enclosing (stacked)
 * condition mask — lanes that did NOT take the IF branch, still limited
 * by the parent mask — then refresh exec_mask.
 */
154 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
156 LLVMBuilderRef builder = mask->bld->gallivm->builder;
157 LLVMValueRef prev_mask;
158 LLVMValueRef inv_mask;
160 assert(mask->cond_stack_size);
161 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
162 if (mask->cond_stack_size == 1) {
163 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
166 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
/* cond_mask = ~cond & parent (second AND operand is on an elided line). */
168 mask->cond_mask = LLVMBuildAnd(builder,
171 lp_exec_mask_update(mask);
/* ENDIF: restore the condition mask saved by the matching cond_push. */
174 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
176 assert(mask->cond_stack_size);
177 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
178 lp_exec_mask_update(mask);
/*
 * BGNLOOP: push the current loop state (block, cont/break masks, break
 * variable) onto the loop stack, record LOOP as the active break type,
 * spill break_mask to a fresh alloca (it must survive the loop back-edge),
 * branch into a new "bgnloop" basic block and reload break_mask there.
 */
181 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
183 LLVMBuilderRef builder = mask->bld->gallivm->builder;
185 if (mask->loop_stack_size == 0) {
/* Outermost loop: all loop-related state must still be in its reset state. */
186 assert(mask->loop_block == NULL);
187 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
188 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
189 assert(mask->break_var == NULL);
192 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
/* break_type stack is shared between loops and switches, hence the summed index. */
194 mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
196 mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
198 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
199 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
200 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
201 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
202 ++mask->loop_stack_size;
204 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
205 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
207 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
209 LLVMBuildBr(builder, mask->loop_block);
210 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
212 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
214 lp_exec_mask_update(mask);
/*
 * BRK: for a loop break, clear the currently-executing lanes out of
 * break_mask. For a switch break, either stop deferred-default execution
 * (jump back via switch_pc) when the break is unconditional, or mask the
 * breaking lanes out of switch_mask.
 * NOTE(review): some operands/branches sit on lines elided from this
 * listing (numbering gaps).
 */
217 static void lp_exec_break(struct lp_exec_mask *mask,
218 struct lp_build_tgsi_context * bld_base)
220 LLVMBuilderRef builder = mask->bld->gallivm->builder;
222 if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
223 LLVMValueRef exec_mask = LLVMBuildNot(builder,
227 mask->break_mask = LLVMBuildAnd(builder,
229 exec_mask, "break_full");
/* Switch break: peek at the next opcode to see if the break is unconditional. */
232 unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
233 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
234 opcode == TGSI_OPCODE_CASE);
237 if (mask->switch_in_default) {
239 * stop default execution but only if this is an unconditional switch.
240 * (The condition here is not perfect since dead code after break is
241 * allowed but should be sufficient since false negatives are just
242 * unoptimized - so we don't have to pre-evaluate that).
244 if(break_always && mask->switch_pc) {
245 bld_base->pc = mask->switch_pc;
/* Unconditional break: no lane of this switch continues executing. */
251 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
254 LLVMValueRef exec_mask = LLVMBuildNot(builder,
257 mask->switch_mask = LLVMBuildAnd(builder,
259 exec_mask, "break_switch");
263 lp_exec_mask_update(mask);
/*
 * Conditional break (BREAKC-style): lanes where the condition holds are
 * removed from break_mask (loop) or switch_mask (switch); the negated
 * condition is ANDed into the appropriate mask.
 */
266 static void lp_exec_break_condition(struct lp_exec_mask *mask,
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
273 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
275 if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
276 mask->break_mask = LLVMBuildAnd(builder,
278 cond_mask, "breakc_full");
281 mask->switch_mask = LLVMBuildAnd(builder,
283 cond_mask, "breakc_switch");
286 lp_exec_mask_update(mask);
/*
 * CONT: mask the currently-executing lanes out of cont_mask so they skip
 * the remainder of this loop iteration (one AND operand is on an elided line).
 */
289 static void lp_exec_continue(struct lp_exec_mask *mask)
291 LLVMBuilderRef builder = mask->bld->gallivm->builder;
292 LLVMValueRef exec_mask = LLVMBuildNot(builder,
296 mask->cont_mask = LLVMBuildAnd(builder,
300 lp_exec_mask_update(mask);
/*
 * ENDLOOP: restore cont_mask for the next iteration (without popping),
 * persist break_mask across the back-edge via break_var, decrement the
 * loop limiter, and branch back to the loop header while any lane is
 * still live AND the limiter is positive; otherwise fall through to the
 * new "endloop" block and pop the saved loop state.
 */
304 static void lp_exec_endloop(struct gallivm_state *gallivm,
305 struct lp_exec_mask *mask)
307 LLVMBuilderRef builder = mask->bld->gallivm->builder;
308 LLVMBasicBlockRef endloop;
309 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
/* Integer type wide enough to hold the whole mask vector for a single compare. */
310 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
311 mask->bld->type.width *
312 mask->bld->type.length);
313 LLVMValueRef i1cond, i2cond, icond, limiter;
315 assert(mask->break_mask);
318 * Restore the cont_mask, but don't pop
320 assert(mask->loop_stack_size);
321 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
322 lp_exec_mask_update(mask);
325 * Unlike the continue mask, the break_mask must be preserved across loop
328 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
330 /* Decrement the loop limiter */
331 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
333 limiter = LLVMBuildSub(
336 LLVMConstInt(int_type, 1, false),
339 LLVMBuildStore(builder, limiter, mask->loop_limiter);
341 /* i1cond = (mask != 0) */
342 i1cond = LLVMBuildICmp(
345 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
346 LLVMConstNull(reg_type), "i1cond");
348 /* i2cond = (looplimiter > 0) */
349 i2cond = LLVMBuildICmp(
353 LLVMConstNull(int_type), "i2cond");
355 /* if( i1cond && i2cond ) */
356 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
358 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
360 LLVMBuildCondBr(builder,
361 icond, mask->loop_block, endloop);
363 LLVMPositionBuilderAtEnd(builder, endloop);
/* Pop the loop state saved by lp_exec_bgnloop. */
365 assert(mask->loop_stack_size);
366 --mask->loop_stack_size;
367 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
368 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
369 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
370 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
371 mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
373 lp_exec_mask_update(mask);
/*
 * SWITCH: push the enclosing switch state, record SWITCH as the active
 * break type, remember the switch value, and start with empty (all-zero)
 * switch/default masks — lanes are enabled per-CASE in lp_exec_case().
 */
376 static void lp_exec_switch(struct lp_exec_mask *mask,
377 LLVMValueRef switchval)
379 mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
381 mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
383 mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
384 mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
385 mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
386 mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
387 mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
388 mask->switch_stack_size++;
390 mask->switch_val = switchval;
391 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
392 mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
393 mask->switch_in_default = false;
396 lp_exec_mask_update(mask);
/*
 * ENDSWITCH: if a DEFAULT was deferred (recorded via switch_pc) and not
 * yet executed, build its mask (parent mask minus all lanes any case
 * matched) and jump the interpreter pc back to the default body,
 * re-purposing switch_pc to return here afterwards. Otherwise pop the
 * saved switch state and restore the outer break type.
 */
399 static void lp_exec_endswitch(struct lp_exec_mask *mask,
400 struct lp_build_tgsi_context * bld_base)
402 LLVMBuilderRef builder = mask->bld->gallivm->builder;
404 /* check if there's deferred default if so do it now */
405 if (mask->switch_pc && !mask->switch_in_default) {
406 LLVMValueRef prevmask, defaultmask;
408 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
409 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
410 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
411 mask->switch_in_default = true;
413 lp_exec_mask_update(mask);
415 assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
416 TGSI_OPCODE_DEFAULT);
418 tmp_pc = bld_base->pc;
419 bld_base->pc = mask->switch_pc;
421 * re-purpose switch_pc to point to here again, since we stop execution of
422 * the deferred default after next break.
424 mask->switch_pc = tmp_pc - 1;
/* Second visit: we just returned from executing the deferred default. */
429 else if (mask->switch_pc && mask->switch_in_default) {
430 assert(bld_base->pc == mask->switch_pc + 1);
/* Pop the switch state saved by lp_exec_switch. */
433 mask->switch_stack_size--;
434 mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
435 mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
436 mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
437 mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
438 mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;
440 mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
442 lp_exec_mask_update(mask);
/*
 * CASE: lanes where caseval == switch_val are added (OR) into both the
 * running default-exclusion mask and the active switch mask, the latter
 * clamped (AND) to the parent switch mask. Skipped entirely while
 * executing a deferred default.
 */
445 static void lp_exec_case(struct lp_exec_mask *mask,
446 LLVMValueRef caseval)
448 LLVMBuilderRef builder = mask->bld->gallivm->builder;
450 LLVMValueRef casemask, prevmask;
452 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
453 if (!mask->switch_in_default) {
454 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
455 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
456 mask->switch_mask_default = LLVMBuildOr(builder, casemask,
457 mask->switch_mask_default, "sw_default_mask");
458 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
459 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
461 lp_exec_mask_update(mask);
466 * Analyse default statement in a switch.
467 * \return true if default is last statement, false otherwise
468 * \param default_pc_start contains pc of instruction to jump to
469 * if default wasn't last but there's no
470 * fallthrough into default.
/*
 * Scan forward from the current pc to decide whether the DEFAULT of the
 * innermost switch is its last statement (see the comment block above).
 * Tracks switch nesting so CASE/ENDSWITCH of inner switches are ignored.
 * NOTE(review): the return statements and stack-adjustment lines are on
 * lines elided from this listing; only the scan skeleton is visible.
 */
472 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
473 struct lp_build_tgsi_context * bld_base,
474 int *default_pc_start)
476 unsigned pc = bld_base->pc;
477 unsigned curr_switch_stack = mask->switch_stack_size;
479 /* skip over case statements which are together with default */
480 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
484 while (pc != -1 && pc < bld_base->num_instructions) {
485 unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
487 case TGSI_OPCODE_CASE:
488 if (curr_switch_stack == mask->switch_stack_size) {
489 *default_pc_start = pc - 1;
493 case TGSI_OPCODE_SWITCH:
496 case TGSI_OPCODE_ENDSWITCH:
497 if (curr_switch_stack == mask->switch_stack_size) {
498 *default_pc_start = pc - 1;
506 /* should never arrive here */
/*
 * DEFAULT: if it is the last statement of the switch, simply enable all
 * lanes no case matched (within the parent mask) and continue. Otherwise
 * defer it: record the pc in switch_pc and let lp_exec_endswitch() come
 * back to execute/re-execute the skipped range with the proper mask.
 */
511 static void lp_exec_default(struct lp_exec_mask *mask,
512 struct lp_build_tgsi_context * bld_base)
514 LLVMBuilderRef builder = mask->bld->gallivm->builder;
517 boolean default_is_last;
520 * This is a messy opcode, because it may not be always at the end and
521 * there can be fallthrough in and out of it.
524 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
526 * If it is last statement in switch (note that case statements appearing
527 * "at the same time" as default don't change that) everything is just fine,
528 * update switch mask and go on. This means we can handle default with
529 * fallthrough INTO it without overhead, if it is last.
531 if (default_is_last) {
532 LLVMValueRef prevmask, defaultmask;
533 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
/* default mask = lanes not claimed by any case, plus current fallthrough lanes. */
534 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
535 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
536 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
537 mask->switch_in_default = true;
539 lp_exec_mask_update(mask);
543 * Technically, "case" immediately before default isn't really a
544 * fallthrough, however we still have to count them as such as we
545 * already have updated the masks.
546 * If that happens in practice could add a switch optimizer pass
547 * which just gets rid of all case statements appearing together with
548 * default (or could do switch analysis at switch start time instead).
550 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
551 boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
552 opcode != TGSI_OPCODE_SWITCH);
554 * If it is not last statement and there was no fallthrough into it,
555 * we record the PC and continue execution at next case (again, those
556 * case encountered at the same time don't count). At endswitch
557 * time, we update switchmask, and go back executing the code we skipped
558 * until the next break (possibly re-executing some code with changed mask
559 * if there was a fallthrough out of default).
560 * Finally, if it is not last statement and there was a fallthrough into it,
561 * do the same as with the former case, except instead of skipping the code
562 * just execute it without updating the mask, then go back and re-execute.
564 mask->switch_pc = bld_base->pc;
/* NOTE(review): the ft_into branch around this jump is on elided lines. */
566 bld_base->pc = default_exec_pc;
572 /* stores val into an address pointed to by dst_ptr.
573 * mask->exec_mask is used to figure out which bits of val
574 * should be stored into the address
575 * (0 means don't store this bit, 1 means do store).
/*
 * Masked store of 'val' to *dst_ptr (see the comment block above): when
 * a mask/predicate is active, load the destination, blend per-lane with
 * lp_build_select(), and store the result; otherwise store directly.
 */
577 static void lp_exec_mask_store(struct lp_exec_mask *mask,
578 struct lp_build_context *bld_store,
581 LLVMValueRef dst_ptr)
583 LLVMBuilderRef builder = mask->bld->gallivm->builder;
585 assert(lp_check_value(bld_store->type, val));
586 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
587 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
589 /* Mix the predicate and execution mask */
590 if (mask->has_mask) {
592 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
594 pred = mask->exec_mask;
/* Predicated path: read-modify-write so masked-off lanes keep old values. */
599 LLVMValueRef res, dst;
601 dst = LLVMBuildLoad(builder, dst_ptr, "");
602 res = lp_build_select(bld_store, pred, val, dst);
603 LLVMBuildStore(builder, res, dst_ptr);
605 LLVMBuildStore(builder, val, dst_ptr);
/*
 * CAL: push the return pc and current ret_mask onto the call stack
 * (the jump to the subroutine pc is on lines elided from this listing).
 */
608 static void lp_exec_mask_call(struct lp_exec_mask *mask,
612 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
613 mask->call_stack[mask->call_stack_size].pc = *pc;
614 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
615 mask->call_stack_size++;
/*
 * RET: with no active control-flow nesting in main() this is a plain
 * return. Otherwise, mask the returning lanes out of ret_mask; a ret in
 * main() under control flow sets ret_in_main so the mask is kept even
 * though there is no call stack entry.
 */
619 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
621 LLVMBuilderRef builder = mask->bld->gallivm->builder;
622 LLVMValueRef exec_mask;
624 if (mask->cond_stack_size == 0 &&
625 mask->loop_stack_size == 0 &&
626 mask->switch_stack_size == 0 &&
627 mask->call_stack_size == 0) {
628 /* returning from main() */
633 if (mask->call_stack_size == 0) {
635 * This requires special handling since we need to ensure
636 * we don't drop the mask even if we have no call stack
637 * (e.g. after a ret in a if clause after the endif)
639 mask->ret_in_main = TRUE;
642 exec_mask = LLVMBuildNot(builder,
646 mask->ret_mask = LLVMBuildAnd(builder,
648 exec_mask, "ret_full");
650 lp_exec_mask_update(mask);
/* BGNSUB handler; body elided from this listing (numbering gap). */
653 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
/*
 * ENDSUB: pop the call stack, restoring the caller's pc and ret_mask
 * saved by lp_exec_mask_call().
 */
657 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
659 assert(mask->call_stack_size);
660 mask->call_stack_size--;
661 *pc = mask->call_stack[mask->call_stack_size].pc;
662 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
663 lp_exec_mask_update(mask);
668 * Return pointer to a temporary register channel (src or dest).
669 * Note that indirect addressing cannot be handled here.
670 * \param index which temporary register
671 * \param chan which channel of the temp register.
/*
 * Return a pointer to temp register channel (index, chan) — see comment
 * above: GEP into the flat temps_array when temporaries are indirectly
 * addressed, otherwise the per-register alloca.
 */
674 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
678 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
680 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
681 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
682 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
685 return bld->temps[index][chan];
690 * Return pointer to a output register channel (src or dest).
691 * Note that indirect addressing cannot be handled here.
692 * \param index which output register
693 * \param chan which channel of the output register.
/*
 * Return a pointer to output register channel (index, chan) — see comment
 * above: GEP into outputs_array when outputs are indirectly addressed,
 * otherwise the per-register pointer.
 */
696 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
700 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
702 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
703 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
705 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
708 return bld->outputs[index][chan];
713 * If we have indirect addressing in outputs copy our alloca array
714 * to the outputs slots specified by the caller to make sure
715 * our outputs are delivered consistently via the same interface.
/*
 * When outputs use indirect addressing, repopulate bld->outputs[][] with
 * pointers into the alloca'd outputs array so callers always receive
 * outputs through the same interface (see comment block above).
 */
718 gather_outputs(struct lp_build_tgsi_soa_context * bld)
720 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
721 unsigned index, chan;
722 assert(bld->bld_base.info->num_outputs <=
723 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
724 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
725 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
726 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
734 * XXX the lp_build_gather() function should be capable of doing this
735 * with a little work.
/*
 * Scalar-loop gather: for each lane, extract its index, GEP+load the
 * scalar from base_ptr, and insert it into the result vector (starting
 * from bld->undef). O(length) scalar loads per call.
 */
738 build_gather(struct lp_build_context *bld,
739 LLVMValueRef base_ptr,
740 LLVMValueRef indexes)
742 LLVMBuilderRef builder = bld->gallivm->builder;
743 LLVMValueRef res = bld->undef;
747 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
749 for (i = 0; i < bld->type.length; i++) {
750 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
751 LLVMValueRef index = LLVMBuildExtractElement(builder,
753 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
754 &index, 1, "gather_ptr");
755 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
757 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
765 * Scatter/store vector.
/*
 * Scalar-loop scatter: mix the predicate with the exec mask, then for
 * each lane extract (index, value[, pred]) and store the scalar; with a
 * per-lane predicate the destination is loaded and blended via
 * lp_build_select() so masked-off lanes are preserved.
 */
768 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
769 LLVMValueRef base_ptr,
770 LLVMValueRef indexes,
772 struct lp_exec_mask *mask,
775 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
776 LLVMBuilderRef builder = gallivm->builder;
779 /* Mix the predicate and execution mask */
780 if (mask->has_mask) {
782 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
785 pred = mask->exec_mask;
790 * Loop over elements of index_vec, store scalar value.
792 for (i = 0; i < bld->bld_base.base.type.length; i++) {
793 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
794 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
795 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
796 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
797 LLVMValueRef scalar_pred = pred ?
798 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
/* Debug tracing of each scatter (guard condition is on an elided line). */
801 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
802 ii, val, index, scalar_ptr);
805 LLVMValueRef real_val, dst_val;
806 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
807 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
808 LLVMBuildStore(builder, real_val, scalar_ptr);
811 LLVMBuildStore(builder, val, scalar_ptr);
818 * Read the current value of the ADDR register, convert the floats to
819 * ints, add the base index and return the vector of offsets.
820 * The offsets will be used to index into the constant buffer or
821 * temporary register file.
/*
 * Compute per-lane indices for an indirectly addressed register (see
 * comment block above): rel comes from the ADDR register (already int)
 * or a TEMP (float bits reinterpreted as int), index = base + rel,
 * clamped to the register file's declared maximum to stay in bounds.
 */
824 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
825 unsigned reg_file, unsigned reg_index,
826 const struct tgsi_ind_register *indirect_reg)
828 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
829 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
830 /* always use X component of address register */
831 unsigned swizzle = indirect_reg->Swizzle;
834 LLVMValueRef max_index;
837 assert(bld->indirect_files & (1 << reg_file));
839 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
842 switch (indirect_reg->File) {
843 case TGSI_FILE_ADDRESS:
844 rel = LLVMBuildLoad(builder,
845 bld->addr[indirect_reg->Index][swizzle],
847 /* ADDR LLVM values already have LLVM integer type. */
849 case TGSI_FILE_TEMPORARY:
850 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
851 rel = LLVMBuildLoad(builder, rel, "load temp reg");
852 /* TEMP LLVM values always have LLVM float type, but for indirection, the
853 * value actually stored is expected to be an integer */
854 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
/* default: unknown relative file — treat the offset as zero. */
858 rel = uint_bld->zero;
861 index = lp_build_add(uint_bld, base, rel);
863 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
865 bld->bld_base.info->file_max[reg_file]);
/* Unsigned min-clamp; requires the uint build context (no sign bit). */
867 assert(!uint_bld->type.sign);
868 index = lp_build_min(uint_bld, index, max_index);
/*
 * Map a TGSI operand type to the matching build context: float/untyped ->
 * base, unsigned -> uint_bld, signed -> int_bld.
 * NOTE(review): the TGSI_TYPE_DOUBLE arm and the final return are on
 * lines elided from this listing.
 */
873 static struct lp_build_context *
874 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
875 enum tgsi_opcode_type stype)
877 struct lp_build_context *bld_fetch;
880 case TGSI_TYPE_FLOAT:
881 case TGSI_TYPE_UNTYPED:
882 bld_fetch = &bld_base->base;
884 case TGSI_TYPE_UNSIGNED:
885 bld_fetch = &bld_base->uint_bld;
887 case TGSI_TYPE_SIGNED:
888 bld_fetch = &bld_base->int_bld;
891 case TGSI_TYPE_DOUBLE:
/*
 * Fetch one channel of a constant-buffer source register: resolve the
 * (2D) constant buffer via lp_build_array_get, then either gather
 * per-lane (indirect index) or load one scalar and broadcast it;
 * finally bitcast to the int/uint vector type for integer operands.
 * NOTE(review): the function's name line is elided from this listing
 * (numbering gap) — presumably this is the constant-file fetch callback
 * (emit_fetch_constant); confirm against the full source.
 */
902 struct lp_build_tgsi_context * bld_base,
903 const struct tgsi_full_src_register * reg,
904 enum tgsi_opcode_type stype,
907 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
908 struct gallivm_state *gallivm = bld_base->base.gallivm;
909 LLVMBuilderRef builder = gallivm->builder;
910 struct lp_build_context *uint_bld = &bld_base->uint_bld;
911 LLVMValueRef indirect_index = NULL;
912 unsigned dimension = 0;
913 LLVMValueRef dimension_index;
914 LLVMValueRef consts_ptr;
917 /* XXX: Handle fetching xyzw components as a vector */
918 assert(swizzle != ~0);
920 if (reg->Register.Dimension) {
921 assert(!reg->Dimension.Indirect);
922 dimension = reg->Dimension.Index;
923 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
926 dimension_index = lp_build_const_int32(gallivm, dimension);
927 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
929 if (reg->Register.Indirect) {
930 indirect_index = get_indirect_index(bld,
936 if (reg->Register.Indirect) {
937 LLVMValueRef swizzle_vec =
938 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
939 LLVMValueRef index_vec; /* index into the const buffer */
941 /* index_vec = indirect_index * 4 + swizzle */
942 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
943 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
945 /* Gather values from the constant buffer */
946 res = build_gather(&bld_base->base, consts_ptr, index_vec);
/* Direct index: load a single scalar constant and broadcast to all lanes. */
949 LLVMValueRef index; /* index into the const buffer */
950 LLVMValueRef scalar, scalar_ptr;
952 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
954 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
956 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
957 res = lp_build_broadcast_scalar(&bld_base->base, scalar);
960 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
961 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
962 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
/*
 * Fetch one channel of an immediate operand: look it up in the
 * pre-built immediates table, bitcasting the constant to the uint/int
 * vector type for integer operand types (return is on an elided line).
 */
968 emit_fetch_immediate(
969 struct lp_build_tgsi_context * bld_base,
970 const struct tgsi_full_src_register * reg,
971 enum tgsi_opcode_type stype,
974 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
975 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
978 if (stype == TGSI_TYPE_UNSIGNED) {
979 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
980 } else if (stype == TGSI_TYPE_SIGNED) {
981 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
/*
 * Fetch one channel of a shader input: indirect indices gather per-lane
 * from the flat inputs array (cast to float*); direct indices either GEP
 * into inputs_array (when inputs are indirectly addressed anywhere) or
 * read the pre-loaded bld->inputs[][] value. Integer operand types get a
 * final bitcast.
 * NOTE(review): the function's name line is elided from this listing
 * (numbering gap) — presumably the input-file fetch callback
 * (emit_fetch_input); confirm against the full source.
 */
988 struct lp_build_tgsi_context * bld_base,
989 const struct tgsi_full_src_register * reg,
990 enum tgsi_opcode_type stype,
993 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
994 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
995 LLVMBuilderRef builder = gallivm->builder;
996 struct lp_build_context *uint_bld = &bld_base->uint_bld;
997 LLVMValueRef indirect_index = NULL;
1000 if (reg->Register.Indirect) {
1001 indirect_index = get_indirect_index(bld,
1003 reg->Register.Index,
1007 if (reg->Register.Indirect) {
1008 LLVMValueRef swizzle_vec =
1009 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1010 LLVMValueRef length_vec =
1011 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1012 LLVMValueRef index_vec; /* index into the const buffer */
1013 LLVMValueRef inputs_array;
1014 LLVMTypeRef float4_ptr_type;
1016 /* index_vec = (indirect_index * 4 + swizzle) * length */
1017 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1018 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1019 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1021 /* cast inputs_array pointer to float* */
1022 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1023 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
1024 float4_ptr_type, "");
1026 /* Gather values from the temporary register array */
1027 res = build_gather(&bld_base->base, inputs_array, index_vec);
1029 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1030 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1031 reg->Register.Index * 4 + swizzle);
1032 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1033 bld->inputs_array, &lindex, 1, "");
1034 res = LLVMBuildLoad(builder, input_ptr, "");
/* Common case: inputs were pre-loaded into SSA values at shader entry. */
1037 res = bld->inputs[reg->Register.Index][swizzle];
1043 if (stype == TGSI_TYPE_UNSIGNED) {
1044 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1045 } else if (stype == TGSI_TYPE_SIGNED) {
1046 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
/*
 * Fetch a geometry-shader input channel: resolve attribute and vertex
 * indices (either indirect via get_indirect_index or as constants) and
 * delegate the actual load to the gs_iface->fetch_input callback;
 * integer operand types get a final bitcast.
 */
1054 emit_fetch_gs_input(
1055 struct lp_build_tgsi_context * bld_base,
1056 const struct tgsi_full_src_register * reg,
1057 enum tgsi_opcode_type stype,
1060 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1061 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1062 LLVMBuilderRef builder = gallivm->builder;
1063 LLVMValueRef attrib_index = NULL;
1064 LLVMValueRef vertex_index = NULL;
1065 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1068 if (reg->Register.Indirect) {
1069 attrib_index = get_indirect_index(bld,
1071 reg->Register.Index,
1074 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
/* Dimension selects the vertex within the primitive for GS inputs. */
1077 if (reg->Dimension.Indirect) {
1078 vertex_index = get_indirect_index(bld,
1080 reg->Dimension.Index,
1083 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1086 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1087 reg->Dimension.Indirect,
1088 vertex_index, attrib_index,
1093 if (stype == TGSI_TYPE_UNSIGNED) {
1094 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1095 } else if (stype == TGSI_TYPE_SIGNED) {
1096 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
/*
 * Fetch one channel of a temporary register: indirect indices build a
 * per-lane index ((idx*4 + swizzle) * length + lane) and gather from the
 * flat temps array (cast to float*); direct indices load from the temp's
 * pointer via lp_get_temp_ptr_soa. Integer operand types get a final
 * bitcast through stype_to_fetch.
 */
1103 emit_fetch_temporary(
1104 struct lp_build_tgsi_context * bld_base,
1105 const struct tgsi_full_src_register * reg,
1106 enum tgsi_opcode_type stype,
1109 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1110 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1111 LLVMBuilderRef builder = gallivm->builder;
1112 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1113 struct lp_build_context *float_bld = &bld_base->base;
1114 LLVMValueRef indirect_index = NULL;
1117 if (reg->Register.Indirect) {
1118 indirect_index = get_indirect_index(bld,
1120 reg->Register.Index,
1124 if (reg->Register.Indirect) {
1125 LLVMValueRef swizzle_vec =
1126 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
1127 LLVMValueRef length_vec =
1128 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
1129 bld->bld_base.base.type.length);
1130 LLVMValueRef index_vec; /* index into the const buffer */
1131 LLVMValueRef temps_array;
1132 LLVMValueRef pixel_offsets;
1133 LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
1134 LLVMTypeRef float4_ptr_type;
1137 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1138 for (i = 0; i < float_bld->type.length; i++) {
1139 offsets[i] = lp_build_const_int32(gallivm, i);
1141 pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
1143 /* index_vec = (indirect_index * 4 + swizzle) * length */
1144 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1145 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1146 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
/* Unlike the input path, temps add per-lane pixel offsets into the flat array. */
1147 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1149 /* cast temps_array pointer to float* */
1150 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
1151 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1152 float4_ptr_type, "");
1154 /* Gather values from the temporary register array */
1155 res = build_gather(&bld_base->base, temps_array, index_vec);
1158 LLVMValueRef temp_ptr;
1159 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1160 res = LLVMBuildLoad(builder, temp_ptr, "");
1163 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1164 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1165 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
/*
 * Fetch a TGSI_FILE_SYSTEM_VALUE register (INSTANCEID, VERTEXID, PRIMID).
 *
 * The actual type of each system value is tracked in 'atype'; when it
 * differs from the requested source type 'stype' the result is bitcast
 * (not converted) to the requested vector type.  Indirect addressing of
 * system values is not supported (asserted below).
 */
1172 emit_fetch_system_value(
1173 struct lp_build_tgsi_context * bld_base,
1174 const struct tgsi_full_src_register * reg,
1175 enum tgsi_opcode_type stype,
1178 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1179 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1180 const struct tgsi_shader_info *info = bld->bld_base.info;
1181 LLVMBuilderRef builder = gallivm->builder;
1183 enum tgsi_opcode_type atype; // Actual type of the value
1185 assert(!reg->Register.Indirect);
1187 switch (info->system_value_semantic_name[reg->Register.Index]) {
1188 case TGSI_SEMANTIC_INSTANCEID:
/* instance_id is a scalar; broadcast it across the SoA vector. */
1189 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1190 atype = TGSI_TYPE_UNSIGNED;
1193 case TGSI_SEMANTIC_VERTEXID:
1194 res = bld->system_values.vertex_id;
1195 atype = TGSI_TYPE_UNSIGNED;
1198 case TGSI_SEMANTIC_PRIMID:
1199 res = bld->system_values.prim_id;
1200 atype = TGSI_TYPE_UNSIGNED;
1204 assert(!"unexpected semantic in emit_fetch_system_value");
1205 res = bld_base->base.zero;
1206 atype = TGSI_TYPE_FLOAT;
1210 if (atype != stype) {
1211 if (stype == TGSI_TYPE_FLOAT) {
1212 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1213 } else if (stype == TGSI_TYPE_UNSIGNED) {
1214 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1215 } else if (stype == TGSI_TYPE_SIGNED) {
1216 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1224  * Register fetch with derivatives.
/*
 * NOTE(review): the function signature lines are elided in this view.
 * The visible body computes screen-space derivatives of an already
 * fetched source value: *ddx / *ddy receive lp_build_ddx / lp_build_ddy
 * of 'src' (each presumably only when the out pointer is non-NULL —
 * the guarding conditions are on elided lines; confirm in full source).
 */
1228 struct lp_build_tgsi_soa_context *bld,
1237 /* TODO: use interpolation coeffs for inputs */
1240 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1243 *ddy = lp_build_ddy(&bld->bld_base.base, src);
/*
 * Fetch the per-channel predicate masks for a predicated instruction.
 *
 * If the instruction is not predicated, all channels get a NULL mask
 * (interpreted as "no predication").  Otherwise each channel's swizzled
 * predicate register value is loaded once (memoized in unswizzled[]),
 * converted to an integer mask via a != 0 comparison, and optionally
 * negated.
 */
1251 emit_fetch_predicate(
1252 struct lp_build_tgsi_soa_context *bld,
1253 const struct tgsi_full_instruction *inst,
1256 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1258 unsigned char swizzles[4];
1259 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1263 if (!inst->Instruction.Predicate) {
1264 TGSI_FOR_EACH_CHANNEL( chan ) {
1270 swizzles[0] = inst->Predicate.SwizzleX;
1271 swizzles[1] = inst->Predicate.SwizzleY;
1272 swizzles[2] = inst->Predicate.SwizzleZ;
1273 swizzles[3] = inst->Predicate.SwizzleW;
1275 index = inst->Predicate.Index;
1276 assert(index < LP_MAX_TGSI_PREDS);
1278 TGSI_FOR_EACH_CHANNEL( chan ) {
1279 unsigned swizzle = swizzles[chan];
1282 * Only fetch the predicate register channels that are actually listed
/* Memoize per-swizzle so each channel is loaded/compared at most once. */
1285 if (!unswizzled[swizzle]) {
1286 value = LLVMBuildLoad(builder,
1287 bld->preds[index][swizzle], "");
1290 * Convert the value to an integer mask.
1292 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1293 * is needlessly causing two comparisons due to storing the intermediate
1294 * result as float vector instead of an integer mask vector.
1296 value = lp_build_compare(bld->bld_base.base.gallivm,
1297 bld->bld_base.base.type,
1300 bld->bld_base.base.zero);
1301 if (inst->Predicate.Negate) {
1302 value = LLVMBuildNot(builder, value, "");
1305 unswizzled[swizzle] = value;
1307 value = unswizzled[swizzle];
/*
 * Store one channel of a destination register (emit_store_chan —
 * the name/return lines are elided in this view).
 *
 * Applies the instruction's saturate modifier (float only), then
 * dispatches on the destination file:
 *  - OUTPUT / TEMPORARY: stored as floats; indirect addressing uses a
 *    masked scatter into the flat array, direct addressing a masked
 *    store into the per-channel alloca.
 *  - ADDRESS: integer store into bld->addr.
 *  - PREDICATE: float store into bld->preds.
 * All stores respect both the execution mask and the per-channel
 * predicate 'pred'.
 */
1319 struct lp_build_tgsi_context *bld_base,
1320 const struct tgsi_full_instruction *inst,
1322 unsigned chan_index,
1326 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1327 struct gallivm_state *gallivm = bld_base->base.gallivm;
1328 LLVMBuilderRef builder = gallivm->builder;
1329 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1330 struct lp_build_context *float_bld = &bld_base->base;
1331 struct lp_build_context *int_bld = &bld_base->int_bld;
1332 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1333 LLVMValueRef indirect_index = NULL;
1334 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1339 * It is always assumed to be float.
1341 switch( inst->Instruction.Saturate ) {
1345 case TGSI_SAT_ZERO_ONE:
/* Saturation only makes sense for float-typed destinations. */
1346 assert(dtype == TGSI_TYPE_FLOAT ||
1347 dtype == TGSI_TYPE_UNTYPED);
1348 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1349 value = lp_build_max(float_bld, value, float_bld->zero);
1350 value = lp_build_min(float_bld, value, float_bld->one);
1353 case TGSI_SAT_MINUS_PLUS_ONE:
1354 assert(dtype == TGSI_TYPE_FLOAT ||
1355 dtype == TGSI_TYPE_UNTYPED);
1356 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1357 value = lp_build_max(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0));
1358 value = lp_build_min(float_bld, value, float_bld->one);
1365 if (reg->Register.Indirect) {
1366 indirect_index = get_indirect_index(bld,
1368 reg->Register.Index,
1371 assert(reg->Register.Index <=
1372 bld_base->info->file_max[reg->Register.File]);
1375 switch( reg->Register.File ) {
1376 case TGSI_FILE_OUTPUT:
1377 /* Outputs are always stored as floats */
1378 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1380 if (reg->Register.Indirect) {
1381 LLVMValueRef chan_vec =
1382 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1383 LLVMValueRef length_vec =
1384 lp_build_const_int_vec(gallivm, uint_bld->type, float_bld->type.length);
1385 LLVMValueRef index_vec; /* indexes into the temp registers */
1386 LLVMValueRef outputs_array;
1387 LLVMValueRef pixel_offsets;
1388 LLVMTypeRef float_ptr_type;
1391 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1392 pixel_offsets = uint_bld->undef;
1393 for (i = 0; i < float_bld->type.length; i++) {
1394 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1395 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1399 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1400 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1401 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1402 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1403 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1406 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1407 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1408 float_ptr_type, "");
1410 /* Scatter store values into temp registers */
1411 emit_mask_scatter(bld, outputs_array, index_vec, value,
1412 &bld->exec_mask, pred);
1415 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1417 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1421 case TGSI_FILE_TEMPORARY:
1422 /* Temporaries are always stored as floats */
1423 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1425 if (reg->Register.Indirect) {
1426 LLVMValueRef chan_vec =
1427 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1428 LLVMValueRef length_vec =
1429 lp_build_const_int_vec(gallivm, uint_bld->type,
1430 float_bld->type.length);
1431 LLVMValueRef index_vec; /* indexes into the temp registers */
1432 LLVMValueRef temps_array;
1433 LLVMValueRef pixel_offsets;
1434 LLVMTypeRef float_ptr_type;
1437 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1438 pixel_offsets = uint_bld->undef;
1439 for (i = 0; i < float_bld->type.length; i++) {
1440 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1441 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1445 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1446 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1447 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1448 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1449 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1452 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1453 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1454 float_ptr_type, "");
1456 /* Scatter store values into temp registers */
1457 emit_mask_scatter(bld, temps_array, index_vec, value,
1458 &bld->exec_mask, pred);
1461 LLVMValueRef temp_ptr;
1462 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1464 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1468 case TGSI_FILE_ADDRESS:
/* ADDR registers are kept as integer vectors (see declaration code). */
1469 assert(dtype == TGSI_TYPE_SIGNED);
1470 assert(LLVMTypeOf(value) == int_bld->vec_type);
1471 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1472 lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1473 bld->addr[reg->Register.Index][chan_index]);
1476 case TGSI_FILE_PREDICATE:
1477 assert(LLVMTypeOf(value) == float_bld->vec_type);
1478 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1479 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1480 bld->preds[reg->Register.Index][chan_index]);
/*
 * Store an instruction's result vector to its destination register
 * (emit_store — the name/return lines are elided in this view).
 *
 * Fetches the per-channel predicate masks once, then stores each
 * write-enabled channel of dst[] via emit_store_chan.
 */
1492 struct lp_build_tgsi_context * bld_base,
1493 const struct tgsi_full_instruction * inst,
1494 const struct tgsi_opcode_info * info,
1495 LLVMValueRef dst[4])
1498 unsigned chan_index;
1499 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1502 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1504 emit_fetch_predicate( bld, inst, pred );
/* Only channels enabled in the destination write mask are stored. */
1506 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1507 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1513  * High-level instruction translators.
/*
 * Translate an old-style TGSI texture sampling instruction
 * (TEX/TXB/TXL/TXP/TXD) into a call to the sampler code generator.
 *
 * Determines coordinate/derivative/offset counts from the texture
 * target, fetches coordinates (dividing by W for projected lookups),
 * fetches LOD bias or explicit LOD and explicit derivatives per the
 * modifier, then delegates to bld->sampler->emit_fetch_texel().
 * Without a sampler generator only undefs are returned, with a warning.
 */
1517 emit_tex( struct lp_build_tgsi_soa_context *bld,
1518 const struct tgsi_full_instruction *inst,
1519 enum lp_build_tex_modifier modifier,
1520 LLVMValueRef *texel)
1523 LLVMValueRef lod_bias, explicit_lod;
1524 LLVMValueRef oow = NULL;
1525 LLVMValueRef coords[4];
1526 LLVMValueRef offsets[3] = { NULL };
1527 struct lp_derivatives derivs;
1528 struct lp_derivatives *deriv_ptr = NULL;
1529 unsigned num_coords, num_derivs, num_offsets;
1532 if (!bld->sampler) {
1533 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1534 for (i = 0; i < 4; i++) {
1535 texel[i] = bld->bld_base.base.undef;
/* Per-target coordinate/derivative/offset counts (bodies partly elided). */
1540 switch (inst->Texture.Texture) {
1541 case TGSI_TEXTURE_1D:
1546 case TGSI_TEXTURE_1D_ARRAY:
1551 case TGSI_TEXTURE_2D:
1552 case TGSI_TEXTURE_RECT:
1557 case TGSI_TEXTURE_SHADOW1D:
1558 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1563 case TGSI_TEXTURE_SHADOW2D:
1564 case TGSI_TEXTURE_SHADOWRECT:
1565 case TGSI_TEXTURE_2D_ARRAY:
1570 case TGSI_TEXTURE_CUBE:
1575 case TGSI_TEXTURE_3D:
1580 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1585 case TGSI_TEXTURE_SHADOWCUBE:
1595 /* Note lod and especially projected are illegal in a LOT of cases */
1596 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1597 assert(num_coords < 4);
/* Bias / explicit LOD both come from src0.w in old-style opcodes. */
1598 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1599 explicit_lod = NULL;
1601 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1602 assert(num_coords < 4);
1604 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1608 explicit_lod = NULL;
1611 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1612 assert(num_coords < 4);
/* TXP: scale coordinates by 1/w. */
1613 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1614 oow = lp_build_rcp(&bld->bld_base.base, oow);
1617 for (i = 0; i < num_coords; i++) {
1618 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1619 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1620 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1622 for (i = num_coords; i < 4; i++) {
1623 coords[i] = bld->bld_base.base.undef;
1626 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
/* TXD: derivatives in src1/src2, sampler unit moves to src3. */
1628 for (dim = 0; dim < num_derivs; ++dim) {
1629 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1630 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1632 deriv_ptr = &derivs;
1633 unit = inst->Src[3].Register.Index;
1635 unit = inst->Src[1].Register.Index;
1638 /* some advanced gather instructions (txgo) would require 4 offsets */
1639 if (inst->Texture.NumOffsets == 1) {
1641 for (dim = 0; dim < num_offsets; dim++) {
1642 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1646 bld->sampler->emit_fetch_texel(bld->sampler,
1647 bld->bld_base.base.gallivm,
1648 bld->bld_base.base.type,
1654 lod_bias, explicit_lod,
/*
 * Translate a new-style (SM4/resource) SAMPLE* instruction.
 *
 * Unlike old-style tex opcodes the texture and sampler indices come
 * from src1 and src2; the target comes from the declared sampler view
 * (bld->sv[]) rather than from the instruction.  Handles LOD bias,
 * explicit LOD, implicit level-zero LOD, shadow compare value in
 * src3.x, and explicit derivatives in src3/src4.
 *
 * NOTE(review): texture_unit/sampler_unit are visibly assigned twice
 * (once before and once after the target switch) — redundant but
 * harmless; elided lines may explain it, confirm before changing.
 */
1659 emit_sample(struct lp_build_tgsi_soa_context *bld,
1660 const struct tgsi_full_instruction *inst,
1661 enum lp_build_tex_modifier modifier,
1663 LLVMValueRef *texel)
1665 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1666 unsigned texture_unit, sampler_unit;
1667 LLVMValueRef lod_bias, explicit_lod;
1668 LLVMValueRef coords[4];
1669 LLVMValueRef offsets[3] = { NULL };
1670 struct lp_derivatives derivs;
1671 struct lp_derivatives *deriv_ptr = NULL;
1672 unsigned num_coords, num_offsets, num_derivs;
1675 if (!bld->sampler) {
1676 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1677 for (i = 0; i < 4; i++) {
1678 texel[i] = bld->bld_base.base.undef;
1684 * unlike old-style tex opcodes the texture/sampler indices
1685 * always come from src1 and src2 respectively.
1687 texture_unit = inst->Src[1].Register.Index;
1688 sampler_unit = inst->Src[2].Register.Index;
1691 * Note inst->Texture.Texture will contain the number of offsets,
1692 * however the target information is NOT there and comes from the
1693 * declared sampler views instead.
1695 switch (bld->sv[texture_unit].Resource) {
1696 case TGSI_TEXTURE_1D:
1701 case TGSI_TEXTURE_1D_ARRAY:
1706 case TGSI_TEXTURE_2D:
1707 case TGSI_TEXTURE_RECT:
1712 case TGSI_TEXTURE_2D_ARRAY:
1717 case TGSI_TEXTURE_CUBE:
1722 case TGSI_TEXTURE_3D:
1727 case TGSI_TEXTURE_CUBE_ARRAY:
1738 * unlike old-style tex opcodes the texture/sampler indices
1739 * always come from src1 and src2 respectively.
1741 texture_unit = inst->Src[1].Register.Index;
1742 sampler_unit = inst->Src[2].Register.Index;
1744 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1745 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1746 explicit_lod = NULL;
1748 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1750 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1752 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1754 /* XXX might be better to explicitly pass the level zero information */
1755 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1759 explicit_lod = NULL;
1762 for (i = 0; i < num_coords; i++) {
1763 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1765 for (i = num_coords; i < 4; i++) {
1766 coords[i] = bld->bld_base.base.undef;
1769 * XXX: whack shadow comparison value into place.
1770 * Should probably fix the interface for separate value
1771 * (it will not work for cube arrays if it is part of coords).
1774 unsigned c_coord = num_coords > 2 ? 3 : 2;
1775 assert(num_coords < 4);
1776 coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1779 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1781 for (dim = 0; dim < num_derivs; ++dim) {
1782 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
1783 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
1785 deriv_ptr = &derivs;
1788 /* some advanced gather instructions (txgo) would require 4 offsets */
1789 if (inst->Texture.NumOffsets == 1) {
1791 for (dim = 0; dim < num_offsets; dim++) {
1792 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1796 bld->sampler->emit_fetch_texel(bld->sampler,
1797 bld->bld_base.base.gallivm,
1798 bld->bld_base.base.type,
1800 texture_unit, sampler_unit,
1804 lod_bias, explicit_lod,
/*
 * Translate an unfiltered texel fetch (TXF / LD-style instruction).
 *
 * Integer coordinates come from src0; the explicit LOD comes from
 * src0.w for all targets except buffers.  The target is taken from the
 * declared sampler view when available, otherwise from the instruction
 * itself (selection logic partly on elided lines).
 */
1809 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
1810 const struct tgsi_full_instruction *inst,
1811 LLVMValueRef *texel,
1814 unsigned unit, target;
1815 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1816 LLVMValueRef explicit_lod = NULL;
1817 LLVMValueRef coords[3];
1818 LLVMValueRef offsets[3] = { NULL };
1819 unsigned num_coords;
1823 if (!bld->sampler) {
1824 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1825 for (i = 0; i < 4; i++) {
1826 texel[i] = coord_undef;
1831 unit = inst->Src[1].Register.Index;
1834 target = bld->sv[unit].Resource;
1837 target = inst->Texture.Texture;
1841 case TGSI_TEXTURE_1D:
1842 case TGSI_TEXTURE_BUFFER:
1846 case TGSI_TEXTURE_1D_ARRAY:
1850 case TGSI_TEXTURE_2D:
1851 case TGSI_TEXTURE_RECT:
1855 case TGSI_TEXTURE_2D_ARRAY:
1859 case TGSI_TEXTURE_3D:
1868 /* always have lod except for buffers ? */
1869 if (target != TGSI_TEXTURE_BUFFER) {
1870 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1873 for (i = 0; i < num_coords; i++) {
1874 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1876 for (i = num_coords; i < 3; i++) {
1877 coords[i] = coord_undef;
1880 if (inst->Texture.NumOffsets == 1) {
1882 for (dim = 0; dim < dims; dim++) {
1883 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1887 bld->sampler->emit_fetch_texel(bld->sampler,
1888 bld->bld_base.base.gallivm,
1889 bld->bld_base.base.type,
/*
 * Translate a texture size query (TXQ / SVIEWINFO).
 *
 * Resource index comes from src1; an explicit LOD from src0.x is
 * fetched except for targets with no mip levels (buffer/rect — the
 * branch structure is partly on elided lines).  Delegates to
 * bld->sampler->emit_size_query(); returns integer undefs if no
 * sampler generator was supplied.
 */
1900 emit_size_query( struct lp_build_tgsi_soa_context *bld,
1901 const struct tgsi_full_instruction *inst,
1902 LLVMValueRef *sizes_out,
1903 boolean is_sviewinfo)
1905 LLVMValueRef explicit_lod;
1908 unsigned unit = inst->Src[1].Register.Index;
1912 target = bld->sv[unit].Resource;
1915 target = inst->Texture.Texture;
1918 case TGSI_TEXTURE_BUFFER:
1919 case TGSI_TEXTURE_RECT:
1920 case TGSI_TEXTURE_SHADOWRECT:
1928 if (!bld->sampler) {
1929 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1930 for (i = 0; i < 4; i++)
1931 sizes_out[i] = bld->bld_base.int_bld.undef;
1936 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
1938 explicit_lod = NULL;
1940 bld->sampler->emit_size_query(bld->sampler,
1941 bld->bld_base.base.gallivm,
1942 bld->bld_base.int_bld.type,
/*
 * Heuristic: return whether the shader is "nearly finished" at pc.
 *
 * Looks ahead up to 5 instructions; if TGSI_OPCODE_END is reached first
 * (with nothing in between that could still consume the execution mask:
 * texturing, calls, control flow) the caller can skip the early-exit
 * mask check after a KIL.  Used by emit_kil/emit_kilp below.
 */
1950 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1955 for (i = 0; i < 5; i++) {
1958 if (pc + i >= bld->bld_base.info->num_instructions)
1961 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1963 if (opcode == TGSI_OPCODE_END)
/* Any of these makes the remaining work non-trivial -> not "near end". */
1966 if (opcode == TGSI_OPCODE_TEX ||
1967 opcode == TGSI_OPCODE_TXP ||
1968 opcode == TGSI_OPCODE_TXD ||
1969 opcode == TGSI_OPCODE_TXB ||
1970 opcode == TGSI_OPCODE_TXL ||
1971 opcode == TGSI_OPCODE_TXF ||
1972 opcode == TGSI_OPCODE_TXQ ||
1973 opcode == TGSI_OPCODE_CAL ||
1974 opcode == TGSI_OPCODE_CALLNZ ||
1975 opcode == TGSI_OPCODE_IF ||
1976 opcode == TGSI_OPCODE_UIF ||
1977 opcode == TGSI_OPCODE_BGNLOOP ||
1978 opcode == TGSI_OPCODE_SWITCH)
1988  * Kill fragment if any of the src register values are negative.
/*
 * (emit_kil — name/return lines elided.)  Fetches each referenced
 * source channel at most once (deduplicated by swizzle), builds a
 * "component >= 0" mask per channel, ANDs them together and updates
 * the fragment shader's live mask.  The mask check (early-exit branch)
 * is skipped when the shader is about to end anyway.
 */
1992 struct lp_build_tgsi_soa_context *bld,
1993 const struct tgsi_full_instruction *inst,
1996 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1997 const struct tgsi_full_src_register *reg = &inst->Src[0];
1998 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2000 unsigned chan_index;
2002 memset(&terms, 0, sizeof terms);
2004 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2007 /* Unswizzle channel */
2008 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2010 /* Check if the component has not been already tested. */
2011 assert(swizzle < TGSI_NUM_CHANNELS);
2012 if( !terms[swizzle] )
2013 /* TODO: change the comparison operator instead of setting the sign */
2014 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2018 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2019 if(terms[chan_index]) {
2020 LLVMValueRef chan_mask;
2023 * If term < 0 then mask = 0 else mask = ~0.
2025 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2028 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2035 lp_build_mask_update(bld->mask, mask);
2037 if (!near_end_of_shader(bld, pc))
2038 lp_build_mask_check(bld->mask);
2044  * Predicated fragment kill.
2045  * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
2046  * The only predication is the execution mask which will apply if
2047  * we're inside a loop or conditional.
2050 emit_kilp(struct lp_build_tgsi_soa_context *bld,
2053 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2056 /* For those channels which are "alive", disable fragment shader
2059 if (bld->exec_mask.has_mask) {
/* Kill exactly the currently-executing channels: mask = ~exec_mask. */
2060 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2063 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2067 lp_build_mask_update(bld->mask, mask);
/* Skip the early-exit branch when the shader ends shortly anyway. */
2069 if (!near_end_of_shader(bld, pc))
2070 lp_build_mask_check(bld->mask);
2075  * Emit code which will dump the value of all the temporary registers
/*
 * Debug helper: for every declared TEMP register, emit runtime
 * lp_build_printf calls that print the four lanes of each of the
 * X/Y/Z/W channel vectors.  Assumes a 4-wide vector (extracts
 * elements 0..3 only).
 */
2079 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
2081 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2082 LLVMBuilderRef builder = gallivm->builder;
2083 LLVMValueRef temp_ptr;
2084 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
2085 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
2086 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
2087 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
2089 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
2091 for (index = 0; index < n; index++) {
2092 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
2093 LLVMValueRef v[4][4], res;
2096 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
2098 for (chan = 0; chan < 4; chan++) {
2099 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2100 res = LLVMBuildLoad(builder, temp_ptr, "");
2101 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
2102 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
2103 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
2104 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
2107 lp_build_printf(gallivm, " X: %f %f %f %f\n",
2108 v[0][0], v[0][1], v[0][2], v[0][3]);
2109 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
2110 v[1][0], v[1][1], v[1][2], v[1][3]);
2111 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
2112 v[2][0], v[2][1], v[2][2], v[2][3]);
2113 lp_build_printf(gallivm, " W: %f %f %f %f\n",
2114 v[3][0], v[3][1], v[3][2], v[3][3]);
/*
 * Process a TGSI declaration: allocate per-channel storage for each
 * register in the declared range.
 *
 * TEMP and OUTPUT registers get one alloca per channel unless the
 * file is indirectly addressed (then flat arrays allocated elsewhere
 * are used instead).  ADDR registers always use integer vectors;
 * PREDICATE registers use float vectors.  SAMPLER_VIEW declarations
 * are recorded in bld->sv[] for later target lookups.
 */
2121 lp_emit_declaration_soa(
2122 struct lp_build_tgsi_context *bld_base,
2123 const struct tgsi_full_declaration *decl)
2125 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2126 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2127 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2128 const unsigned first = decl->Range.First;
2129 const unsigned last = decl->Range.Last;
2132 for (idx = first; idx <= last; ++idx) {
2133 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2134 switch (decl->Declaration.File) {
2135 case TGSI_FILE_TEMPORARY:
2136 assert(idx < LP_MAX_TGSI_TEMPS);
2137 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2138 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2139 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2143 case TGSI_FILE_OUTPUT:
2144 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2145 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2146 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2147 vec_type, "output");
2151 case TGSI_FILE_ADDRESS:
2152 /* ADDR registers are only allocated with an integer LLVM IR type,
2153 * as they are guaranteed to always have integers.
2154 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2155 * an ADDR register for that matter).
2157 assert(idx < LP_MAX_TGSI_ADDRS);
2158 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2159 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2162 case TGSI_FILE_PREDICATE:
2163 assert(idx < LP_MAX_TGSI_PREDS);
2164 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2165 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
2169 case TGSI_FILE_SAMPLER_VIEW:
2171 * The target stored here MUST match whatever there actually
2172 * is in the set sampler views (what about return type?).
2174 assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2175 bld->sv[idx] = decl->SamplerView;
2179 /* don't need to declare other vars */
/*
 * Process a TGSI immediate: materialize it as constant vectors in the
 * next bld->immediates[] slot.
 *
 * All immediates are stored with the float vector type; integer
 * immediates are built in their own type and then const-bitcast to
 * float.  Unused trailing channels (size < 4) are filled with undef.
 */
2186 void lp_emit_immediate_soa(
2187 struct lp_build_tgsi_context *bld_base,
2188 const struct tgsi_full_immediate *imm)
2190 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2191 struct gallivm_state * gallivm = bld_base->base.gallivm;
2193 /* simply copy the immediate values into the next immediates[] slot */
2195 const uint size = imm->Immediate.NrTokens - 1;
2197 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
2198 switch (imm->Immediate.DataType) {
2199 case TGSI_IMM_FLOAT32:
2200 for( i = 0; i < size; ++i )
2201 bld->immediates[bld->num_immediates][i] =
2202 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2205 case TGSI_IMM_UINT32:
2206 for( i = 0; i < size; ++i ) {
2207 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2208 bld->immediates[bld->num_immediates][i] =
2209 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2213 case TGSI_IMM_INT32:
2214 for( i = 0; i < size; ++i ) {
2215 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2216 bld->immediates[bld->num_immediates][i] =
2217 LLVMConstBitCast(tmp, bld_base->base.vec_type);
/* Pad remaining channels so all four are always readable. */
2222 for( i = size; i < 4; ++i )
2223 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
2225 bld->num_immediates++;
/*
 * lp_build_tgsi_action callbacks: thin adapters from the generic
 * action interface to the emit_* helpers above.
 *
 * NOTE(review): every callback's name/return line is elided in this
 * view; the identifications below are inferred from the visible call
 * arguments (modifier, compare flag, src slots) — confirm against the
 * full source.
 */
/* Presumably the DDX action: derivative in x only. */
2230 const struct lp_build_tgsi_action * action,
2231 struct lp_build_tgsi_context * bld_base,
2232 struct lp_build_emit_data * emit_data)
2234 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2236 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2237 &emit_data->output[emit_data->chan], NULL);
/* Presumably the DDY action: derivative in y only. */
2242 const struct lp_build_tgsi_action * action,
2243 struct lp_build_tgsi_context * bld_base,
2244 struct lp_build_emit_data * emit_data)
2246 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2248 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2249 &emit_data->output[emit_data->chan]);
/* Presumably KILP (unconditional kill under exec mask). */
2254 const struct lp_build_tgsi_action * action,
2255 struct lp_build_tgsi_context * bld_base,
2256 struct lp_build_emit_data * emit_data)
2258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2260 emit_kilp(bld, bld_base->pc - 1);
/* Presumably KIL (kill where any src component is negative). */
2265 const struct lp_build_tgsi_action * action,
2266 struct lp_build_tgsi_context * bld_base,
2267 struct lp_build_emit_data * emit_data)
2269 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2271 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
/* TEX: plain sample, no modifier. */
2276 const struct lp_build_tgsi_action * action,
2277 struct lp_build_tgsi_context * bld_base,
2278 struct lp_build_emit_data * emit_data)
2280 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2282 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
/* TXB: sample with LOD bias. */
2287 const struct lp_build_tgsi_action * action,
2288 struct lp_build_tgsi_context * bld_base,
2289 struct lp_build_emit_data * emit_data)
2291 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2293 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
/* TXD: sample with explicit derivatives. */
2299 const struct lp_build_tgsi_action * action,
2300 struct lp_build_tgsi_context * bld_base,
2301 struct lp_build_emit_data * emit_data)
2303 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2305 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
/* TXL: sample with explicit LOD. */
2311 const struct lp_build_tgsi_action * action,
2312 struct lp_build_tgsi_context * bld_base,
2313 struct lp_build_emit_data * emit_data)
2315 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2317 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
/* TXP: projected sample. */
2323 const struct lp_build_tgsi_action * action,
2324 struct lp_build_tgsi_context * bld_base,
2325 struct lp_build_emit_data * emit_data)
2327 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2329 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
/* TXQ: texture size query (not SVIEWINFO). */
2335 const struct lp_build_tgsi_action * action,
2336 struct lp_build_tgsi_context * bld_base,
2337 struct lp_build_emit_data * emit_data)
2339 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2341 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
/* TXF: unfiltered texel fetch, target from instruction. */
2346 const struct lp_build_tgsi_action * action,
2347 struct lp_build_tgsi_context * bld_base,
2348 struct lp_build_emit_data * emit_data)
2350 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2352 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
/* Presumably SAMPLE_I: texel fetch, target from sampler view. */
2357 const struct lp_build_tgsi_action * action,
2358 struct lp_build_tgsi_context * bld_base,
2359 struct lp_build_emit_data * emit_data)
2361 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2363 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
/* SAMPLE: new-style sample, no modifier, no compare. */
2368 const struct lp_build_tgsi_action * action,
2369 struct lp_build_tgsi_context * bld_base,
2370 struct lp_build_emit_data * emit_data)
2372 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2374 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2375 FALSE, emit_data->output);
/* SAMPLE_B: with LOD bias. */
2380 const struct lp_build_tgsi_action * action,
2381 struct lp_build_tgsi_context * bld_base,
2382 struct lp_build_emit_data * emit_data)
2384 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2386 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2387 FALSE, emit_data->output);
/* SAMPLE_C: shadow compare (TRUE flag). */
2392 const struct lp_build_tgsi_action * action,
2393 struct lp_build_tgsi_context * bld_base,
2394 struct lp_build_emit_data * emit_data)
2396 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2398 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2399 TRUE, emit_data->output);
/* SAMPLE_C_LZ: shadow compare at level zero. */
2404 const struct lp_build_tgsi_action * action,
2405 struct lp_build_tgsi_context * bld_base,
2406 struct lp_build_emit_data * emit_data)
2408 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2410 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2411 TRUE, emit_data->output);
/* SAMPLE_D: explicit derivatives. */
2416 const struct lp_build_tgsi_action * action,
2417 struct lp_build_tgsi_context * bld_base,
2418 struct lp_build_emit_data * emit_data)
2420 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2422 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2423 FALSE, emit_data->output);
/* SAMPLE_L: explicit LOD. */
2428 const struct lp_build_tgsi_action * action,
2429 struct lp_build_tgsi_context * bld_base,
2430 struct lp_build_emit_data * emit_data)
2432 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2434 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2435 FALSE, emit_data->output);
/* SVIEWINFO: size query via sampler view (TRUE flag). */
2440 const struct lp_build_tgsi_action * action,
2441 struct lp_build_tgsi_context * bld_base,
2442 struct lp_build_emit_data * emit_data)
2444 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2446 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2450 mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
2452 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2453 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2454 LLVMValueRef one_vec = bld_base->int_bld.one;
2455 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2457 if (exec_mask->has_mask) {
2458 one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
2460 one_vec = LLVMBuildAnd(builder, one_vec,
2461 lp_build_mask_value(bld->mask), "");
2466 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2470 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2472 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2474 current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
2476 LLVMBuildStore(builder, current_vec, ptr);
2480 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2484 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2486 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2487 LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
2490 bld_base->uint_bld.zero);
2492 current_vec = lp_build_select(&bld_base->uint_bld,
2494 bld_base->uint_bld.zero,
2497 LLVMBuildStore(builder, current_vec, ptr);
2501 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
2502 LLVMValueRef current_mask_vec,
2503 LLVMValueRef total_emitted_vertices_vec)
2505 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2506 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2507 LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
2508 total_emitted_vertices_vec,
2509 bld->max_output_vertices_vec);
2511 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2516 const struct lp_build_tgsi_action * action,
2517 struct lp_build_tgsi_context * bld_base,
2518 struct lp_build_emit_data * emit_data)
2520 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2521 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2523 if (bld->gs_iface->emit_vertex) {
2524 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2525 LLVMValueRef total_emitted_vertices_vec =
2526 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2527 masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
2528 total_emitted_vertices_vec);
2529 gather_outputs(bld);
2530 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
2532 total_emitted_vertices_vec);
2533 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
2535 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
2538 lp_build_print_value(bld->bld_base.base.gallivm,
2539 " +++ emit vertex masked ones = ",
2541 lp_build_print_value(bld->bld_base.base.gallivm,
2542 " +++ emit vertex emitted = ",
2543 total_emitted_vertices_vec);
2550 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
2551 LLVMValueRef masked_ones)
2553 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2554 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2556 if (bld->gs_iface->end_primitive) {
2557 LLVMValueRef emitted_vertices_vec =
2558 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
2559 LLVMValueRef emitted_prims_vec =
2560 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2562 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
2563 emitted_vertices_vec,
2567 lp_build_print_value(bld->bld_base.base.gallivm,
2568 " +++ end prim masked ones = ",
2570 lp_build_print_value(bld->bld_base.base.gallivm,
2571 " +++ end prim emitted verts1 = ",
2572 emitted_vertices_vec);
2573 lp_build_print_value(bld->bld_base.base.gallivm,
2574 " +++ end prim emitted prims1 = ",
2575 LLVMBuildLoad(builder,
2576 bld->emitted_prims_vec_ptr, ""));
2578 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
2580 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
2583 lp_build_print_value(bld->bld_base.base.gallivm,
2584 " +++ end prim emitted verts2 = ",
2585 LLVMBuildLoad(builder,
2586 bld->emitted_vertices_vec_ptr, ""));
2594 const struct lp_build_tgsi_action * action,
2595 struct lp_build_tgsi_context * bld_base,
2596 struct lp_build_emit_data * emit_data)
2598 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2600 if (bld->gs_iface->end_primitive) {
2601 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2602 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2603 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2604 LLVMValueRef emitted_verts = LLVMBuildLoad(
2605 builder, bld->emitted_vertices_vec_ptr, "");
2606 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2609 /* We need to combine the current execution mask with the mask
2610 telling us which, if any, execution slots actually have
2611 unemitted primitives, this way we make sure that end_primitives
2612 executes only on the paths that have unflushed vertices */
2613 masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
2615 end_primitive_masked(bld_base, masked_ones);
2621 const struct lp_build_tgsi_action * action,
2622 struct lp_build_tgsi_context * bld_base,
2623 struct lp_build_emit_data * emit_data)
2625 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2627 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2633 const struct lp_build_tgsi_action * action,
2634 struct lp_build_tgsi_context * bld_base,
2635 struct lp_build_emit_data * emit_data)
2637 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2639 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2644 const struct lp_build_tgsi_action * action,
2645 struct lp_build_tgsi_context * bld_base,
2646 struct lp_build_emit_data * emit_data)
2648 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2650 lp_exec_break(&bld->exec_mask, bld_base);
2655 const struct lp_build_tgsi_action * action,
2656 struct lp_build_tgsi_context * bld_base,
2657 struct lp_build_emit_data * emit_data)
2659 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2660 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2661 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2662 LLVMValueRef unsigned_cond =
2663 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
2664 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2668 lp_exec_break_condition(&bld->exec_mask, cond);
2673 const struct lp_build_tgsi_action * action,
2674 struct lp_build_tgsi_context * bld_base,
2675 struct lp_build_emit_data * emit_data)
2678 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2680 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2681 emit_data->args[0], bld->bld_base.base.zero);
2682 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2687 const struct lp_build_tgsi_action * action,
2688 struct lp_build_tgsi_context * bld_base,
2689 struct lp_build_emit_data * emit_data)
2692 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2693 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2695 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2696 emit_data->args[0], uint_bld->zero);
2697 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2702 const struct lp_build_tgsi_action * action,
2703 struct lp_build_tgsi_context * bld_base,
2704 struct lp_build_emit_data * emit_data)
2706 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2708 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
2713 const struct lp_build_tgsi_action * action,
2714 struct lp_build_tgsi_context * bld_base,
2715 struct lp_build_emit_data * emit_data)
2717 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2719 lp_exec_default(&bld->exec_mask, bld_base);
2724 const struct lp_build_tgsi_action * action,
2725 struct lp_build_tgsi_context * bld_base,
2726 struct lp_build_emit_data * emit_data)
2728 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2730 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
2735 const struct lp_build_tgsi_action * action,
2736 struct lp_build_tgsi_context * bld_base,
2737 struct lp_build_emit_data * emit_data)
2739 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2741 lp_exec_endswitch(&bld->exec_mask, bld_base);
2746 const struct lp_build_tgsi_action * action,
2747 struct lp_build_tgsi_context * bld_base,
2748 struct lp_build_emit_data * emit_data)
2750 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2752 lp_exec_bgnloop(&bld->exec_mask);
2757 const struct lp_build_tgsi_action * action,
2758 struct lp_build_tgsi_context * bld_base,
2759 struct lp_build_emit_data * emit_data)
2761 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2763 lp_exec_mask_bgnsub(&bld->exec_mask);
2768 const struct lp_build_tgsi_action * action,
2769 struct lp_build_tgsi_context * bld_base,
2770 struct lp_build_emit_data * emit_data)
2772 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2774 lp_exec_mask_cond_invert(&bld->exec_mask);
2779 const struct lp_build_tgsi_action * action,
2780 struct lp_build_tgsi_context * bld_base,
2781 struct lp_build_emit_data * emit_data)
2783 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2785 lp_exec_mask_cond_pop(&bld->exec_mask);
2790 const struct lp_build_tgsi_action * action,
2791 struct lp_build_tgsi_context * bld_base,
2792 struct lp_build_emit_data * emit_data)
2794 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2796 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
2801 const struct lp_build_tgsi_action * action,
2802 struct lp_build_tgsi_context * bld_base,
2803 struct lp_build_emit_data * emit_data)
2805 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2807 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
2812 const struct lp_build_tgsi_action * action,
2813 struct lp_build_tgsi_context * bld_base,
2814 struct lp_build_emit_data * emit_data)
2816 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2818 lp_exec_continue(&bld->exec_mask);
/* XXX: Refactor and move it to lp_bld_tgsi_action.c
 *
 * XXX: What do the comments about xmm registers mean?  Maybe they are left over
 * from old code, but there is no guarantee that LLVM will use those registers
 * for this code.
 *
 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
 * should be handled by the emit_data->fetch_args function. */
2831 const struct lp_build_tgsi_action * action,
2832 struct lp_build_tgsi_context * bld_base,
2833 struct lp_build_emit_data * emit_data)
2835 LLVMValueRef tmp0, tmp1;
2836 LLVMValueRef tmp4 = NULL;
2837 LLVMValueRef tmp5 = NULL;
2838 LLVMValueRef tmp6 = NULL;
2839 LLVMValueRef tmp7 = NULL;
2840 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2842 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2844 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2845 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2846 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2847 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2849 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2852 /* xmm0 = src.x * src.x */
2853 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2854 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2857 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
2860 /* xmm0 = xmm0 + src.y * src.y */
2861 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
2862 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2865 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2866 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2869 /* xmm0 = xmm0 + src.z * src.z */
2870 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
2871 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2874 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2875 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2879 /* xmm0 = xmm0 + src.w * src.w */
2880 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
2881 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
2884 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2885 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2887 /* xmm1 = 1 / sqrt(xmm0) */
2888 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
2889 /* dst.x = xmm1 * src.x */
2890 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2891 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
2893 /* dst.y = xmm1 * src.y */
2894 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2895 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
2898 /* dst.z = xmm1 * src.z */
2899 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2900 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
2902 /* dst.w = xmm1 * src.w */
2903 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
2904 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
2909 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
2910 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
2914 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
2916 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2917 struct gallivm_state * gallivm = bld_base->base.gallivm;
2919 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2920 LLVMValueRef array_size =
2921 lp_build_const_int32(gallivm,
2922 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2923 bld->temps_array = lp_build_array_alloca(gallivm,
2924 bld_base->base.vec_type, array_size,
2928 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2929 LLVMValueRef array_size =
2930 lp_build_const_int32(gallivm,
2931 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2932 bld->outputs_array = lp_build_array_alloca(gallivm,
2933 bld_base->base.vec_type, array_size,
2937 /* If we have indirect addressing in inputs we need to copy them into
2938 * our alloca array to be able to iterate over them */
2939 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
2940 unsigned index, chan;
2941 LLVMTypeRef vec_type = bld_base->base.vec_type;
2942 LLVMValueRef array_size = lp_build_const_int32(gallivm,
2943 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2944 bld->inputs_array = lp_build_array_alloca(gallivm,
2945 vec_type, array_size,
2948 assert(bld_base->info->num_inputs
2949 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2951 for (index = 0; index < bld_base->info->num_inputs; ++index) {
2952 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2953 LLVMValueRef lindex =
2954 lp_build_const_int32(gallivm, index * 4 + chan);
2955 LLVMValueRef input_ptr =
2956 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2958 LLVMValueRef value = bld->inputs[index][chan];
2960 LLVMBuildStore(gallivm->builder, value, input_ptr);
2965 if (bld->gs_iface) {
2966 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2967 bld->emitted_prims_vec_ptr =
2968 lp_build_alloca(gallivm,
2970 "emitted_prims_ptr");
2971 bld->emitted_vertices_vec_ptr =
2972 lp_build_alloca(gallivm,
2974 "emitted_vertices_ptr");
2975 bld->total_emitted_vertices_vec_ptr =
2976 lp_build_alloca(gallivm,
2978 "total_emitted_vertices_ptr");
2980 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2981 bld->emitted_prims_vec_ptr);
2982 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2983 bld->emitted_vertices_vec_ptr);
2984 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2985 bld->total_emitted_vertices_vec_ptr);
2989 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
2991 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2992 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2996 emit_dump_temps(bld);
2999 /* If we have indirect addressing in outputs we need to copy our alloca array
3000 * to the outputs slots specified by the caller */
3001 if (bld->gs_iface) {
3002 LLVMValueRef total_emitted_vertices_vec;
3003 LLVMValueRef emitted_prims_vec;
3004 /* implicit end_primitives, needed in case there are any unflushed
3005 vertices in the cache */
3006 end_primitive(NULL, bld_base, NULL);
3008 total_emitted_vertices_vec =
3009 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3011 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3013 bld->gs_iface->gs_epilogue(bld->gs_iface,
3015 total_emitted_vertices_vec,
3018 gather_outputs(bld);
3023 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3024 const struct tgsi_token *tokens,
3025 struct lp_type type,
3026 struct lp_build_mask_context *mask,
3027 LLVMValueRef consts_ptr,
3028 const struct lp_bld_tgsi_system_values *system_values,
3029 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3030 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3031 struct lp_build_sampler_soa *sampler,
3032 const struct tgsi_shader_info *info,
3033 const struct lp_build_tgsi_gs_iface *gs_iface)
3035 struct lp_build_tgsi_soa_context bld;
3037 struct lp_type res_type;
3039 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3040 memset(&res_type, 0, sizeof res_type);
3041 res_type.width = type.width;
3042 res_type.length = type.length;
3045 /* Setup build context */
3046 memset(&bld, 0, sizeof bld);
3047 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3048 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3049 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3050 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3052 bld.inputs = inputs;
3053 bld.outputs = outputs;
3054 bld.consts_ptr = consts_ptr;
3055 bld.sampler = sampler;
3056 bld.bld_base.info = info;
3057 bld.indirect_files = info->indirect_files;
3059 bld.bld_base.soa = TRUE;
3060 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3061 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3062 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3063 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3064 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3065 bld.bld_base.emit_store = emit_store;
3067 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3068 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3070 bld.bld_base.emit_prologue = emit_prologue;
3071 bld.bld_base.emit_epilogue = emit_epilogue;
3073 /* Set opcode actions */
3074 lp_set_default_actions_cpu(&bld.bld_base);
3076 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3077 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3078 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3079 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
3080 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3081 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3082 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3083 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3084 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3085 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3086 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3087 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3088 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3089 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3090 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3091 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3092 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3093 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
3094 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
3095 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
3096 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
3097 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3098 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3099 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3100 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3101 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3102 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3103 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3104 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3105 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3106 /* DX10 sampling ops */
3107 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3108 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3109 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3110 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3111 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3112 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3113 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3114 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3117 /* There's no specific value for this because it should always
3118 * be set, but apps using ext_geometry_shader4 quite often
3119 * were forgetting so we're using MAX_VERTEX_VARYING from
3120 * that spec even though we could debug_assert if it's not
3121 * set, but that's a lot uglier. */
3122 uint max_output_vertices = 32;
3124 /* inputs are always indirect with gs */
3125 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3126 bld.gs_iface = gs_iface;
3127 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3128 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3129 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3131 for (i = 0; i < info->num_properties; ++i) {
3132 if (info->properties[i].name ==
3133 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
3134 max_output_vertices = info->properties[i].data[0];
3137 bld.max_output_vertices_vec =
3138 lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
3139 max_output_vertices);
3142 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3144 bld.system_values = *system_values;
3146 lp_build_tgsi_llvm(&bld.bld_base, tokens);
3149 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3150 LLVMValueRef function = LLVMGetBasicBlockParent(block);
3151 debug_printf("11111111111111111111111111111 \n");
3152 tgsi_dump(tokens, 0);
3153 lp_debug_dump_value(function);
3154 debug_printf("2222222222222222222222222222 \n");
3158 LLVMModuleRef module = LLVMGetGlobalParent(
3159 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3160 LLVMDumpModule(module);