OSDN Git Service

radeonsi: use ordered compares for SSG and face selection
[android-x86/external-mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors: Tom Stellard <thomas.stellard@amd.com>
24  *
25  */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <llvm-c/Core.h>
41 #include <llvm-c/Transforms/Scalar.h>
42
43 static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
44 {
45         return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
46 }
47
48 static struct radeon_llvm_branch * get_current_branch(
49         struct radeon_llvm_context * ctx)
50 {
51         return ctx->branch_depth > 0 ?
52                         ctx->branch + (ctx->branch_depth - 1) : NULL;
53 }
54
/* Flatten a (register index, channel) pair into a single slot number:
 * four consecutive slots per register, channel selecting the slot. */
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	const unsigned slots_per_register = 4;
	const unsigned first_slot = index * slots_per_register;

	return first_slot + chan;
}
59
60 static LLVMValueRef emit_swizzle(
61         struct lp_build_tgsi_context * bld_base,
62         LLVMValueRef value,
63         unsigned swizzle_x,
64         unsigned swizzle_y,
65         unsigned swizzle_z,
66         unsigned swizzle_w)
67 {
68         LLVMValueRef swizzles[4];
69         LLVMTypeRef i32t =
70                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
71
72         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
73         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
74         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
75         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
76
77         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
78                 value,
79                 LLVMGetUndef(LLVMTypeOf(value)),
80                 LLVMConstVector(swizzles, 4), "");
81 }
82
83 static struct tgsi_declaration_range
84 get_array_range(struct lp_build_tgsi_context *bld_base,
85                 unsigned File, const struct tgsi_ind_register *reg)
86 {
87         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
88         if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
89             reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
90                 struct tgsi_declaration_range range;
91                 range.First = 0;
92                 range.Last = bld_base->info->file_max[File];
93                 return range;
94         }
95
96         return ctx->arrays[reg->ArrayID - 1];
97 }
98
99 static LLVMValueRef
100 emit_array_index(
101         struct lp_build_tgsi_soa_context *bld,
102         const struct tgsi_ind_register *reg,
103         unsigned offset)
104 {
105         struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
106
107         LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
108         return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
109 }
110
111 static LLVMValueRef
112 emit_fetch(
113         struct lp_build_tgsi_context *bld_base,
114         const struct tgsi_full_src_register *reg,
115         enum tgsi_opcode_type type,
116         unsigned swizzle);
117
118 static LLVMValueRef
119 emit_array_fetch(
120         struct lp_build_tgsi_context *bld_base,
121         unsigned File, enum tgsi_opcode_type type,
122         struct tgsi_declaration_range range,
123         unsigned swizzle)
124 {
125         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
126         struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
127         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
128
129         unsigned i, size = range.Last - range.First + 1;
130         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
131         LLVMValueRef result = LLVMGetUndef(vec);
132
133         struct tgsi_full_src_register tmp_reg = {};
134         tmp_reg.Register.File = File;
135
136         for (i = 0; i < size; ++i) {
137                 tmp_reg.Register.Index = i + range.First;
138                 LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
139                 result = LLVMBuildInsertElement(builder, result, temp,
140                         lp_build_const_int32(gallivm, i), "");
141         }
142         return result;
143 }
144
145 static bool uses_temp_indirect_addressing(
146         struct lp_build_tgsi_context *bld_base)
147 {
148         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
149         return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
150 }
151
152 static LLVMValueRef
153 emit_fetch(
154         struct lp_build_tgsi_context *bld_base,
155         const struct tgsi_full_src_register *reg,
156         enum tgsi_opcode_type type,
157         unsigned swizzle)
158 {
159         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
160         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
161         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
162         LLVMValueRef result = NULL, ptr;
163
164         if (swizzle == ~0) {
165                 LLVMValueRef values[TGSI_NUM_CHANNELS];
166                 unsigned chan;
167                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
168                         values[chan] = emit_fetch(bld_base, reg, type, chan);
169                 }
170                 return lp_build_gather_values(bld_base->base.gallivm, values,
171                                               TGSI_NUM_CHANNELS);
172         }
173
174         if (reg->Register.Indirect) {
175                 struct tgsi_declaration_range range = get_array_range(bld_base,
176                         reg->Register.File, &reg->Indirect);
177                 return LLVMBuildExtractElement(builder,
178                         emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
179                         emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
180                         "");
181         }
182
183         switch(reg->Register.File) {
184         case TGSI_FILE_IMMEDIATE: {
185                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
186                 return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
187         }
188
189         case TGSI_FILE_INPUT:
190                 result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
191                 break;
192
193         case TGSI_FILE_TEMPORARY:
194                 if (uses_temp_indirect_addressing(bld_base)) {
195                         ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
196                         break;
197                 }
198                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
199                 result = LLVMBuildLoad(builder, ptr, "");
200                 break;
201
202         case TGSI_FILE_OUTPUT:
203                 ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
204                 result = LLVMBuildLoad(builder, ptr, "");
205                 break;
206
207         default:
208                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
209         }
210
211         return bitcast(bld_base, type, result);
212 }
213
214 static LLVMValueRef fetch_system_value(
215         struct lp_build_tgsi_context * bld_base,
216         const struct tgsi_full_src_register *reg,
217         enum tgsi_opcode_type type,
218         unsigned swizzle)
219 {
220         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
221         struct gallivm_state *gallivm = bld_base->base.gallivm;
222
223         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
224         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
225                 cval = LLVMBuildExtractElement(gallivm->builder, cval,
226                                                lp_build_const_int32(gallivm, swizzle), "");
227         }
228         return bitcast(bld_base, type, cval);
229 }
230
231 static void emit_declaration(
232         struct lp_build_tgsi_context * bld_base,
233         const struct tgsi_full_declaration *decl)
234 {
235         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
236         unsigned first, last, i, idx;
237         switch(decl->Declaration.File) {
238         case TGSI_FILE_ADDRESS:
239         {
240                  unsigned idx;
241                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
242                         unsigned chan;
243                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
244                                  ctx->soa.addr[idx][chan] = lp_build_alloca(
245                                         &ctx->gallivm,
246                                         ctx->soa.bld_base.uint_bld.elem_type, "");
247                         }
248                 }
249                 break;
250         }
251
252         case TGSI_FILE_TEMPORARY:
253                 if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
254                         ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
255                 if (uses_temp_indirect_addressing(bld_base)) {
256                         lp_emit_declaration_soa(bld_base, decl);
257                         break;
258                 }
259                 first = decl->Range.First;
260                 last = decl->Range.Last;
261                 if (!ctx->temps_count) {
262                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
263                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
264                 }
265                 for (idx = first; idx <= last; idx++) {
266                         for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
267                                 ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
268                                         lp_build_alloca(bld_base->base.gallivm, bld_base->base.vec_type,
269                                                 "temp");
270                         }
271                 }
272                 break;
273
274         case TGSI_FILE_INPUT:
275         {
276                 unsigned idx;
277                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
278                         if (ctx->load_input)
279                                 ctx->load_input(ctx, idx, decl);
280                 }
281         }
282         break;
283
284         case TGSI_FILE_SYSTEM_VALUE:
285         {
286                 unsigned idx;
287                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
288                         ctx->load_system_value(ctx, idx, decl);
289                 }
290         }
291         break;
292
293         case TGSI_FILE_OUTPUT:
294         {
295                 unsigned idx;
296                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
297                         unsigned chan;
298                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
299                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
300                                 ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
301                                         ctx->soa.bld_base.base.elem_type, "");
302                         }
303                 }
304
305                 ctx->output_reg_count = MAX2(ctx->output_reg_count,
306                                                          decl->Range.Last + 1);
307                 break;
308         }
309
310         default:
311                 break;
312         }
313 }
314
315 static void
316 emit_store(
317         struct lp_build_tgsi_context * bld_base,
318         const struct tgsi_full_instruction * inst,
319         const struct tgsi_opcode_info * info,
320         LLVMValueRef dst[4])
321 {
322         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
323         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
324         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
325         struct lp_build_context base = bld->bld_base.base;
326         const struct tgsi_full_dst_register *reg = &inst->Dst[0];
327         LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
328         LLVMValueRef temp_ptr;
329         unsigned chan, chan_index;
330         boolean is_vec_store = FALSE;
331
332         if (dst[0]) {
333                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
334                 is_vec_store = (k == LLVMVectorTypeKind);
335         }
336
337         if (is_vec_store) {
338                 LLVMValueRef values[4] = {};
339                 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
340                         LLVMValueRef index = lp_build_const_int32(gallivm, chan);
341                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
342                                                         dst[0], index, "");
343                 }
344                 bld_base->emit_store(bld_base, inst, info, values);
345                 return;
346         }
347
348         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
349                 LLVMValueRef value = dst[chan_index];
350
351                 if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
352                         struct lp_build_emit_data clamp_emit_data;
353
354                         memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
355                         clamp_emit_data.arg_count = 3;
356                         clamp_emit_data.args[0] = value;
357                         clamp_emit_data.args[2] = base.one;
358
359                         switch(inst->Instruction.Saturate) {
360                         case TGSI_SAT_ZERO_ONE:
361                                 clamp_emit_data.args[1] = base.zero;
362                                 break;
363                         case TGSI_SAT_MINUS_PLUS_ONE:
364                                 clamp_emit_data.args[1] = LLVMConstReal(
365                                                 base.elem_type, -1.0f);
366                                 break;
367                         default:
368                                 assert(0);
369                         }
370                         value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
371                                                 &clamp_emit_data);
372                 }
373
374                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
375                         temp_ptr = bld->addr[reg->Register.Index][chan_index];
376                         LLVMBuildStore(builder, value, temp_ptr);
377                         continue;
378                 }
379         
380                 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
381
382                 if (reg->Register.Indirect) {
383                         struct tgsi_declaration_range range = get_array_range(bld_base,
384                                 reg->Register.File, &reg->Indirect);
385
386                         unsigned i, size = range.Last - range.First + 1;
387                         LLVMValueRef array = LLVMBuildInsertElement(builder,
388                                 emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
389                                 value,  emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
390
391                         for (i = 0; i < size; ++i) {
392                                 switch(reg->Register.File) {
393                                 case TGSI_FILE_OUTPUT:
394                                         temp_ptr = bld->outputs[i + range.First][chan_index];
395                                         break;
396
397                                 case TGSI_FILE_TEMPORARY:
398                                         if (uses_temp_indirect_addressing(bld_base))
399                                                 temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
400                                         else
401                                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
402                                         break;
403
404                                 default:
405                                         return;
406                                 }
407                                 value = LLVMBuildExtractElement(builder, array, 
408                                         lp_build_const_int32(gallivm, i), "");
409                                 LLVMBuildStore(builder, value, temp_ptr);
410                         }
411
412                 } else {
413                         switch(reg->Register.File) {
414                         case TGSI_FILE_OUTPUT:
415                                 temp_ptr = bld->outputs[reg->Register.Index][chan_index];
416                                 break;
417
418                         case TGSI_FILE_TEMPORARY:
419                                 if (uses_temp_indirect_addressing(bld_base)) {
420                                         temp_ptr = NULL;
421                                         break;
422                                 }
423                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
424                                 break;
425
426                         default:
427                                 return;
428                         }
429                         LLVMBuildStore(builder, value, temp_ptr);
430                 }
431         }
432 }
433
434 static void bgnloop_emit(
435         const struct lp_build_tgsi_action * action,
436         struct lp_build_tgsi_context * bld_base,
437         struct lp_build_emit_data * emit_data)
438 {
439         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
440         struct gallivm_state * gallivm = bld_base->base.gallivm;
441         LLVMBasicBlockRef loop_block;
442         LLVMBasicBlockRef endloop_block;
443         endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
444                                                 ctx->main_fn, "ENDLOOP");
445         loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
446                                                 endloop_block, "LOOP");
447         LLVMBuildBr(gallivm->builder, loop_block);
448         LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
449
450         if (++ctx->loop_depth > ctx->loop_depth_max) {
451                 unsigned new_max = ctx->loop_depth_max << 1;
452
453                 if (!new_max)
454                         new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
455
456                 ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
457                                     sizeof(ctx->loop[0]),
458                                     new_max * sizeof(ctx->loop[0]));
459                 ctx->loop_depth_max = new_max;
460         }
461
462         ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
463         ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
464 }
465
466 static void brk_emit(
467         const struct lp_build_tgsi_action * action,
468         struct lp_build_tgsi_context * bld_base,
469         struct lp_build_emit_data * emit_data)
470 {
471         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
472         struct gallivm_state * gallivm = bld_base->base.gallivm;
473         struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
474
475         LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
476 }
477
478 static void cont_emit(
479         const struct lp_build_tgsi_action * action,
480         struct lp_build_tgsi_context * bld_base,
481         struct lp_build_emit_data * emit_data)
482 {
483         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
484         struct gallivm_state * gallivm = bld_base->base.gallivm;
485         struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
486
487         LLVMBuildBr(gallivm->builder, current_loop->loop_block);
488 }
489
490 static void else_emit(
491         const struct lp_build_tgsi_action * action,
492         struct lp_build_tgsi_context * bld_base,
493         struct lp_build_emit_data * emit_data)
494 {
495         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
496         struct gallivm_state * gallivm = bld_base->base.gallivm;
497         struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
498         LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
499
500         /* We need to add a terminator to the current block if the previous
501          * instruction was an ENDIF.Example:
502          * IF
503          *   [code]
504          *   IF
505          *     [code]
506          *   ELSE
507          *    [code]
508          *   ENDIF <--
509          * ELSE<--
510          *   [code]
511          * ENDIF
512          */
513
514         if (current_block != current_branch->if_block) {
515                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
516         }
517         if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
518                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
519         }
520         current_branch->has_else = 1;
521         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
522 }
523
524 static void endif_emit(
525         const struct lp_build_tgsi_action * action,
526         struct lp_build_tgsi_context * bld_base,
527         struct lp_build_emit_data * emit_data)
528 {
529         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
530         struct gallivm_state * gallivm = bld_base->base.gallivm;
531         struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
532         LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
533
534         /* If we have consecutive ENDIF instructions, then the first ENDIF
535          * will not have a terminator, so we need to add one. */
536         if (current_block != current_branch->if_block
537                         && current_block != current_branch->else_block
538                         && !LLVMGetBasicBlockTerminator(current_block)) {
539
540                  LLVMBuildBr(gallivm->builder, current_branch->endif_block);
541         }
542         if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
543                 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
544                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
545         }
546
547         if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
548                 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
549                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
550         }
551
552         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
553         ctx->branch_depth--;
554 }
555
556 static void endloop_emit(
557         const struct lp_build_tgsi_action * action,
558         struct lp_build_tgsi_context * bld_base,
559         struct lp_build_emit_data * emit_data)
560 {
561         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
562         struct gallivm_state * gallivm = bld_base->base.gallivm;
563         struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
564
565         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
566                  LLVMBuildBr(gallivm->builder, current_loop->loop_block);
567         }
568
569         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
570         ctx->loop_depth--;
571 }
572
573 static void if_cond_emit(
574         const struct lp_build_tgsi_action * action,
575         struct lp_build_tgsi_context * bld_base,
576         struct lp_build_emit_data * emit_data,
577         LLVMValueRef cond)
578 {
579         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
580         struct gallivm_state * gallivm = bld_base->base.gallivm;
581         LLVMBasicBlockRef if_block, else_block, endif_block;
582
583         endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
584                                                 ctx->main_fn, "ENDIF");
585         if_block = LLVMInsertBasicBlockInContext(gallivm->context,
586                                                 endif_block, "IF");
587         else_block = LLVMInsertBasicBlockInContext(gallivm->context,
588                                                 endif_block, "ELSE");
589         LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
590         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
591
592         if (++ctx->branch_depth > ctx->branch_depth_max) {
593                 unsigned new_max = ctx->branch_depth_max << 1;
594
595                 if (!new_max)
596                         new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
597
598                 ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
599                                       sizeof(ctx->branch[0]),
600                                       new_max * sizeof(ctx->branch[0]));
601                 ctx->branch_depth_max = new_max;
602         }
603
604         ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
605         ctx->branch[ctx->branch_depth - 1].if_block = if_block;
606         ctx->branch[ctx->branch_depth - 1].else_block = else_block;
607         ctx->branch[ctx->branch_depth - 1].has_else = 0;
608 }
609
610 static void if_emit(
611         const struct lp_build_tgsi_action * action,
612         struct lp_build_tgsi_context * bld_base,
613         struct lp_build_emit_data * emit_data)
614 {
615         struct gallivm_state * gallivm = bld_base->base.gallivm;
616         LLVMValueRef cond;
617
618         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
619                         emit_data->args[0],
620                         bld_base->base.zero, "");
621
622         if_cond_emit(action, bld_base, emit_data, cond);
623 }
624
625 static void uif_emit(
626         const struct lp_build_tgsi_action * action,
627         struct lp_build_tgsi_context * bld_base,
628         struct lp_build_emit_data * emit_data)
629 {
630         struct gallivm_state * gallivm = bld_base->base.gallivm;
631         LLVMValueRef cond;
632
633         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
634                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
635                         bld_base->int_bld.zero, "");
636
637         if_cond_emit(action, bld_base, emit_data, cond);
638 }
639
640 static void kil_emit(
641         const struct lp_build_tgsi_action * action,
642         struct lp_build_tgsi_context * bld_base,
643         struct lp_build_emit_data * emit_data)
644 {
645         unsigned i;
646         for (i = 0; i < emit_data->arg_count; i++) {
647                 emit_data->output[i] = lp_build_intrinsic_unary(
648                         bld_base->base.gallivm->builder,
649                         action->intr_name,
650                         emit_data->dst_type, emit_data->args[i]);
651         }
652 }
653
654 void radeon_llvm_emit_prepare_cube_coords(
655                 struct lp_build_tgsi_context * bld_base,
656                 struct lp_build_emit_data * emit_data,
657                 LLVMValueRef *coords_arg)
658 {
659
660         unsigned target = emit_data->inst->Texture.Texture;
661         unsigned opcode = emit_data->inst->Instruction.Opcode;
662         struct gallivm_state * gallivm = bld_base->base.gallivm;
663         LLVMBuilderRef builder = gallivm->builder;
664         LLVMTypeRef type = bld_base->base.elem_type;
665         LLVMValueRef coords[4];
666         LLVMValueRef mad_args[3];
667         LLVMValueRef idx;
668         struct LLVMOpaqueValue *cube_vec;
669         LLVMValueRef v;
670         unsigned i;
671
672         cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
673         v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
674                             &cube_vec, 1, LLVMReadNoneAttribute);
675
676         for (i = 0; i < 4; ++i) {
677                 idx = lp_build_const_int32(gallivm, i);
678                 coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
679         }
680
681         coords[2] = build_intrinsic(builder, "fabs",
682                         type, &coords[2], 1, LLVMReadNoneAttribute);
683         coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
684
685         mad_args[1] = coords[2];
686         mad_args[2] = LLVMConstReal(type, 1.5);
687
688         mad_args[0] = coords[0];
689         coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
690                         mad_args[0], mad_args[1], mad_args[2]);
691
692         mad_args[0] = coords[1];
693         coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
694                         mad_args[0], mad_args[1], mad_args[2]);
695
696         /* apply xyz = yxw swizzle to cooords */
697         coords[2] = coords[3];
698         coords[3] = coords[1];
699         coords[1] = coords[0];
700         coords[0] = coords[3];
701
702         if (target == TGSI_TEXTURE_CUBE_ARRAY ||
703             target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
704                 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
705                 /* coords_arg.w component - array_index for cube arrays */
706                 coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
707                                                        coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
708         }
709
710         /* Preserve compare/lod/bias. Put it in coords.w. */
711         if (opcode == TGSI_OPCODE_TEX2 ||
712             opcode == TGSI_OPCODE_TXB2 ||
713             opcode == TGSI_OPCODE_TXL2) {
714                 coords[3] = coords_arg[4];
715         } else if (opcode == TGSI_OPCODE_TXB ||
716                    opcode == TGSI_OPCODE_TXL ||
717                    target == TGSI_TEXTURE_SHADOWCUBE) {
718                 coords[3] = coords_arg[3];
719         }
720
721         memcpy(coords_arg, coords, sizeof(coords));
722 }
723
724 static void txd_fetch_args(
725         struct lp_build_tgsi_context * bld_base,
726         struct lp_build_emit_data * emit_data)
727 {
728         const struct tgsi_full_instruction * inst = emit_data->inst;
729
730         LLVMValueRef coords[4];
731         unsigned chan, src;
732         for (src = 0; src < 3; src++) {
733                 for (chan = 0; chan < 4; chan++)
734                         coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
735
736                 emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
737                                 coords, 4);
738         }
739         emit_data->arg_count = 3;
740         emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
741 }
742
743
744 static void txp_fetch_args(
745         struct lp_build_tgsi_context * bld_base,
746         struct lp_build_emit_data * emit_data)
747 {
748         const struct tgsi_full_instruction * inst = emit_data->inst;
749         LLVMValueRef src_w;
750         unsigned chan;
751         LLVMValueRef coords[4];
752
753         emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
754         src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
755
756         for (chan = 0; chan < 3; chan++ ) {
757                 LLVMValueRef arg = lp_build_emit_fetch(bld_base,
758                                                 emit_data->inst, 0, chan);
759                 coords[chan] = lp_build_emit_llvm_binary(bld_base,
760                                         TGSI_OPCODE_DIV, arg, src_w);
761         }
762         coords[3] = bld_base->base.one;
763
764         if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
765              inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
766              inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
767              inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
768             inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
769             inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
770                 radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
771         }
772
773         emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
774                                                 coords, 4);
775         emit_data->arg_count = 1;
776 }
777
778 static void tex_fetch_args(
779         struct lp_build_tgsi_context * bld_base,
780         struct lp_build_emit_data * emit_data)
781 {
782         /* XXX: lp_build_swizzle_aos() was failing with wrong arg types,
783          * when we used CHAN_ALL.  We should be able to get this to work,
784          * but for now we will swizzle it ourselves
785         emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
786                                                  0, CHAN_ALL);
787
788         */
789
790         const struct tgsi_full_instruction * inst = emit_data->inst;
791
792         LLVMValueRef coords[5];
793         unsigned chan;
794         for (chan = 0; chan < 4; chan++) {
795                 coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
796         }
797
798         if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
799                 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
800                 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
801                 /* These instructions have additional operand that should be packed
802                  * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
803                  * That operand should be passed as a float value in the args array
804                  * right after the coord vector. After packing it's not used anymore,
805                  * that's why arg_count is not increased */
806                 coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
807         }
808
809         if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
810              inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
811              inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
812              inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
813             inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
814             inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
815                 radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
816         }
817
818         emit_data->arg_count = 1;
819         emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
820                                                 coords, 4);
821         emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
822 }
823
824 static void txf_fetch_args(
825         struct lp_build_tgsi_context * bld_base,
826         struct lp_build_emit_data * emit_data)
827 {
828         const struct tgsi_full_instruction * inst = emit_data->inst;
829         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
830         const struct tgsi_texture_offset * off = inst->TexOffsets;
831         LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
832
833         /* fetch tex coords */
834         tex_fetch_args(bld_base, emit_data);
835
836         /* fetch tex offsets */
837         if (inst->Texture.NumOffsets) {
838                 assert(inst->Texture.NumOffsets == 1);
839
840                 emit_data->args[1] = LLVMConstBitCast(
841                         bld->immediates[off->Index][off->SwizzleX],
842                         offset_type);
843                 emit_data->args[2] = LLVMConstBitCast(
844                         bld->immediates[off->Index][off->SwizzleY],
845                         offset_type);
846                 emit_data->args[3] = LLVMConstBitCast(
847                         bld->immediates[off->Index][off->SwizzleZ],
848                         offset_type);
849         } else {
850                 emit_data->args[1] = bld_base->int_bld.zero;
851                 emit_data->args[2] = bld_base->int_bld.zero;
852                 emit_data->args[3] = bld_base->int_bld.zero;
853         }
854
855         emit_data->arg_count = 4;
856 }
857
858 static void emit_icmp(
859                 const struct lp_build_tgsi_action * action,
860                 struct lp_build_tgsi_context * bld_base,
861                 struct lp_build_emit_data * emit_data)
862 {
863         unsigned pred;
864         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
865         LLVMContextRef context = bld_base->base.gallivm->context;
866
867         switch (emit_data->inst->Instruction.Opcode) {
868         case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
869         case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
870         case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
871         case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
872         case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
873         case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
874         default:
875                 assert(!"unknown instruction");
876                 pred = 0;
877                 break;
878         }
879
880         LLVMValueRef v = LLVMBuildICmp(builder, pred,
881                         emit_data->args[0], emit_data->args[1],"");
882
883         v = LLVMBuildSExtOrBitCast(builder, v,
884                         LLVMInt32TypeInContext(context), "");
885
886         emit_data->output[emit_data->chan] = v;
887 }
888
889 static void emit_ucmp(
890                 const struct lp_build_tgsi_action * action,
891                 struct lp_build_tgsi_context * bld_base,
892                 struct lp_build_emit_data * emit_data)
893 {
894         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
895
896         LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
897                                              bld_base->uint_bld.elem_type, "");
898
899         LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
900                                        bld_base->uint_bld.zero, "");
901
902         emit_data->output[emit_data->chan] =
903                 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
904 }
905
906 static void emit_cmp(
907                 const struct lp_build_tgsi_action *action,
908                 struct lp_build_tgsi_context * bld_base,
909                 struct lp_build_emit_data * emit_data)
910 {
911         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
912         LLVMRealPredicate pred;
913         LLVMValueRef cond;
914
915         /* Use ordered for everything but NE (which is usual for
916          * float comparisons)
917          */
918         switch (emit_data->inst->Instruction.Opcode) {
919         case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
920         case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
921         case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
922         case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
923         case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
924         case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
925         default: assert(!"unknown instruction"); pred = 0; break;
926         }
927
928         cond = LLVMBuildFCmp(builder,
929                 pred, emit_data->args[0], emit_data->args[1], "");
930
931         emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
932                 cond, bld_base->base.one, bld_base->base.zero, "");
933 }
934
935 static void emit_fcmp(
936                 const struct lp_build_tgsi_action *action,
937                 struct lp_build_tgsi_context * bld_base,
938                 struct lp_build_emit_data * emit_data)
939 {
940         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
941         LLVMContextRef context = bld_base->base.gallivm->context;
942         LLVMRealPredicate pred;
943
944         /* Use ordered for everything but NE (which is usual for
945          * float comparisons)
946          */
947         switch (emit_data->inst->Instruction.Opcode) {
948         case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
949         case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
950         case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
951         case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
952         default: assert(!"unknown instruction"); pred = 0; break;
953         }
954
955         LLVMValueRef v = LLVMBuildFCmp(builder, pred,
956                         emit_data->args[0], emit_data->args[1],"");
957
958         v = LLVMBuildSExtOrBitCast(builder, v,
959                         LLVMInt32TypeInContext(context), "");
960
961         emit_data->output[emit_data->chan] = v;
962 }
963
964 static void emit_not(
965                 const struct lp_build_tgsi_action * action,
966                 struct lp_build_tgsi_context * bld_base,
967                 struct lp_build_emit_data * emit_data)
968 {
969         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
970         LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
971                         emit_data->args[0]);
972         emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
973 }
974
975 static void emit_arl(
976                 const struct lp_build_tgsi_action * action,
977                 struct lp_build_tgsi_context * bld_base,
978                 struct lp_build_emit_data * emit_data)
979 {
980         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
981         LLVMValueRef floor_index =  lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
982         emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
983                         floor_index, bld_base->base.int_elem_type , "");
984 }
985
986 static void emit_and(
987                 const struct lp_build_tgsi_action * action,
988                 struct lp_build_tgsi_context * bld_base,
989                 struct lp_build_emit_data * emit_data)
990 {
991         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
992         emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
993                         emit_data->args[0], emit_data->args[1], "");
994 }
995
996 static void emit_or(
997                 const struct lp_build_tgsi_action * action,
998                 struct lp_build_tgsi_context * bld_base,
999                 struct lp_build_emit_data * emit_data)
1000 {
1001         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1002         emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1003                         emit_data->args[0], emit_data->args[1], "");
1004 }
1005
1006 static void emit_uadd(
1007                 const struct lp_build_tgsi_action * action,
1008                 struct lp_build_tgsi_context * bld_base,
1009                 struct lp_build_emit_data * emit_data)
1010 {
1011         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1012         emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1013                         emit_data->args[0], emit_data->args[1], "");
1014 }
1015
1016 static void emit_udiv(
1017                 const struct lp_build_tgsi_action * action,
1018                 struct lp_build_tgsi_context * bld_base,
1019                 struct lp_build_emit_data * emit_data)
1020 {
1021         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1022         emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1023                         emit_data->args[0], emit_data->args[1], "");
1024 }
1025
1026 static void emit_idiv(
1027                 const struct lp_build_tgsi_action * action,
1028                 struct lp_build_tgsi_context * bld_base,
1029                 struct lp_build_emit_data * emit_data)
1030 {
1031         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1032         emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1033                         emit_data->args[0], emit_data->args[1], "");
1034 }
1035
1036 static void emit_mod(
1037                 const struct lp_build_tgsi_action * action,
1038                 struct lp_build_tgsi_context * bld_base,
1039                 struct lp_build_emit_data * emit_data)
1040 {
1041         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1042         emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1043                         emit_data->args[0], emit_data->args[1], "");
1044 }
1045
1046 static void emit_umod(
1047                 const struct lp_build_tgsi_action * action,
1048                 struct lp_build_tgsi_context * bld_base,
1049                 struct lp_build_emit_data * emit_data)
1050 {
1051         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1052         emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1053                         emit_data->args[0], emit_data->args[1], "");
1054 }
1055
1056 static void emit_shl(
1057                 const struct lp_build_tgsi_action * action,
1058                 struct lp_build_tgsi_context * bld_base,
1059                 struct lp_build_emit_data * emit_data)
1060 {
1061         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1062         emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1063                         emit_data->args[0], emit_data->args[1], "");
1064 }
1065
1066 static void emit_ushr(
1067                 const struct lp_build_tgsi_action * action,
1068                 struct lp_build_tgsi_context * bld_base,
1069                 struct lp_build_emit_data * emit_data)
1070 {
1071         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1072         emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1073                         emit_data->args[0], emit_data->args[1], "");
1074 }
1075 static void emit_ishr(
1076                 const struct lp_build_tgsi_action * action,
1077                 struct lp_build_tgsi_context * bld_base,
1078                 struct lp_build_emit_data * emit_data)
1079 {
1080         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1081         emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1082                         emit_data->args[0], emit_data->args[1], "");
1083 }
1084
1085 static void emit_xor(
1086                 const struct lp_build_tgsi_action * action,
1087                 struct lp_build_tgsi_context * bld_base,
1088                 struct lp_build_emit_data * emit_data)
1089 {
1090         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1091         emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1092                         emit_data->args[0], emit_data->args[1], "");
1093 }
1094
1095 static void emit_ssg(
1096                 const struct lp_build_tgsi_action * action,
1097                 struct lp_build_tgsi_context * bld_base,
1098                 struct lp_build_emit_data * emit_data)
1099 {
1100         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1101
1102         LLVMValueRef cmp, val;
1103
1104         if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
1105                 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
1106                 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
1107                 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
1108                 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
1109         } else { // float SSG
1110                 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
1111                 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
1112                 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
1113                 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
1114         }
1115
1116         emit_data->output[emit_data->chan] = val;
1117 }
1118
1119 static void emit_ineg(
1120                 const struct lp_build_tgsi_action * action,
1121                 struct lp_build_tgsi_context * bld_base,
1122                 struct lp_build_emit_data * emit_data)
1123 {
1124         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1125         emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1126                         emit_data->args[0], "");
1127 }
1128
1129 static void emit_f2i(
1130                 const struct lp_build_tgsi_action * action,
1131                 struct lp_build_tgsi_context * bld_base,
1132                 struct lp_build_emit_data * emit_data)
1133 {
1134         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1135         emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1136                         emit_data->args[0], bld_base->int_bld.elem_type, "");
1137 }
1138
1139 static void emit_f2u(
1140                 const struct lp_build_tgsi_action * action,
1141                 struct lp_build_tgsi_context * bld_base,
1142                 struct lp_build_emit_data * emit_data)
1143 {
1144         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1145         emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1146                         emit_data->args[0], bld_base->uint_bld.elem_type, "");
1147 }
1148
1149 static void emit_i2f(
1150                 const struct lp_build_tgsi_action * action,
1151                 struct lp_build_tgsi_context * bld_base,
1152                 struct lp_build_emit_data * emit_data)
1153 {
1154         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1155         emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1156                         emit_data->args[0], bld_base->base.elem_type, "");
1157 }
1158
1159 static void emit_u2f(
1160                 const struct lp_build_tgsi_action * action,
1161                 struct lp_build_tgsi_context * bld_base,
1162                 struct lp_build_emit_data * emit_data)
1163 {
1164         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1165         emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1166                         emit_data->args[0], bld_base->base.elem_type, "");
1167 }
1168
1169 static void emit_immediate(struct lp_build_tgsi_context * bld_base,
1170                 const struct tgsi_full_immediate *imm)
1171 {
1172         unsigned i;
1173         struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
1174
1175         for (i = 0; i < 4; ++i) {
1176                 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1177                                 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
1178         }
1179
1180         ctx->soa.num_immediates++;
1181 }
1182
1183 LLVMValueRef
1184 build_intrinsic(LLVMBuilderRef builder,
1185                    const char *name,
1186                    LLVMTypeRef ret_type,
1187                    LLVMValueRef *args,
1188                    unsigned num_args,
1189                    LLVMAttribute attr)
1190 {
1191    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
1192    LLVMValueRef function;
1193
1194    function = LLVMGetNamedFunction(module, name);
1195    if(!function) {
1196       LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
1197       unsigned i;
1198
1199       assert(num_args <= LP_MAX_FUNC_ARGS);
1200
1201       for(i = 0; i < num_args; ++i) {
1202          assert(args[i]);
1203          arg_types[i] = LLVMTypeOf(args[i]);
1204       }
1205
1206       function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
1207
1208       if (attr)
1209           LLVMAddFunctionAttr(function, attr);
1210    }
1211
1212    return LLVMBuildCall(builder, function, args, num_args, "");
1213 }
1214
1215 static void build_tgsi_intrinsic(
1216  const struct lp_build_tgsi_action * action,
1217  struct lp_build_tgsi_context * bld_base,
1218  struct lp_build_emit_data * emit_data,
1219  LLVMAttribute attr)
1220 {
1221    struct lp_build_context * base = &bld_base->base;
1222    emit_data->output[emit_data->chan] = build_intrinsic(
1223                base->gallivm->builder, action->intr_name,
1224                emit_data->dst_type, emit_data->args,
1225                emit_data->arg_count, attr);
1226 }
1227 void
1228 build_tgsi_intrinsic_nomem(
1229  const struct lp_build_tgsi_action * action,
1230  struct lp_build_tgsi_context * bld_base,
1231  struct lp_build_emit_data * emit_data)
1232 {
1233         build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
1234 }
1235
1236 static void build_tgsi_intrinsic_readonly(
1237  const struct lp_build_tgsi_action * action,
1238  struct lp_build_tgsi_context * bld_base,
1239  struct lp_build_emit_data * emit_data)
1240 {
1241         build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadOnlyAttribute);
1242 }
1243
1244 void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
1245 {
1246         struct lp_type type;
1247
1248         /* Initialize the gallivm object:
1249          * We are only using the module, context, and builder fields of this struct.
1250          * This should be enough for us to be able to pass our gallivm struct to the
1251          * helper functions in the gallivm module.
1252          */
1253         memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
1254         memset(&ctx->soa, 0, sizeof(ctx->soa));
1255         ctx->gallivm.context = LLVMContextCreate();
1256         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1257                                                 ctx->gallivm.context);
1258         ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1259
1260         ctx->store_output_intr = "llvm.AMDGPU.store.output.";
1261         ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
1262         struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
1263
1264         /* XXX: We need to revisit this.I think the correct way to do this is
1265          * to use length = 4 here and use the elem_bld for everything. */
1266         type.floating = TRUE;
1267         type.fixed = FALSE;
1268         type.sign = TRUE;
1269         type.norm = FALSE;
1270         type.width = 32;
1271         type.length = 1;
1272
1273         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1274         lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1275         lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1276
1277         bld_base->soa = 1;
1278         bld_base->emit_store = emit_store;
1279         bld_base->emit_swizzle = emit_swizzle;
1280         bld_base->emit_declaration = emit_declaration;
1281         bld_base->emit_immediate = emit_immediate;
1282
1283         bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
1284         bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
1285         bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
1286         bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
1287         bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1288
1289         /* Allocate outputs */
1290         ctx->soa.outputs = ctx->outputs;
1291
1292         ctx->num_arrays = 0;
1293
1294         /* XXX: Is there a better way to initialize all this ? */
1295
1296         lp_set_default_actions(bld_base);
1297
1298         bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_readonly;
1299         bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
1300         bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
1301         bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
1302         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1303         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1304         bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_readonly;
1305         bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
1306         bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
1307         bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
1308         bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
1309         bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
1310         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1311         bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_readonly;
1312         bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
1313         bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
1314         bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
1315         bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
1316         bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
1317         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1318         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1319         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1320         bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
1321         bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
1322         bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_readonly;
1323         bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
1324         bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
1325         bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
1326         bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
1327         bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
1328         bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
1329         bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
1330         bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
1331         bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
1332         bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
1333         bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
1334         bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
1335         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1336         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1337         bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
1338         bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
1339         bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
1340         bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
1341         bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
1342         bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
1343         bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
1344         bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
1345         bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
1346         bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
1347         bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
1348         bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
1349         bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
1350         bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
1351         bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_readonly;
1352         bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
1353         bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem;
1354         bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp";
1355         bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
1356         bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
1357         bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
1358         bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_readonly;
1359         bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
1360         bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
1361         bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
1362         bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
1363         bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
1364         bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
1365         bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
1366         bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
1367         bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
1368         bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
1369         bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_readonly;
1370         bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
1371         bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
1372         bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
1373         bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
1374         bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
1375         bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
1376         bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
1377         bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
1378         bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
1379         bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
1380         bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
1381         bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
1382         bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
1383         bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
1384         bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
1385         bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
1386         bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
1387         bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
1388         bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
1389         bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
1390         bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
1391         bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
1392         bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
1393         bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
1394         bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
1395         bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
1396         bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem;
1397         bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
1398         bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem;
1399         bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
1400         bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
1401         bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
1402         bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
1403         bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
1404         bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
1405         bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
1406         bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
1407         bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
1408         bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
1409
1410         bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
1411 #if HAVE_LLVM >= 0x0305
1412         bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
1413 #else
1414         bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
1415 #endif
1416 }
1417
1418 void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
1419                              LLVMTypeRef *ParamTypes, unsigned ParamCount)
1420 {
1421         LLVMTypeRef main_fn_type;
1422         LLVMBasicBlockRef main_fn_body;
1423
1424         /* Setup the function */
1425         main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
1426                                         ParamTypes, ParamCount, 0);
1427         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
1428         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1429                         ctx->main_fn, "main_body");
1430         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1431 }
1432
1433 void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
1434 {
1435         struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
1436         /* End the main function with Return*/
1437         LLVMBuildRetVoid(gallivm->builder);
1438
1439         /* Create the pass manager */
1440         ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
1441                                                         gallivm->module);
1442
1443         /* This pass should eliminate all the load and store instructions */
1444         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1445
1446         /* Add some optimization passes */
1447         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1448         LLVMAddLICMPass(gallivm->passmgr);
1449         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1450         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1451
1452         /* Run the pass */
1453         LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
1454
1455         LLVMDisposeBuilder(gallivm->builder);
1456         LLVMDisposePassManager(gallivm->passmgr);
1457
1458 }
1459
1460 void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
1461 {
1462         LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
1463         LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
1464         FREE(ctx->temps);
1465         ctx->temps = NULL;
1466         FREE(ctx->loop);
1467         ctx->loop = NULL;
1468         ctx->loop_depth_max = 0;
1469         FREE(ctx->branch);
1470         ctx->branch = NULL;
1471         ctx->branch_depth_max = 0;
1472 }