OSDN Git Service

2a5f916ccc9d075a8bdf3d04eb5a9de1fc0e658c
[android-x86/external-mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2  * Copyright © 2016 Bas Nieuwenhuizen
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_util.h"
26 #include "ac_binary.h"
27 #include "sid.h"
28 #include "nir/nir.h"
29 #include "../vulkan/radv_descriptor_set.h"
30 #include "util/bitscan.h"
31 #include <llvm-c/Transforms/Scalar.h>
32
/*
 * Numeric calling-convention IDs passed to LLVMSetFunctionCallConv().
 * set_llvm_calling_convention() maps each Mesa shader stage onto one of these.
 */
enum radeon_llvm_calling_convention {
        RADEON_LLVM_AMDGPU_VS = 87, /* vertex and tessellation stages */
        RADEON_LLVM_AMDGPU_GS = 88, /* geometry stage */
        RADEON_LLVM_AMDGPU_PS = 89, /* fragment stage */
        RADEON_LLVM_AMDGPU_CS = 90, /* compute stage */
};
39
/* Address space given to LLVMPointerType() by const_array(). */
#define CONST_ADDR_SPACE 2
/* NOTE(review): presumably the address space for workgroup-local (LDS) memory - confirm. */
#define LOCAL_ADDR_SPACE 3

/*
 * Number of varying slots tracked per direction; the context's inputs[] and
 * outputs[] arrays hold four entries for each slot.
 */
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
45
/* Selects which kind of descriptor get_sampler_desc() is asked for. */
enum desc_type {
        DESC_IMAGE,   /* 0 */
        DESC_FMASK,   /* 1 */
        DESC_SAMPLER, /* 2 */
        DESC_BUFFER,  /* 3 */
};
52
53 struct nir_to_llvm_context {
54         struct ac_llvm_context ac;
55         const struct ac_nir_compiler_options *options;
56         struct ac_shader_variant_info *shader_info;
57
58         LLVMContextRef context;
59         LLVMModuleRef module;
60         LLVMBuilderRef builder;
61         LLVMValueRef main_function;
62
63         struct hash_table *defs;
64         struct hash_table *phis;
65
66         LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
67         LLVMValueRef push_constants;
68         LLVMValueRef num_work_groups;
69         LLVMValueRef workgroup_ids;
70         LLVMValueRef local_invocation_ids;
71         LLVMValueRef tg_size;
72
73         LLVMValueRef vertex_buffers;
74         LLVMValueRef base_vertex;
75         LLVMValueRef start_instance;
76         LLVMValueRef vertex_id;
77         LLVMValueRef rel_auto_id;
78         LLVMValueRef vs_prim_id;
79         LLVMValueRef instance_id;
80
81         LLVMValueRef prim_mask;
82         LLVMValueRef sample_positions;
83         LLVMValueRef persp_sample, persp_center, persp_centroid;
84         LLVMValueRef linear_sample, linear_center, linear_centroid;
85         LLVMValueRef front_face;
86         LLVMValueRef ancillary;
87         LLVMValueRef frag_pos[4];
88
89         LLVMBasicBlockRef continue_block;
90         LLVMBasicBlockRef break_block;
91
92         LLVMTypeRef i1;
93         LLVMTypeRef i8;
94         LLVMTypeRef i16;
95         LLVMTypeRef i32;
96         LLVMTypeRef i64;
97         LLVMTypeRef v2i32;
98         LLVMTypeRef v3i32;
99         LLVMTypeRef v4i32;
100         LLVMTypeRef v8i32;
101         LLVMTypeRef f32;
102         LLVMTypeRef f16;
103         LLVMTypeRef v2f32;
104         LLVMTypeRef v4f32;
105         LLVMTypeRef v16i8;
106         LLVMTypeRef voidt;
107
108         LLVMValueRef i32zero;
109         LLVMValueRef i32one;
110         LLVMValueRef f32zero;
111         LLVMValueRef f32one;
112         LLVMValueRef v4f32empty;
113
114         unsigned range_md_kind;
115         unsigned uniform_md_kind;
116         unsigned fpmath_md_kind;
117         unsigned invariant_load_md_kind;
118         LLVMValueRef empty_md;
119         LLVMValueRef fpmath_md_2p5_ulp;
120         gl_shader_stage stage;
121
122         LLVMValueRef lds;
123         LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
124         LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
125
126         LLVMValueRef shared_memory;
127         uint64_t input_mask;
128         uint64_t output_mask;
129         int num_locals;
130         LLVMValueRef *locals;
131         bool has_ddxy;
132         unsigned num_clips;
133         unsigned num_culls;
134
135         bool has_ds_bpermute;
136 };
137
138 struct ac_tex_info {
139         LLVMValueRef args[12];
140         int arg_count;
141         LLVMTypeRef dst_type;
142         bool has_offset;
143 };
144
145 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
146                                      nir_deref_var *deref,
147                                      enum desc_type desc_type);
/*
 * Flatten a (slot, channel) pair into an index into a four-entries-per-slot
 * array: index * 4 + chan.
 */
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
        const unsigned slot_base = 4 * index;

        return slot_base + chan;
}
152
153 static unsigned llvm_get_type_size(LLVMTypeRef type)
154 {
155         LLVMTypeKind kind = LLVMGetTypeKind(type);
156
157         switch (kind) {
158         case LLVMIntegerTypeKind:
159                 return LLVMGetIntTypeWidth(type) / 8;
160         case LLVMFloatTypeKind:
161                 return 4;
162         case LLVMPointerTypeKind:
163                 return 8;
164         case LLVMVectorTypeKind:
165                 return LLVMGetVectorSize(type) *
166                        llvm_get_type_size(LLVMGetElementType(type));
167         default:
168                 assert(0);
169                 return 0;
170         }
171 }
172
173 static void set_llvm_calling_convention(LLVMValueRef func,
174                                         gl_shader_stage stage)
175 {
176         enum radeon_llvm_calling_convention calling_conv;
177
178         switch (stage) {
179         case MESA_SHADER_VERTEX:
180         case MESA_SHADER_TESS_CTRL:
181         case MESA_SHADER_TESS_EVAL:
182                 calling_conv = RADEON_LLVM_AMDGPU_VS;
183                 break;
184         case MESA_SHADER_GEOMETRY:
185                 calling_conv = RADEON_LLVM_AMDGPU_GS;
186                 break;
187         case MESA_SHADER_FRAGMENT:
188                 calling_conv = RADEON_LLVM_AMDGPU_PS;
189                 break;
190         case MESA_SHADER_COMPUTE:
191                 calling_conv = RADEON_LLVM_AMDGPU_CS;
192                 break;
193         default:
194                 unreachable("Unhandle shader type");
195         }
196
197         LLVMSetFunctionCallConv(func, calling_conv);
198 }
199
200 static LLVMValueRef
201 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
202                      LLVMBuilderRef builder, LLVMTypeRef *return_types,
203                      unsigned num_return_elems, LLVMTypeRef *param_types,
204                      unsigned param_count, unsigned array_params_mask,
205                      unsigned sgpr_params, bool unsafe_math)
206 {
207         LLVMTypeRef main_function_type, ret_type;
208         LLVMBasicBlockRef main_function_body;
209
210         if (num_return_elems)
211                 ret_type = LLVMStructTypeInContext(ctx, return_types,
212                                                    num_return_elems, true);
213         else
214                 ret_type = LLVMVoidTypeInContext(ctx);
215
216         /* Setup the function */
217         main_function_type =
218             LLVMFunctionType(ret_type, param_types, param_count, 0);
219         LLVMValueRef main_function =
220             LLVMAddFunction(module, "main", main_function_type);
221         main_function_body =
222             LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
223         LLVMPositionBuilderAtEnd(builder, main_function_body);
224
225         LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
226         for (unsigned i = 0; i < sgpr_params; ++i) {
227                 if (array_params_mask & (1 << i)) {
228                         LLVMValueRef P = LLVMGetParam(main_function, i);
229                         ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
230                         ac_add_attr_dereferenceable(P, UINT64_MAX);
231                 }
232                 else {
233                         ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
234                 }
235         }
236
237         if (unsafe_math) {
238                 /* These were copied from some LLVM test. */
239                 LLVMAddTargetDependentFunctionAttr(main_function,
240                                                    "less-precise-fpmad",
241                                                    "true");
242                 LLVMAddTargetDependentFunctionAttr(main_function,
243                                                    "no-infs-fp-math",
244                                                    "true");
245                 LLVMAddTargetDependentFunctionAttr(main_function,
246                                                    "no-nans-fp-math",
247                                                    "true");
248                 LLVMAddTargetDependentFunctionAttr(main_function,
249                                                    "unsafe-fp-math",
250                                                    "true");
251         }
252         return main_function;
253 }
254
255 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
256 {
257         return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
258                                CONST_ADDR_SPACE);
259 }
260
261 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
262                                           int idx,
263                                           LLVMTypeRef type)
264 {
265         LLVMValueRef offset;
266         LLVMValueRef ptr;
267         int addr_space;
268
269         offset = LLVMConstInt(ctx->i32, idx, false);
270
271         ptr = ctx->shared_memory;
272         ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
273         addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
274         ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
275         return ptr;
276 }
277
278 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
279 {
280         LLVMTypeRef type = LLVMTypeOf(v);
281         if (type == ctx->f32) {
282                 return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
283         } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
284                 LLVMTypeRef elem_type = LLVMGetElementType(type);
285                 if (elem_type == ctx->f32) {
286                         LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
287                         return LLVMBuildBitCast(ctx->builder, v, nt, "");
288                 }
289         }
290         return v;
291 }
292
293 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
294 {
295         LLVMTypeRef type = LLVMTypeOf(v);
296         if (type == ctx->i32) {
297                 return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
298         } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
299                 LLVMTypeRef elem_type = LLVMGetElementType(type);
300                 if (elem_type == ctx->i32) {
301                         LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
302                         return LLVMBuildBitCast(ctx->builder, v, nt, "");
303                 }
304         }
305         return v;
306 }
307
308 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
309                                  LLVMValueRef param, unsigned rshift,
310                                  unsigned bitwidth)
311 {
312         LLVMValueRef value = param;
313         if (rshift)
314                 value = LLVMBuildLShr(ctx->builder, value,
315                                       LLVMConstInt(ctx->i32, rshift, false), "");
316
317         if (rshift + bitwidth < 32) {
318                 unsigned mask = (1 << bitwidth) - 1;
319                 value = LLVMBuildAnd(ctx->builder, value,
320                                      LLVMConstInt(ctx->i32, mask, false), "");
321         }
322         return value;
323 }
324
325 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
326                                LLVMValueRef base_ptr, LLVMValueRef index)
327 {
328         LLVMValueRef indices[2] = {
329                 ctx->i32zero,
330                 index,
331         };
332         return LLVMBuildGEP(ctx->builder, base_ptr,
333                             indices, 2, "");
334 }
335
336 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
337                                        LLVMValueRef base_ptr, LLVMValueRef index,
338                                        bool uniform)
339 {
340         LLVMValueRef pointer;
341         pointer = build_gep0(ctx, base_ptr, index);
342         if (uniform)
343                 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
344         return LLVMBuildLoad(ctx->builder, pointer, "");
345 }
346
347 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
348                                              LLVMValueRef base_ptr, LLVMValueRef index)
349 {
350         LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
351         LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
352         return result;
353 }
354
355 static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
356 {
357         ud_info->sgpr_idx = sgpr_idx;
358         ud_info->num_sgprs = num_sgprs;
359         ud_info->indirect = false;
360         ud_info->indirect_offset = 0;
361 }
362
363 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
364                                          int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
365 {
366         set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
367 }
368
#if 0
/*
 * Compiled out.  Indirect counterpart of set_userdata_location(): marks the
 * entry as indirect and stores indirect_offset alongside the SGPR range.
 */
static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
                                           uint32_t indirect_offset)
{
        ud_info->indirect = true;
        ud_info->indirect_offset = indirect_offset;

        ud_info->num_sgprs = num_sgprs;
        ud_info->sgpr_idx = sgpr_idx;
}
#endif
379
380 static void create_function(struct nir_to_llvm_context *ctx)
381 {
382         LLVMTypeRef arg_types[23];
383         unsigned arg_idx = 0;
384         unsigned array_params_mask = 0;
385         unsigned sgpr_count = 0, user_sgpr_count;
386         unsigned i;
387         unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
388         unsigned user_sgpr_idx;
389         bool need_push_constants;
390
391         need_push_constants = true;
392         if (!ctx->options->layout)
393                 need_push_constants = false;
394         else if (!ctx->options->layout->push_constant_size &&
395                  !ctx->options->layout->dynamic_offset_count)
396                 need_push_constants = false;
397
398         /* 1 for each descriptor set */
399         for (unsigned i = 0; i < num_sets; ++i) {
400                 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
401                         array_params_mask |= (1 << arg_idx);
402                         arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
403                 }
404         }
405
406         if (need_push_constants) {
407                 /* 1 for push constants and dynamic descriptors */
408                 array_params_mask |= (1 << arg_idx);
409                 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
410         }
411
412         switch (ctx->stage) {
413         case MESA_SHADER_COMPUTE:
414                 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
415                 user_sgpr_count = arg_idx;
416                 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
417                 arg_types[arg_idx++] = ctx->i32;
418                 sgpr_count = arg_idx;
419
420                 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
421                 break;
422         case MESA_SHADER_VERTEX:
423                 arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
424                 arg_types[arg_idx++] = ctx->i32; // base vertex
425                 arg_types[arg_idx++] = ctx->i32; // start instance
426                 user_sgpr_count = sgpr_count = arg_idx;
427                 arg_types[arg_idx++] = ctx->i32; // vertex id
428                 arg_types[arg_idx++] = ctx->i32; // rel auto id
429                 arg_types[arg_idx++] = ctx->i32; // vs prim id
430                 arg_types[arg_idx++] = ctx->i32; // instance id
431                 break;
432         case MESA_SHADER_FRAGMENT:
433                 arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
434                 user_sgpr_count = arg_idx;
435                 arg_types[arg_idx++] = ctx->i32; /* prim mask */
436                 sgpr_count = arg_idx;
437                 arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
438                 arg_types[arg_idx++] = ctx->v2i32; /* persp center */
439                 arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
440                 arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
441                 arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
442                 arg_types[arg_idx++] = ctx->v2i32; /* linear center */
443                 arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
444                 arg_types[arg_idx++] = ctx->f32;  /* line stipple tex */
445                 arg_types[arg_idx++] = ctx->f32;  /* pos x float */
446                 arg_types[arg_idx++] = ctx->f32;  /* pos y float */
447                 arg_types[arg_idx++] = ctx->f32;  /* pos z float */
448                 arg_types[arg_idx++] = ctx->f32;  /* pos w float */
449                 arg_types[arg_idx++] = ctx->i32;  /* front face */
450                 arg_types[arg_idx++] = ctx->i32;  /* ancillary */
451                 arg_types[arg_idx++] = ctx->f32;  /* sample coverage */
452                 arg_types[arg_idx++] = ctx->i32;  /* fixed pt */
453                 break;
454         default:
455                 unreachable("Shader stage not implemented");
456         }
457
458         ctx->main_function = create_llvm_function(
459             ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
460             arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
461         set_llvm_calling_convention(ctx->main_function, ctx->stage);
462
463
464         ctx->shader_info->num_input_sgprs = 0;
465         ctx->shader_info->num_input_vgprs = 0;
466
467         for (i = 0; i < user_sgpr_count; i++)
468                 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
469
470         ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
471         for (; i < sgpr_count; i++)
472                 ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
473
474         if (ctx->stage != MESA_SHADER_FRAGMENT)
475                 for (; i < arg_idx; ++i)
476                         ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
477
478         arg_idx = 0;
479         user_sgpr_idx = 0;
480         for (unsigned i = 0; i < num_sets; ++i) {
481                 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
482                         set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
483                         user_sgpr_idx += 2;
484                         ctx->descriptor_sets[i] =
485                                 LLVMGetParam(ctx->main_function, arg_idx++);
486                 } else
487                         ctx->descriptor_sets[i] = NULL;
488         }
489
490         if (need_push_constants) {
491                 ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
492                 set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
493                 user_sgpr_idx += 2;
494         }
495
496         switch (ctx->stage) {
497         case MESA_SHADER_COMPUTE:
498                 set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
499                 user_sgpr_idx += 3;
500                 ctx->num_work_groups =
501                     LLVMGetParam(ctx->main_function, arg_idx++);
502                 ctx->workgroup_ids =
503                     LLVMGetParam(ctx->main_function, arg_idx++);
504                 ctx->tg_size =
505                     LLVMGetParam(ctx->main_function, arg_idx++);
506                 ctx->local_invocation_ids =
507                     LLVMGetParam(ctx->main_function, arg_idx++);
508                 break;
509         case MESA_SHADER_VERTEX:
510                 set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
511                 user_sgpr_idx += 2;
512                 ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
513                 set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
514                 user_sgpr_idx += 2;
515                 ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
516                 ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
517                 ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
518                 ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
519                 ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
520                 ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
521                 break;
522         case MESA_SHADER_FRAGMENT:
523                 set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
524                 user_sgpr_idx += 2;
525                 ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
526                 ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
527                 ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
528                 ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
529                 ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
530                 arg_idx++;
531                 ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
532                 ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
533                 ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
534                 arg_idx++; /* line stipple */
535                 ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
536                 ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
537                 ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
538                 ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
539                 ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
540                 ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
541                 break;
542         default:
543                 unreachable("Shader stage not implemented");
544         }
545 }
546
547 static void setup_types(struct nir_to_llvm_context *ctx)
548 {
549         LLVMValueRef args[4];
550
551         ctx->voidt = LLVMVoidTypeInContext(ctx->context);
552         ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
553         ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
554         ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
555         ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
556         ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
557         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
558         ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
559         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
560         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
561         ctx->f32 = LLVMFloatTypeInContext(ctx->context);
562         ctx->f16 = LLVMHalfTypeInContext(ctx->context);
563         ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
564         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
565         ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
566
567         ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
568         ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
569         ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
570         ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
571
572         args[0] = ctx->f32zero;
573         args[1] = ctx->f32zero;
574         args[2] = ctx->f32zero;
575         args[3] = ctx->f32one;
576         ctx->v4f32empty = LLVMConstVector(args, 4);
577
578         ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
579                                                       "range", 5);
580         ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
581                                                                "invariant.load", 14);
582         ctx->uniform_md_kind =
583             LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
584         ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
585
586         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
587
588         args[0] = LLVMConstReal(ctx->f32, 2.5);
589         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
590 }
591
592 static int get_llvm_num_components(LLVMValueRef value)
593 {
594         LLVMTypeRef type = LLVMTypeOf(value);
595         unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
596                                       ? LLVMGetVectorSize(type)
597                                       : 1;
598         return num_components;
599 }
600
601 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
602                                       LLVMValueRef value,
603                                       int index)
604 {
605         int count = get_llvm_num_components(value);
606
607         assert(index < count);
608         if (count == 1)
609                 return value;
610
611         return LLVMBuildExtractElement(ctx->builder, value,
612                                        LLVMConstInt(ctx->i32, index, false), "");
613 }
614
615 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
616                                 LLVMValueRef value, unsigned count)
617 {
618         unsigned num_components = get_llvm_num_components(value);
619         if (count == num_components)
620                 return value;
621
622         LLVMValueRef masks[] = {
623             LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
624             LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
625
626         if (count == 1)
627                 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
628                                                "");
629
630         LLVMValueRef swizzle = LLVMConstVector(masks, count);
631         return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
632 }
633
634 static LLVMValueRef
635 build_gather_values_extended(struct nir_to_llvm_context *ctx,
636                              LLVMValueRef *values,
637                              unsigned value_count,
638                              unsigned value_stride,
639                              bool load)
640 {
641         LLVMBuilderRef builder = ctx->builder;
642         LLVMValueRef vec;
643         unsigned i;
644
645
646         if (value_count == 1) {
647                 if (load)
648                         return LLVMBuildLoad(builder, values[0], "");
649                 return values[0];
650         } else if (!value_count)
651                 unreachable("value_count is 0");
652
653         for (i = 0; i < value_count; i++) {
654                 LLVMValueRef value = values[i * value_stride];
655                 if (load)
656                         value = LLVMBuildLoad(builder, value, "");
657
658                 if (!i)
659                         vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
660                 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
661                 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
662         }
663         return vec;
664 }
665
666
667 static void
668 build_store_values_extended(struct nir_to_llvm_context *ctx,
669                              LLVMValueRef *values,
670                              unsigned value_count,
671                              unsigned value_stride,
672                              LLVMValueRef vec)
673 {
674         LLVMBuilderRef builder = ctx->builder;
675         unsigned i;
676
677         if (value_count == 1) {
678                 LLVMBuildStore(builder, vec, values[0]);
679                 return;
680         }
681
682         for (i = 0; i < value_count; i++) {
683                 LLVMValueRef ptr = values[i * value_stride];
684                 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
685                 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
686                 LLVMBuildStore(builder, value, ptr);
687         }
688 }
689
690 static LLVMValueRef
691 build_gather_values(struct nir_to_llvm_context *ctx,
692                     LLVMValueRef *values,
693                     unsigned value_count)
694 {
695         return build_gather_values_extended(ctx, values, value_count, 1, false);
696 }
697
698 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
699                                 nir_ssa_def *def)
700 {
701         LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
702         if (def->num_components > 1) {
703                 type = LLVMVectorType(type, def->num_components);
704         }
705         return type;
706 }
707
708 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
709 {
710         assert(src.is_ssa);
711         struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
712         return (LLVMValueRef)entry->data;
713 }
714
715
716 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
717                                    struct nir_block *b)
718 {
719         struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
720         return (LLVMBasicBlockRef)entry->data;
721 }
722
723 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
724                                 nir_alu_src src,
725                                 unsigned num_components)
726 {
727         LLVMValueRef value = get_src(ctx, src.src);
728         bool need_swizzle = false;
729
730         assert(value);
731         LLVMTypeRef type = LLVMTypeOf(value);
732         unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
733                                       ? LLVMGetVectorSize(type)
734                                       : 1;
735
736         for (unsigned i = 0; i < num_components; ++i) {
737                 assert(src.swizzle[i] < src_components);
738                 if (src.swizzle[i] != i)
739                         need_swizzle = true;
740         }
741
742         if (need_swizzle || num_components != src_components) {
743                 LLVMValueRef masks[] = {
744                     LLVMConstInt(ctx->i32, src.swizzle[0], false),
745                     LLVMConstInt(ctx->i32, src.swizzle[1], false),
746                     LLVMConstInt(ctx->i32, src.swizzle[2], false),
747                     LLVMConstInt(ctx->i32, src.swizzle[3], false)};
748
749                 if (src_components > 1 && num_components == 1) {
750                         value = LLVMBuildExtractElement(ctx->builder, value,
751                                                         masks[0], "");
752                 } else if (src_components == 1 && num_components > 1) {
753                         LLVMValueRef values[] = {value, value, value, value};
754                         value = build_gather_values(ctx, values, num_components);
755                 } else {
756                         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
757                         value = LLVMBuildShuffleVector(ctx->builder, value, value,
758                                                        swizzle, "");
759                 }
760         }
761         assert(!src.negate);
762         assert(!src.abs);
763         return value;
764 }
765
766 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
767                                  LLVMIntPredicate pred, LLVMValueRef src0,
768                                  LLVMValueRef src1)
769 {
770         LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
771         return LLVMBuildSelect(ctx->builder, result,
772                                LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
773                                LLVMConstInt(ctx->i32, 0, false), "");
774 }
775
776 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
777                                    LLVMRealPredicate pred, LLVMValueRef src0,
778                                    LLVMValueRef src1)
779 {
780         LLVMValueRef result;
781         src0 = to_float(ctx, src0);
782         src1 = to_float(ctx, src1);
783         result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
784         return LLVMBuildSelect(ctx->builder, result,
785                                LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
786                                LLVMConstInt(ctx->i32, 0, false), "");
787 }
788
789 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
790                                          const char *intrin,
791                                          LLVMValueRef src0)
792 {
793         LLVMValueRef params[] = {
794                 to_float(ctx, src0),
795         };
796         return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
797 }
798
799 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
800                                        const char *intrin,
801                                        LLVMValueRef src0, LLVMValueRef src1)
802 {
803         LLVMValueRef params[] = {
804                 to_float(ctx, src0),
805                 to_float(ctx, src1),
806         };
807         return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
808 }
809
810 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
811                                          const char *intrin,
812                                          LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
813 {
814         LLVMValueRef params[] = {
815                 to_float(ctx, src0),
816                 to_float(ctx, src1),
817                 to_float(ctx, src2),
818         };
819         return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
820 }
821
822 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
823                                LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
824 {
825         LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
826                                        ctx->i32zero, "");
827         return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
828 }
829
830 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
831                                   LLVMValueRef src0)
832 {
833         LLVMValueRef params[2] = {
834                 src0,
835
836                 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
837                  * add special code to check for x=0. The reason is that
838                  * the LLVM behavior for x=0 is different from what we
839                  * need here.
840                  *
841                  * The hardware already implements the correct behavior.
842                  */
843                 LLVMConstInt(ctx->i32, 1, false),
844         };
845         return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
846 }
847
848 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
849                                    LLVMValueRef src0)
850 {
851         LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32",
852                                                ctx->i32, &src0, 1,
853                                                AC_FUNC_ATTR_READNONE);
854
855         /* The HW returns the last bit index from MSB, but NIR wants
856          * the index from LSB. Invert it by doing "31 - msb". */
857         msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
858                            msb, "");
859
860         LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
861         LLVMValueRef cond = LLVMBuildOr(ctx->builder,
862                                         LLVMBuildICmp(ctx->builder, LLVMIntEQ,
863                                                       src0, ctx->i32zero, ""),
864                                         LLVMBuildICmp(ctx->builder, LLVMIntEQ,
865                                                       src0, all_ones, ""), "");
866
867         return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
868 }
869
870 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
871                                    LLVMValueRef src0)
872 {
873         LLVMValueRef args[2] = {
874                 src0,
875                 ctx->i32one,
876         };
877         LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32",
878                                                ctx->i32, args, ARRAY_SIZE(args),
879                                                AC_FUNC_ATTR_READNONE);
880
881         /* The HW returns the last bit index from MSB, but NIR wants
882          * the index from LSB. Invert it by doing "31 - msb". */
883         msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
884                            msb, "");
885
886         return LLVMBuildSelect(ctx->builder,
887                                LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
888                                              ctx->i32zero, ""),
889                                LLVMConstInt(ctx->i32, -1, true), msb, "");
890 }
891
892 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
893                                     LLVMIntPredicate pred,
894                                     LLVMValueRef src0, LLVMValueRef src1)
895 {
896         return LLVMBuildSelect(ctx->builder,
897                                LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
898                                src0,
899                                src1, "");
900
901 }
902 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
903                               LLVMValueRef src0)
904 {
905         return emit_minmax_int(ctx, LLVMIntSGT, src0,
906                                LLVMBuildNeg(ctx->builder, src0, ""));
907 }
908
909 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
910                                LLVMValueRef src0)
911 {
912         LLVMValueRef cmp, val;
913
914         cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
915         val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
916         cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
917         val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
918         return val;
919 }
920
921 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
922                                LLVMValueRef src0)
923 {
924         LLVMValueRef cmp, val;
925
926         cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
927         val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
928         cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
929         val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
930         return val;
931 }
932
933 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
934                                 LLVMValueRef src0)
935 {
936         const char *intr = "llvm.floor.f32";
937         LLVMValueRef fsrc0 = to_float(ctx, src0);
938         LLVMValueRef params[] = {
939                 fsrc0,
940         };
941         LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr,
942                                                  ctx->f32, params, 1,
943                                                  AC_FUNC_ATTR_READNONE);
944         return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
945 }
946
947 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
948                                     const char *intrin,
949                                     LLVMValueRef src0, LLVMValueRef src1)
950 {
951         LLVMTypeRef ret_type;
952         LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
953         LLVMValueRef res;
954         LLVMValueRef params[] = { src0, src1 };
955         ret_type = LLVMStructTypeInContext(ctx->context, types,
956                                            2, true);
957
958         res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type,
959                                   params, 2, AC_FUNC_ATTR_READNONE);
960
961         res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
962         res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
963         return res;
964 }
965
966 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
967                              LLVMValueRef src0)
968 {
969         return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
970 }
971
972 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
973                                    LLVMValueRef src0, LLVMValueRef src1)
974 {
975         LLVMValueRef dst64, result;
976         src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
977         src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
978
979         dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
980         dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
981         result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
982         return result;
983 }
984
985 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
986                                    LLVMValueRef src0, LLVMValueRef src1)
987 {
988         LLVMValueRef dst64, result;
989         src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
990         src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
991
992         dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
993         dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
994         result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
995         return result;
996 }
997
998 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
999                                           const char *intrin,
1000                                           LLVMValueRef srcs[3])
1001 {
1002         LLVMValueRef result;
1003         LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1004         result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
1005
1006         result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1007         return result;
1008 }
1009
1010 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
1011                                          LLVMValueRef src0, LLVMValueRef src1,
1012                                          LLVMValueRef src2, LLVMValueRef src3)
1013 {
1014         LLVMValueRef bfi_args[3], result;
1015
1016         bfi_args[0] = LLVMBuildShl(ctx->builder,
1017                                    LLVMBuildSub(ctx->builder,
1018                                                 LLVMBuildShl(ctx->builder,
1019                                                              ctx->i32one,
1020                                                              src3, ""),
1021                                                 ctx->i32one, ""),
1022                                    src2, "");
1023         bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1024         bfi_args[2] = src0;
1025
1026         LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1027
1028         /* Calculate:
1029          *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1030          * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1031          */
1032         result = LLVMBuildXor(ctx->builder, bfi_args[2],
1033                               LLVMBuildAnd(ctx->builder, bfi_args[0],
1034                                            LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1035
1036         result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1037         return result;
1038 }
1039
1040 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
1041                                         LLVMValueRef src0)
1042 {
1043         LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1044         int i;
1045         LLVMValueRef comp[2];
1046
1047         src0 = to_float(ctx, src0);
1048         comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1049         comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1050         for (i = 0; i < 2; i++) {
1051                 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1052                 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1053                 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1054         }
1055
1056         comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1057         comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1058
1059         return comp[0];
1060 }
1061
1062 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1063                                           LLVMValueRef src0)
1064 {
1065         LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1066         LLVMValueRef temps[2], result, val;
1067         int i;
1068
1069         for (i = 0; i < 2; i++) {
1070                 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1071                 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1072                 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1073                 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1074         }
1075
1076         result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1077                                         ctx->i32zero, "");
1078         result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1079                                         ctx->i32one, "");
1080         return result;
1081 }
1082
1083 /**
1084  * Set range metadata on an instruction.  This can only be used on load and
1085  * call instructions.  If you know an instruction can only produce the values
1086  * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1087  * \p lo is the minimum value inclusive.
1088  * \p hi is the maximum value exclusive.
1089  */
1090 static void set_range_metadata(struct nir_to_llvm_context *ctx,
1091                                LLVMValueRef value, unsigned lo, unsigned hi)
1092 {
1093         LLVMValueRef range_md, md_args[2];
1094         LLVMTypeRef type = LLVMTypeOf(value);
1095         LLVMContextRef context = LLVMGetTypeContext(type);
1096
1097         md_args[0] = LLVMConstInt(type, lo, false);
1098         md_args[1] = LLVMConstInt(type, hi, false);
1099         range_md = LLVMMDNodeInContext(context, md_args, 2);
1100         LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1101 }
1102
1103 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
1104 {
1105         LLVMValueRef tid;
1106         LLVMValueRef tid_args[2];
1107         tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
1108         tid_args[1] = ctx->i32zero;
1109         tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
1110                                           "llvm.amdgcn.mbcnt.lo", ctx->i32,
1111                                           tid_args, 2, AC_FUNC_ATTR_READNONE);
1112
1113         tid = ac_emit_llvm_intrinsic(&ctx->ac,
1114                                   "llvm.amdgcn.mbcnt.hi", ctx->i32,
1115                                   tid_args, 2, AC_FUNC_ATTR_READNONE);
1116         set_range_metadata(ctx, tid, 0, 64);
1117         return tid;
1118 }
1119
1120 /*
1121  * SI implements derivatives using the local data store (LDS)
1122  * All writes to the LDS happen in all executing threads at
1123  * the same time. TID is the Thread ID for the current
1124  * thread and is a value between 0 and 63, representing
1125  * the thread's position in the wavefront.
1126  *
1127  * For the pixel shader threads are grouped into quads of four pixels.
1128  * The TIDs of the pixels of a quad are:
1129  *
1130  *  +------+------+
1131  *  |4n + 0|4n + 1|
1132  *  +------+------+
1133  *  |4n + 2|4n + 3|
1134  *  +------+------+
1135  *
1136  * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1137  * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1138  * the current pixel's column, and masking with 0xfffffffe yields the TID
1139  * of the left pixel of the current pixel's row.
1140  *
1141  * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1142  * adding 2 yields the TID of the pixel below the top pixel.
1143  */
1144 /* masks for thread ID. */
1145 #define TID_MASK_TOP_LEFT 0xfffffffc
1146 #define TID_MASK_TOP      0xfffffffd
1147 #define TID_MASK_LEFT     0xfffffffe
1148 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1149                               nir_op op,
1150                               LLVMValueRef src0)
1151 {
1152         LLVMValueRef tl, trbl, result;
1153         LLVMValueRef tl_tid, trbl_tid;
1154         LLVMValueRef args[2];
1155         LLVMValueRef thread_id;
1156         unsigned mask;
1157         int idx;
1158         ctx->has_ddxy = true;
1159
1160         if (!ctx->lds && !ctx->has_ds_bpermute)
1161                 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1162                                                        LLVMArrayType(ctx->i32, 64),
1163                                                        "ddxy_lds", LOCAL_ADDR_SPACE);
1164
1165         thread_id = get_thread_id(ctx);
1166         if (op == nir_op_fddx_fine || op == nir_op_fddx)
1167                 mask = TID_MASK_LEFT;
1168         else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1169                 mask = TID_MASK_TOP;
1170         else
1171                 mask = TID_MASK_TOP_LEFT;
1172
1173         tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1174                               LLVMConstInt(ctx->i32, mask, false), "");
1175         /* for DDX we want to next X pixel, DDY next Y pixel. */
1176         if (op == nir_op_fddx_fine ||
1177             op == nir_op_fddx_coarse ||
1178             op == nir_op_fddx)
1179                 idx = 1;
1180         else
1181                 idx = 2;
1182
1183         trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1184                                 LLVMConstInt(ctx->i32, idx, false), "");
1185
1186         if (ctx->has_ds_bpermute) {
1187                 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1188                                        LLVMConstInt(ctx->i32, 4, false), "");
1189                 args[1] = src0;
1190                 tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1191                                          ctx->i32, args, 2,
1192                                          AC_FUNC_ATTR_READNONE);
1193
1194                 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1195                                        LLVMConstInt(ctx->i32, 4, false), "");
1196                 trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1197                                            ctx->i32, args, 2,
1198                                            AC_FUNC_ATTR_READNONE);
1199         } else {
1200                 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
1201
1202                 store_ptr = build_gep0(ctx, ctx->lds, thread_id);
1203                 load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
1204                 load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
1205
1206                 LLVMBuildStore(ctx->builder, src0, store_ptr);
1207                 tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
1208                 trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
1209         }
1210         tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1211         trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1212         result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1213         return result;
1214 }
1215
1216 /*
1217  * this takes an I,J coordinate pair,
1218  * and works out the X and Y derivatives.
1219  * it returns DDX(I), DDX(J), DDY(I), DDY(J).
1220  */
1221 static LLVMValueRef emit_ddxy_interp(
1222         struct nir_to_llvm_context *ctx,
1223         LLVMValueRef interp_ij)
1224 {
1225         LLVMValueRef result[4], a;
1226         unsigned i;
1227
1228         for (i = 0; i < 2; i++) {
1229                 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1230                                             LLVMConstInt(ctx->i32, i, false), "");
1231                 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1232                 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1233         }
1234         return build_gather_values(ctx, result, 4);
1235 }
1236
1237 static LLVMValueRef emit_fdiv(struct nir_to_llvm_context *ctx,
1238                               LLVMValueRef num,
1239                               LLVMValueRef den)
1240 {
1241         LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
1242
1243         if (!LLVMIsConstant(ret))
1244                 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
1245         return ret;
1246 }
1247
1248 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
1249 {
1250         LLVMValueRef src[4], result = NULL;
1251         unsigned num_components = instr->dest.dest.ssa.num_components;
1252         unsigned src_components;
1253
1254         assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1255         switch (instr->op) {
1256         case nir_op_vec2:
1257         case nir_op_vec3:
1258         case nir_op_vec4:
1259                 src_components = 1;
1260                 break;
1261         case nir_op_pack_half_2x16:
1262                 src_components = 2;
1263                 break;
1264         case nir_op_unpack_half_2x16:
1265                 src_components = 1;
1266                 break;
1267         default:
1268                 src_components = num_components;
1269                 break;
1270         }
1271         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1272                 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1273
1274         switch (instr->op) {
1275         case nir_op_fmov:
1276         case nir_op_imov:
1277                 result = src[0];
1278                 break;
1279         case nir_op_fneg:
1280                 src[0] = to_float(ctx, src[0]);
1281                 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1282                 break;
1283         case nir_op_ineg:
1284                 result = LLVMBuildNeg(ctx->builder, src[0], "");
1285                 break;
1286         case nir_op_inot:
1287                 result = LLVMBuildNot(ctx->builder, src[0], "");
1288                 break;
1289         case nir_op_iadd:
1290                 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1291                 break;
1292         case nir_op_fadd:
1293                 src[0] = to_float(ctx, src[0]);
1294                 src[1] = to_float(ctx, src[1]);
1295                 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1296                 break;
1297         case nir_op_fsub:
1298                 src[0] = to_float(ctx, src[0]);
1299                 src[1] = to_float(ctx, src[1]);
1300                 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1301                 break;
1302         case nir_op_isub:
1303                 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1304                 break;
1305         case nir_op_imul:
1306                 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1307                 break;
1308         case nir_op_imod:
1309                 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1310                 break;
1311         case nir_op_umod:
1312                 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
1313                 break;
1314         case nir_op_fmod:
1315                 src[0] = to_float(ctx, src[0]);
1316                 src[1] = to_float(ctx, src[1]);
1317                 result = emit_fdiv(ctx, src[0], src[1]);
1318                 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
1319                 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1320                 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1321                 break;
1322         case nir_op_frem:
1323                 src[0] = to_float(ctx, src[0]);
1324                 src[1] = to_float(ctx, src[1]);
1325                 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1326                 break;
1327         case nir_op_idiv:
1328                 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1329                 break;
1330         case nir_op_udiv:
1331                 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1332                 break;
1333         case nir_op_fmul:
1334                 src[0] = to_float(ctx, src[0]);
1335                 src[1] = to_float(ctx, src[1]);
1336                 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1337                 break;
1338         case nir_op_fdiv:
1339                 src[0] = to_float(ctx, src[0]);
1340                 src[1] = to_float(ctx, src[1]);
1341                 result = emit_fdiv(ctx, src[0], src[1]);
1342                 break;
1343         case nir_op_frcp:
1344                 src[0] = to_float(ctx, src[0]);
1345                 result = emit_fdiv(ctx, ctx->f32one, src[0]);
1346                 break;
1347         case nir_op_iand:
1348                 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1349                 break;
1350         case nir_op_ior:
1351                 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1352                 break;
1353         case nir_op_ixor:
1354                 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1355                 break;
1356         case nir_op_ishl:
1357                 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1358                 break;
1359         case nir_op_ishr:
1360                 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1361                 break;
1362         case nir_op_ushr:
1363                 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1364                 break;
1365         case nir_op_ilt:
1366                 result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
1367                 break;
1368         case nir_op_ine:
1369                 result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
1370                 break;
1371         case nir_op_ieq:
1372                 result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
1373                 break;
1374         case nir_op_ige:
1375                 result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
1376                 break;
1377         case nir_op_ult:
1378                 result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
1379                 break;
1380         case nir_op_uge:
1381                 result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
1382                 break;
1383         case nir_op_feq:
1384                 result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
1385                 break;
1386         case nir_op_fne:
1387                 result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
1388                 break;
1389         case nir_op_flt:
1390                 result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
1391                 break;
1392         case nir_op_fge:
1393                 result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
1394                 break;
1395         case nir_op_fabs:
1396                 result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
1397                 break;
1398         case nir_op_iabs:
1399                 result = emit_iabs(ctx, src[0]);
1400                 break;
1401         case nir_op_imax:
1402                 result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
1403                 break;
1404         case nir_op_imin:
1405                 result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
1406                 break;
1407         case nir_op_umax:
1408                 result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
1409                 break;
1410         case nir_op_umin:
1411                 result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
1412                 break;
1413         case nir_op_isign:
1414                 result = emit_isign(ctx, src[0]);
1415                 break;
1416         case nir_op_fsign:
1417                 src[0] = to_float(ctx, src[0]);
1418                 result = emit_fsign(ctx, src[0]);
1419                 break;
1420         case nir_op_ffloor:
1421                 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
1422                 break;
1423         case nir_op_ftrunc:
1424                 result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
1425                 break;
1426         case nir_op_fceil:
1427                 result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
1428                 break;
1429         case nir_op_fround_even:
1430                 result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
1431                 break;
1432         case nir_op_ffract:
1433                 result = emit_ffract(ctx, src[0]);
1434                 break;
1435         case nir_op_fsin:
1436                 result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
1437                 break;
1438         case nir_op_fcos:
1439                 result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
1440                 break;
1441         case nir_op_fsqrt:
1442                 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1443                 break;
1444         case nir_op_fexp2:
1445                 result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
1446                 break;
1447         case nir_op_flog2:
1448                 result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
1449                 break;
1450         case nir_op_frsq:
1451                 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1452                 result = emit_fdiv(ctx, ctx->f32one, result);
1453                 break;
1454         case nir_op_fpow:
1455                 result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
1456                 break;
1457         case nir_op_fmax:
1458                 result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
1459                 break;
1460         case nir_op_fmin:
1461                 result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
1462                 break;
1463         case nir_op_ffma:
1464                 result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
1465                 break;
1466         case nir_op_ibitfield_extract:
1467                 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
1468                 break;
1469         case nir_op_ubitfield_extract:
1470                 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
1471                 break;
1472         case nir_op_bitfield_insert:
1473                 result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
1474                 break;
1475         case nir_op_bitfield_reverse:
1476                 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1477                 break;
1478         case nir_op_bit_count:
1479                 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1480                 break;
1481         case nir_op_vec2:
1482         case nir_op_vec3:
1483         case nir_op_vec4:
1484                 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1485                         src[i] = to_integer(ctx, src[i]);
1486                 result = build_gather_values(ctx, src, num_components);
1487                 break;
1488         case nir_op_f2i:
1489                 src[0] = to_float(ctx, src[0]);
1490                 result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
1491                 break;
1492         case nir_op_f2u:
1493                 src[0] = to_float(ctx, src[0]);
1494                 result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
1495                 break;
1496         case nir_op_i2f:
1497                 result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
1498                 break;
1499         case nir_op_u2f:
1500                 result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
1501                 break;
1502         case nir_op_bcsel:
1503                 result = emit_bcsel(ctx, src[0], src[1], src[2]);
1504                 break;
1505         case nir_op_find_lsb:
1506                 result = emit_find_lsb(ctx, src[0]);
1507                 break;
1508         case nir_op_ufind_msb:
1509                 result = emit_ufind_msb(ctx, src[0]);
1510                 break;
1511         case nir_op_ifind_msb:
1512                 result = emit_ifind_msb(ctx, src[0]);
1513                 break;
1514         case nir_op_uadd_carry:
1515                 result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1516                 break;
1517         case nir_op_usub_borrow:
1518                 result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
1519                 break;
1520         case nir_op_b2f:
1521                 result = emit_b2f(ctx, src[0]);
1522                 break;
1523         case nir_op_fquantize2f16:
1524                 src[0] = to_float(ctx, src[0]);
1525                 result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
1526                 /* need to convert back up to f32 */
1527                 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1528                 break;
1529         case nir_op_umul_high:
1530                 result = emit_umul_high(ctx, src[0], src[1]);
1531                 break;
1532         case nir_op_imul_high:
1533                 result = emit_imul_high(ctx, src[0], src[1]);
1534                 break;
1535         case nir_op_pack_half_2x16:
1536                 result = emit_pack_half_2x16(ctx, src[0]);
1537                 break;
1538         case nir_op_unpack_half_2x16:
1539                 result = emit_unpack_half_2x16(ctx, src[0]);
1540                 break;
1541         case nir_op_fddx:
1542         case nir_op_fddy:
1543         case nir_op_fddx_fine:
1544         case nir_op_fddy_fine:
1545         case nir_op_fddx_coarse:
1546         case nir_op_fddy_coarse:
1547                 result = emit_ddxy(ctx, instr->op, src[0]);
1548                 break;
1549         default:
1550                 fprintf(stderr, "Unknown NIR alu instr: ");
1551                 nir_print_instr(&instr->instr, stderr);
1552                 fprintf(stderr, "\n");
1553                 abort();
1554         }
1555
1556         if (result) {
1557                 assert(instr->dest.dest.is_ssa);
1558                 result = to_integer(ctx, result);
1559                 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1560                                         result);
1561         }
1562 }
1563
1564 static void visit_load_const(struct nir_to_llvm_context *ctx,
1565                              nir_load_const_instr *instr)
1566 {
1567         LLVMValueRef values[4], value = NULL;
1568         LLVMTypeRef element_type =
1569             LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1570
1571         for (unsigned i = 0; i < instr->def.num_components; ++i) {
1572                 switch (instr->def.bit_size) {
1573                 case 32:
1574                         values[i] = LLVMConstInt(element_type,
1575                                                  instr->value.u32[i], false);
1576                         break;
1577                 case 64:
1578                         values[i] = LLVMConstInt(element_type,
1579                                                  instr->value.u64[i], false);
1580                         break;
1581                 default:
1582                         fprintf(stderr,
1583                                 "unsupported nir load_const bit_size: %d\n",
1584                                 instr->def.bit_size);
1585                         abort();
1586                 }
1587         }
1588         if (instr->def.num_components > 1) {
1589                 value = LLVMConstVector(values, instr->def.num_components);
1590         } else
1591                 value = values[0];
1592
1593         _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1594 }
1595
1596 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1597                              LLVMTypeRef type)
1598 {
1599         int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1600         return LLVMBuildBitCast(ctx->builder, ptr,
1601                                 LLVMPointerType(type, addr_space), "");
1602 }
1603
1604 static LLVMValueRef
1605 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1606 {
1607         LLVMValueRef size =
1608                 LLVMBuildExtractElement(ctx->builder, descriptor,
1609                                         LLVMConstInt(ctx->i32, 2, false), "");
1610
1611         /* VI only */
1612         if (ctx->options->chip_class >= VI && in_elements) {
1613                 /* On VI, the descriptor contains the size in bytes,
1614                  * but TXQ must return the size in elements.
1615                  * The stride is always non-zero for resources using TXQ.
1616                  */
1617                 LLVMValueRef stride =
1618                         LLVMBuildExtractElement(ctx->builder, descriptor,
1619                                                 LLVMConstInt(ctx->i32, 1, false), "");
1620                 stride = LLVMBuildLShr(ctx->builder, stride,
1621                                        LLVMConstInt(ctx->i32, 16, false), "");
1622                 stride = LLVMBuildAnd(ctx->builder, stride,
1623                                       LLVMConstInt(ctx->i32, 0x3fff, false), "");
1624
1625                 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1626         }
1627         return size;
1628 }
1629
1630 /**
1631  * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1632  * intrinsic names).
1633  */
1634 static void build_int_type_name(
1635         LLVMTypeRef type,
1636         char *buf, unsigned bufsize)
1637 {
1638         assert(bufsize >= 6);
1639
1640         if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1641                 snprintf(buf, bufsize, "v%ui32",
1642                          LLVMGetVectorSize(type));
1643         else
1644                 strcpy(buf, "i32");
1645 }
1646
1647 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1648                                                struct ac_tex_info *tinfo,
1649                                                nir_tex_instr *instr,
1650                                                const char *intr_name,
1651                                                unsigned coord_vgpr_index)
1652 {
1653         LLVMValueRef coord = tinfo->args[0];
1654         LLVMValueRef half_texel[2];
1655         int c;
1656
1657         //TODO Rect
1658         {
1659                 LLVMValueRef txq_args[10];
1660                 int txq_arg_count = 0;
1661                 LLVMValueRef size;
1662                 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1663                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
1664                 txq_args[txq_arg_count++] = tinfo->args[1];
1665                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
1666                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
1667                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1668                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
1669                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1670                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1671                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1672                 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1673                 size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
1674                                            txq_args, txq_arg_count,
1675                                            AC_FUNC_ATTR_READNONE);
1676
1677                 for (c = 0; c < 2; c++) {
1678                         half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1679                                                                 LLVMConstInt(ctx->i32, c, false), "");
1680                         half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1681                         half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
1682                         half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1683                                                       LLVMConstReal(ctx->f32, -0.5), "");
1684                 }
1685         }
1686
1687         for (c = 0; c < 2; c++) {
1688                 LLVMValueRef tmp;
1689                 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1690                 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1691                 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1692                 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1693                 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1694                 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1695         }
1696
1697         tinfo->args[0] = coord;
1698         return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1699                                    AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1700
1701 }
1702
1703 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1704                                         nir_tex_instr *instr,
1705                                         struct ac_tex_info *tinfo)
1706 {
1707         const char *name = "llvm.SI.image.sample";
1708         const char *infix = "";
1709         char intr_name[127];
1710         char type[64];
1711         bool is_shadow = instr->is_shadow;
1712         bool has_offset = tinfo->has_offset;
1713         switch (instr->op) {
1714         case nir_texop_txf:
1715         case nir_texop_txf_ms:
1716         case nir_texop_samples_identical:
1717                 name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
1718                        instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
1719                         "llvm.SI.image.load.mip";
1720                 is_shadow = false;
1721                 has_offset = false;
1722                 break;
1723         case nir_texop_txb:
1724                 infix = ".b";
1725                 break;
1726         case nir_texop_txl:
1727                 infix = ".l";
1728                 break;
1729         case nir_texop_txs:
1730                 name = "llvm.SI.getresinfo";
1731                 break;
1732         case nir_texop_query_levels:
1733                 name = "llvm.SI.getresinfo";
1734                 break;
1735         case nir_texop_tex:
1736                 if (ctx->stage != MESA_SHADER_FRAGMENT)
1737                         infix = ".lz";
1738                 break;
1739         case nir_texop_txd:
1740                 infix = ".d";
1741                 break;
1742         case nir_texop_tg4:
1743                 name = "llvm.SI.gather4";
1744                 infix = ".lz";
1745                 break;
1746         case nir_texop_lod:
1747                 name = "llvm.SI.getlod";
1748                 is_shadow = false;
1749                 has_offset = false;
1750                 break;
1751         default:
1752                 break;
1753         }
1754
1755         build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
1756         sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
1757                 has_offset ? ".o" : "", type);
1758
1759         if (instr->op == nir_texop_tg4) {
1760                 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1761                 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1762                         return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1763                                                           (int)has_offset + (int)is_shadow);
1764                 }
1765         }
1766         return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1767                                    AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1768
1769 }
1770
1771 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
1772                                                 nir_intrinsic_instr *instr)
1773 {
1774         LLVMValueRef index = get_src(ctx, instr->src[0]);
1775         unsigned desc_set = nir_intrinsic_desc_set(instr);
1776         unsigned binding = nir_intrinsic_binding(instr);
1777         LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
1778         struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
1779         unsigned base_offset = layout->binding[binding].offset;
1780         LLVMValueRef offset, stride;
1781
1782         if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1783             layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
1784                 desc_ptr = ctx->push_constants;
1785                 base_offset = ctx->options->layout->push_constant_size;
1786                 base_offset +=  16 * layout->binding[binding].dynamic_offset_offset;
1787                 stride = LLVMConstInt(ctx->i32, 16, false);
1788         } else
1789                 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
1790
1791         offset = LLVMConstInt(ctx->i32, base_offset, false);
1792         index = LLVMBuildMul(ctx->builder, index, stride, "");
1793         offset = LLVMBuildAdd(ctx->builder, offset, index, "");
1794         
1795         desc_ptr = build_gep0(ctx, desc_ptr, offset);
1796         desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
1797         LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
1798
1799         return LLVMBuildLoad(ctx->builder, desc_ptr, "");
1800 }
1801
1802 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1803                                              nir_intrinsic_instr *instr)
1804 {
1805         LLVMValueRef ptr;
1806
1807         ptr = build_gep0(ctx, ctx->push_constants, get_src(ctx, instr->src[0]));
1808         ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1809
1810         return LLVMBuildLoad(ctx->builder, ptr, "");
1811 }
1812
1813 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1814                                           nir_intrinsic_instr *instr)
1815 {
1816         LLVMValueRef desc = get_src(ctx, instr->src[0]);
1817
1818         return get_buffer_size(ctx, desc, false);
1819 }
1820 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
1821                              nir_intrinsic_instr *instr)
1822 {
1823         const char *store_name;
1824         LLVMTypeRef data_type = ctx->f32;
1825         unsigned writemask = nir_intrinsic_write_mask(instr);
1826         LLVMValueRef base_data, base_offset;
1827         LLVMValueRef params[6];
1828
1829         if (ctx->stage == MESA_SHADER_FRAGMENT)
1830                 ctx->shader_info->fs.writes_memory = true;
1831
1832         params[1] = get_src(ctx, instr->src[1]);
1833         params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1834         params[4] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
1835         params[5] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
1836
1837         if (instr->num_components > 1)
1838                 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1839
1840         base_data = to_float(ctx, get_src(ctx, instr->src[0]));
1841         base_data = trim_vector(ctx, base_data, instr->num_components);
1842         base_data = LLVMBuildBitCast(ctx->builder, base_data,
1843                                      data_type, "");
1844         base_offset = get_src(ctx, instr->src[2]);      /* voffset */
1845         while (writemask) {
1846                 int start, count;
1847                 LLVMValueRef data;
1848                 LLVMValueRef offset;
1849                 LLVMValueRef tmp;
1850                 u_bit_scan_consecutive_range(&writemask, &start, &count);
1851
1852                 /* Due to an LLVM limitation, split 3-element writes
1853                  * into a 2-element and a 1-element write. */
1854                 if (count == 3) {
1855                         writemask |= 1 << (start + 2);
1856                         count = 2;
1857                 }
1858
1859                 if (count == 4) {
1860                         store_name = "llvm.amdgcn.buffer.store.v4f32";
1861                         data = base_data;
1862                 } else if (count == 2) {
1863                         tmp = LLVMBuildExtractElement(ctx->builder,
1864                                                       base_data, LLVMConstInt(ctx->i32, start, false), "");
1865                         data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
1866                                                       ctx->i32zero, "");
1867
1868                         tmp = LLVMBuildExtractElement(ctx->builder,
1869                                                       base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
1870                         data = LLVMBuildInsertElement(ctx->builder, data, tmp,
1871                                                       ctx->i32one, "");
1872                         store_name = "llvm.amdgcn.buffer.store.v2f32";
1873
1874                 } else {
1875                         assert(count == 1);
1876                         if (get_llvm_num_components(base_data) > 1)
1877                                 data = LLVMBuildExtractElement(ctx->builder, base_data,
1878                                                                LLVMConstInt(ctx->i32, start, false), "");
1879                         else
1880                                 data = base_data;
1881                         store_name = "llvm.amdgcn.buffer.store.f32";
1882                 }
1883
1884                 offset = base_offset;
1885                 if (start != 0) {
1886                         offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
1887                 }
1888                 params[0] = data;
1889                 params[3] = offset;
1890                 ac_emit_llvm_intrinsic(&ctx->ac, store_name,
1891                                     LLVMVoidTypeInContext(ctx->context), params, 6, 0);
1892         }
1893 }
1894
1895 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1896                                       nir_intrinsic_instr *instr)
1897 {
1898         const char *name;
1899         LLVMValueRef params[6];
1900         int arg_count = 0;
1901         if (ctx->stage == MESA_SHADER_FRAGMENT)
1902                 ctx->shader_info->fs.writes_memory = true;
1903
1904         if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1905                 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
1906         }
1907         params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
1908         params[arg_count++] = get_src(ctx, instr->src[0]);
1909         params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1910         params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
1911         params[arg_count++] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
1912
1913         switch (instr->intrinsic) {
1914         case nir_intrinsic_ssbo_atomic_add:
1915                 name = "llvm.amdgcn.buffer.atomic.add";
1916                 break;
1917         case nir_intrinsic_ssbo_atomic_imin:
1918                 name = "llvm.amdgcn.buffer.atomic.smin";
1919                 break;
1920         case nir_intrinsic_ssbo_atomic_umin:
1921                 name = "llvm.amdgcn.buffer.atomic.umin";
1922                 break;
1923         case nir_intrinsic_ssbo_atomic_imax:
1924                 name = "llvm.amdgcn.buffer.atomic.smax";
1925                 break;
1926         case nir_intrinsic_ssbo_atomic_umax:
1927                 name = "llvm.amdgcn.buffer.atomic.umax";
1928                 break;
1929         case nir_intrinsic_ssbo_atomic_and:
1930                 name = "llvm.amdgcn.buffer.atomic.and";
1931                 break;
1932         case nir_intrinsic_ssbo_atomic_or:
1933                 name = "llvm.amdgcn.buffer.atomic.or";
1934                 break;
1935         case nir_intrinsic_ssbo_atomic_xor:
1936                 name = "llvm.amdgcn.buffer.atomic.xor";
1937                 break;
1938         case nir_intrinsic_ssbo_atomic_exchange:
1939                 name = "llvm.amdgcn.buffer.atomic.swap";
1940                 break;
1941         case nir_intrinsic_ssbo_atomic_comp_swap:
1942                 name = "llvm.amdgcn.buffer.atomic.cmpswap";
1943                 break;
1944         default:
1945                 abort();
1946         }
1947
1948         return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
1949 }
1950
1951 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
1952                                       nir_intrinsic_instr *instr)
1953 {
1954         const char *load_name;
1955         LLVMTypeRef data_type = ctx->f32;
1956         if (instr->num_components == 3)
1957                 data_type = LLVMVectorType(ctx->f32, 4);
1958         else if (instr->num_components > 1)
1959                 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1960
1961         if (instr->num_components == 4 || instr->num_components == 3)
1962                 load_name = "llvm.amdgcn.buffer.load.v4f32";
1963         else if (instr->num_components == 2)
1964                 load_name = "llvm.amdgcn.buffer.load.v2f32";
1965         else if (instr->num_components == 1)
1966                 load_name = "llvm.amdgcn.buffer.load.f32";
1967         else
1968                 abort();
1969
1970         LLVMValueRef params[] = {
1971             get_src(ctx, instr->src[0]),
1972             LLVMConstInt(ctx->i32, 0, false),
1973             get_src(ctx, instr->src[1]),
1974             LLVMConstInt(ctx->i1, 0, false),
1975             LLVMConstInt(ctx->i1, 0, false),
1976         };
1977
1978         LLVMValueRef ret =
1979             ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
1980
1981         if (instr->num_components == 3)
1982                 ret = trim_vector(ctx, ret, 3);
1983
1984         return LLVMBuildBitCast(ctx->builder, ret,
1985                                 get_def_type(ctx, &instr->dest.ssa), "");
1986 }
1987
1988 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
1989                                           nir_intrinsic_instr *instr)
1990 {
1991         LLVMValueRef results[4], ret;
1992         LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
1993         LLVMValueRef offset = get_src(ctx, instr->src[1]);
1994
1995         rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
1996
1997         for (unsigned i = 0; i < instr->num_components; ++i) {
1998                 LLVMValueRef params[] = {
1999                         rsrc,
2000                         LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2001                                      offset, "")
2002                 };
2003                 results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
2004                                                  params, 2, AC_FUNC_ATTR_READNONE);
2005         }
2006
2007
2008         ret = build_gather_values(ctx, results, instr->num_components);
2009         return LLVMBuildBitCast(ctx->builder, ret,
2010                                 get_def_type(ctx, &instr->dest.ssa), "");
2011 }
2012
2013 static void
2014 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
2015                       bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
2016 {
2017         unsigned const_offset = 0;
2018         LLVMValueRef offset = NULL;
2019
2020
2021         while (tail->child != NULL) {
2022                 const struct glsl_type *parent_type = tail->type;
2023                 tail = tail->child;
2024
2025                 if (tail->deref_type == nir_deref_type_array) {
2026                         nir_deref_array *deref_array = nir_deref_as_array(tail);
2027                         LLVMValueRef index, stride, local_offset;
2028                         unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2029
2030                         const_offset += size * deref_array->base_offset;
2031                         if (deref_array->deref_array_type == nir_deref_array_type_direct)
2032                                 continue;
2033
2034                         assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2035                         index = get_src(ctx, deref_array->indirect);
2036                         stride = LLVMConstInt(ctx->i32, size, 0);
2037                         local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2038
2039                         if (offset)
2040                                 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2041                         else
2042                                 offset = local_offset;
2043                 } else if (tail->deref_type == nir_deref_type_struct) {
2044                         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2045
2046                         for (unsigned i = 0; i < deref_struct->index; i++) {
2047                                 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2048                                 const_offset += glsl_count_attribute_slots(ft, vs_in);
2049                         }
2050                 } else
2051                         unreachable("unsupported deref type");
2052
2053         }
2054
2055         if (const_offset && offset)
2056                 offset = LLVMBuildAdd(ctx->builder, offset,
2057                                       LLVMConstInt(ctx->i32, const_offset, 0),
2058                                       "");
2059
2060         *const_out = const_offset;
2061         *indir_out = offset;
2062 }
2063
2064 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2065                                    nir_intrinsic_instr *instr)
2066 {
2067         LLVMValueRef values[4];
2068         int idx = instr->variables[0]->var->data.driver_location;
2069         int ve = instr->dest.ssa.num_components;
2070         LLVMValueRef indir_index;
2071         unsigned const_index;
2072         switch (instr->variables[0]->var->data.mode) {
2073         case nir_var_shader_in:
2074                 radv_get_deref_offset(ctx, &instr->variables[0]->deref,
2075                                       ctx->stage == MESA_SHADER_VERTEX,
2076                                       &const_index, &indir_index);
2077                 for (unsigned chan = 0; chan < ve; chan++) {
2078                         if (indir_index) {
2079                                 unsigned count = glsl_count_attribute_slots(
2080                                                 instr->variables[0]->var->type,
2081                                                 ctx->stage == MESA_SHADER_VERTEX);
2082                                 LLVMValueRef tmp_vec = build_gather_values_extended(
2083                                                 ctx, ctx->inputs + idx + chan, count,
2084                                                 4, false);
2085
2086                                 values[chan] = LLVMBuildExtractElement(ctx->builder,
2087                                                                        tmp_vec,
2088                                                                        indir_index, "");
2089                         } else
2090                                 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2091                 }
2092                 return to_integer(ctx, build_gather_values(ctx, values, ve));
2093                 break;
2094         case nir_var_local:
2095                 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2096                                       &const_index, &indir_index);
2097                 for (unsigned chan = 0; chan < ve; chan++) {
2098                         if (indir_index) {
2099                                 unsigned count = glsl_count_attribute_slots(
2100                                         instr->variables[0]->var->type, false);
2101                                 LLVMValueRef tmp_vec = build_gather_values_extended(
2102                                                 ctx, ctx->locals + idx + chan, count,
2103                                                 4, true);
2104
2105                                 values[chan] = LLVMBuildExtractElement(ctx->builder,
2106                                                                        tmp_vec,
2107                                                                        indir_index, "");
2108                         } else {
2109                                 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2110                         }
2111                 }
2112                 return to_integer(ctx, build_gather_values(ctx, values, ve));
2113         case nir_var_shader_out:
2114                 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2115                                       &const_index, &indir_index);
2116                 for (unsigned chan = 0; chan < ve; chan++) {
2117                         if (indir_index) {
2118                                 unsigned count = glsl_count_attribute_slots(
2119                                                 instr->variables[0]->var->type, false);
2120                                 LLVMValueRef tmp_vec = build_gather_values_extended(
2121                                                 ctx, ctx->outputs + idx + chan, count,
2122                                                 4, true);
2123
2124                                 values[chan] = LLVMBuildExtractElement(ctx->builder,
2125                                                                        tmp_vec,
2126                                                                        indir_index, "");
2127                         } else {
2128                         values[chan] = LLVMBuildLoad(ctx->builder,
2129                                                      ctx->outputs[idx + chan + const_index * 4],
2130                                                      "");
2131                         }
2132                 }
2133                 return to_integer(ctx, build_gather_values(ctx, values, ve));
2134         case nir_var_shared: {
2135                 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2136                                       &const_index, &indir_index);
2137                 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2138                 LLVMValueRef derived_ptr;
2139
2140                 for (unsigned chan = 0; chan < ve; chan++) {
2141                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2142                         if (indir_index)
2143                                 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2144                         derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2145                         values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
2146                 }
2147                 return to_integer(ctx, build_gather_values(ctx, values, ve));
2148         }
2149         default:
2150                 break;
2151         }
2152         return NULL;
2153 }
2154
2155 static void
2156 visit_store_var(struct nir_to_llvm_context *ctx,
2157                                    nir_intrinsic_instr *instr)
2158 {
2159         LLVMValueRef temp_ptr, value;
2160         int idx = instr->variables[0]->var->data.driver_location;
2161         LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2162         int writemask = instr->const_index[0];
2163         LLVMValueRef indir_index;
2164         unsigned const_index;
2165         switch (instr->variables[0]->var->data.mode) {
2166         case nir_var_shader_out:
2167                 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2168                                       &const_index, &indir_index);
2169                 for (unsigned chan = 0; chan < 4; chan++) {
2170                         int stride = 4;
2171                         if (!(writemask & (1 << chan)))
2172                                 continue;
2173                         if (get_llvm_num_components(src) == 1)
2174                                 value = src;
2175                         else
2176                                 value = LLVMBuildExtractElement(ctx->builder, src,
2177                                                                 LLVMConstInt(ctx->i32,
2178                                                                              chan, false),
2179                                                                 "");
2180
2181                         if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2182                             instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
2183                                 stride = 1;
2184                         if (indir_index) {
2185                                 unsigned count = glsl_count_attribute_slots(
2186                                                 instr->variables[0]->var->type, false);
2187                                 LLVMValueRef tmp_vec = build_gather_values_extended(
2188                                                 ctx, ctx->outputs + idx + chan, count,
2189                                                 stride, true);
2190
2191                                 if (get_llvm_num_components(tmp_vec) > 1) {
2192                                         tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2193                                                                          value, indir_index, "");
2194                                 } else
2195                                         tmp_vec = value;
2196                                 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2197                                                             count, stride, tmp_vec);
2198
2199                         } else {
2200                                 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2201
2202                                 LLVMBuildStore(ctx->builder, value, temp_ptr);
2203                         }
2204                 }
2205                 break;
2206         case nir_var_local:
2207                 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2208                                       &const_index, &indir_index);
2209                 for (unsigned chan = 0; chan < 4; chan++) {
2210                         if (!(writemask & (1 << chan)))
2211                                 continue;
2212
2213                         if (get_llvm_num_components(src) == 1)
2214                                 value = src;
2215                         else
2216                                 value = LLVMBuildExtractElement(ctx->builder, src,
2217                                                                 LLVMConstInt(ctx->i32, chan, false), "");
2218                         if (indir_index) {
2219                                 unsigned count = glsl_count_attribute_slots(
2220                                         instr->variables[0]->var->type, false);
2221                                 LLVMValueRef tmp_vec = build_gather_values_extended(
2222                                         ctx, ctx->locals + idx + chan, count,
2223                                         4, true);
2224
2225                                 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2226                                                                  value, indir_index, "");
2227                                 build_store_values_extended(ctx, ctx->locals + idx + chan,
2228                                                             count, 4, tmp_vec);
2229                         } else {
2230                                 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2231
2232                                 LLVMBuildStore(ctx->builder, value, temp_ptr);
2233                         }
2234                 }
2235                 break;
2236         case nir_var_shared: {
2237                 LLVMValueRef ptr;
2238                 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2239                                       &const_index, &indir_index);
2240
2241                 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2242                 LLVMValueRef derived_ptr;
2243
2244                 for (unsigned chan = 0; chan < 4; chan++) {
2245                         if (!(writemask & (1 << chan)))
2246                                 continue;
2247
2248                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2249
2250                         if (get_llvm_num_components(src) == 1)
2251                                 value = src;
2252                         else
2253                                 value = LLVMBuildExtractElement(ctx->builder, src,
2254                                                                 LLVMConstInt(ctx->i32,
2255                                                                              chan, false),
2256                                                                 "");
2257
2258                         if (indir_index)
2259                                 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2260
2261                         derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2262                         LLVMBuildStore(ctx->builder,
2263                                        to_integer(ctx, value), derived_ptr);
2264                 }
2265                 break;
2266         }
2267         default:
2268                 break;
2269         }
2270 }
2271
2272 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2273 {
2274         switch (dim) {
2275         case GLSL_SAMPLER_DIM_BUF:
2276                 return 1;
2277         case GLSL_SAMPLER_DIM_1D:
2278                 return array ? 2 : 1;
2279         case GLSL_SAMPLER_DIM_2D:
2280                 return array ? 3 : 2;
2281         case GLSL_SAMPLER_DIM_MS:
2282                 return array ? 4 : 3;
2283         case GLSL_SAMPLER_DIM_3D:
2284         case GLSL_SAMPLER_DIM_CUBE:
2285                 return 3;
2286         case GLSL_SAMPLER_DIM_RECT:
2287         case GLSL_SAMPLER_DIM_SUBPASS:
2288                 return 2;
2289         case GLSL_SAMPLER_DIM_SUBPASS_MS:
2290                 return 3;
2291         default:
2292                 break;
2293         }
2294         return 0;
2295 }
2296
2297 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
2298                                      nir_intrinsic_instr *instr, bool add_frag_pos)
2299 {
2300         const struct glsl_type *type = instr->variables[0]->var->type;
2301         if(instr->variables[0]->deref.child)
2302                 type = instr->variables[0]->deref.child->type;
2303
2304         LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2305         LLVMValueRef coords[4];
2306         LLVMValueRef masks[] = {
2307                 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2308                 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2309         };
2310         LLVMValueRef res;
2311         int count;
2312         enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
2313         bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
2314                       dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2315
2316         count = image_type_to_components_count(dim,
2317                                                glsl_sampler_type_is_array(type));
2318
2319         if (count == 1) {
2320                 if (instr->src[0].ssa->num_components)
2321                         res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
2322                 else
2323                         res = src0;
2324         } else {
2325                 int chan;
2326                 if (is_ms)
2327                         count--;
2328                 for (chan = 0; chan < count; ++chan) {
2329                         coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
2330                 }
2331
2332                 if (add_frag_pos) {
2333                         for (chan = 0; chan < count; ++chan)
2334                                 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
2335                 }
2336                 if (is_ms) {
2337                         coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
2338                         count++;
2339                 }
2340
2341                 if (count == 3) {
2342                         coords[3] = LLVMGetUndef(ctx->i32);
2343                         count = 4;
2344                 }
2345                 res = build_gather_values(ctx, coords, count);
2346         }
2347         return res;
2348 }
2349
2350 static void build_type_name_for_intr(
2351         LLVMTypeRef type,
2352         char *buf, unsigned bufsize)
2353 {
2354         LLVMTypeRef elem_type = type;
2355
2356         assert(bufsize >= 8);
2357
2358         if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
2359                 int ret = snprintf(buf, bufsize, "v%u",
2360                                         LLVMGetVectorSize(type));
2361                 if (ret < 0) {
2362                         char *type_name = LLVMPrintTypeToString(type);
2363                         fprintf(stderr, "Error building type name for: %s\n",
2364                                 type_name);
2365                         return;
2366                 }
2367                 elem_type = LLVMGetElementType(type);
2368                 buf += ret;
2369                 bufsize -= ret;
2370         }
2371         switch (LLVMGetTypeKind(elem_type)) {
2372         default: break;
2373         case LLVMIntegerTypeKind:
2374                 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
2375                 break;
2376         case LLVMFloatTypeKind:
2377                 snprintf(buf, bufsize, "f32");
2378                 break;
2379         case LLVMDoubleTypeKind:
2380                 snprintf(buf, bufsize, "f64");
2381                 break;
2382         }
2383 }
2384
2385 static void get_image_intr_name(const char *base_name,
2386                                 LLVMTypeRef data_type,
2387                                 LLVMTypeRef coords_type,
2388                                 LLVMTypeRef rsrc_type,
2389                                 char *out_name, unsigned out_len)
2390 {
2391         char coords_type_name[8];
2392
2393         build_type_name_for_intr(coords_type, coords_type_name,
2394                             sizeof(coords_type_name));
2395
2396         if (HAVE_LLVM <= 0x0309) {
2397                 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
2398         } else {
2399                 char data_type_name[8];
2400                 char rsrc_type_name[8];
2401
2402                 build_type_name_for_intr(data_type, data_type_name,
2403                                         sizeof(data_type_name));
2404                 build_type_name_for_intr(rsrc_type, rsrc_type_name,
2405                                         sizeof(rsrc_type_name));
2406                 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
2407                          data_type_name, coords_type_name, rsrc_type_name);
2408         }
2409 }
2410
2411 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
2412                                      nir_intrinsic_instr *instr)
2413 {
2414         LLVMValueRef params[7];
2415         LLVMValueRef res;
2416         char intrinsic_name[64];
2417         const nir_variable *var = instr->variables[0]->var;
2418         const struct glsl_type *type = var->type;
2419         if(instr->variables[0]->deref.child)
2420                 type = instr->variables[0]->deref.child->type;
2421
2422         type = glsl_without_array(type);
2423         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2424                 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2425                 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2426                                                     LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2427                 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2428                 params[3] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
2429                 params[4] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
2430                 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
2431                                           params, 5, 0);
2432
2433                 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
2434                 res = to_integer(ctx, res);
2435         } else {
2436                 bool is_da = glsl_sampler_type_is_array(type) ||
2437                              glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2438                 bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
2439                 LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
2440                 LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
2441                 LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
2442
2443                 params[0] = get_image_coords(ctx, instr, add_frag_pos);
2444                 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2445                 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2446                 if (HAVE_LLVM <= 0x0309) {
2447                         params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
2448                         params[4] = da;
2449                         params[5] = glc;
2450                         params[6] = slc;
2451                 } else {
2452                         LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
2453                         params[3] = glc;
2454                         params[4] = slc;
2455                         params[5] = lwe;
2456                         params[6] = da;
2457                 }
2458
2459                 get_image_intr_name("llvm.amdgcn.image.load",
2460                                     ctx->v4f32, /* vdata */
2461                                     LLVMTypeOf(params[0]), /* coords */
2462                                     LLVMTypeOf(params[1]), /* rsrc */
2463                                     intrinsic_name, sizeof(intrinsic_name));
2464
2465                 res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
2466                                           params, 7, AC_FUNC_ATTR_READONLY);
2467         }
2468         return to_integer(ctx, res);
2469 }
2470
2471 static void visit_image_store(struct nir_to_llvm_context *ctx,
2472                               nir_intrinsic_instr *instr)
2473 {
2474         LLVMValueRef params[8];
2475         char intrinsic_name[64];
2476         const nir_variable *var = instr->variables[0]->var;
2477         LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2478         LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2479         const struct glsl_type *type = glsl_without_array(var->type);
2480
2481         if (ctx->stage == MESA_SHADER_FRAGMENT)
2482                 ctx->shader_info->fs.writes_memory = true;
2483
2484         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2485                 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
2486                 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2487                 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2488                                                     LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2489                 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2490                 params[4] = i1false;  /* glc */
2491                 params[5] = i1false;  /* slc */
2492                 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
2493                                     params, 6, 0);
2494         } else {
2495                 bool is_da = glsl_sampler_type_is_array(type) ||
2496                              glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2497                 LLVMValueRef da = is_da ? i1true : i1false;
2498                 LLVMValueRef glc = i1false;
2499                 LLVMValueRef slc = i1false;
2500
2501                 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
2502                 params[1] = get_image_coords(ctx, instr, false); /* coords */
2503                 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2504                 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2505                 if (HAVE_LLVM <= 0x0309) {
2506                         params[4] = i1false;  /* r128 */
2507                         params[5] = da;
2508                         params[6] = glc;
2509                         params[7] = slc;
2510                 } else {
2511                         LLVMValueRef lwe = i1false;
2512                         params[4] = glc;
2513                         params[5] = slc;
2514                         params[6] = lwe;
2515                         params[7] = da;
2516                 }
2517
2518                 get_image_intr_name("llvm.amdgcn.image.store",
2519                                     LLVMTypeOf(params[0]), /* vdata */
2520                                     LLVMTypeOf(params[1]), /* coords */
2521                                     LLVMTypeOf(params[2]), /* rsrc */
2522                                     intrinsic_name, sizeof(intrinsic_name));
2523
2524                 ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
2525                                     params, 8, 0);
2526         }
2527
2528 }
2529
2530 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
2531                                        nir_intrinsic_instr *instr)
2532 {
2533         LLVMValueRef params[6];
2534         int param_count = 0;
2535         const nir_variable *var = instr->variables[0]->var;
2536         LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2537         LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2538         const char *base_name = "llvm.amdgcn.image.atomic";
2539         const char *atomic_name;
2540         LLVMValueRef coords;
2541         char intrinsic_name[32], coords_type[8];
2542         const struct glsl_type *type = glsl_without_array(var->type);
2543
2544         if (ctx->stage == MESA_SHADER_FRAGMENT)
2545                 ctx->shader_info->fs.writes_memory = true;
2546
2547         params[param_count++] = get_src(ctx, instr->src[2]);
2548         if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
2549                 params[param_count++] = get_src(ctx, instr->src[3]);
2550
2551         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2552                 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2553                 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2554                                                                         LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2555                 params[param_count++] = ctx->i32zero; /* voffset */
2556                 params[param_count++] = i1false;  /* glc */
2557                 params[param_count++] = i1false;  /* slc */
2558         } else {
2559                 bool da = glsl_sampler_type_is_array(type) ||
2560                           glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2561
2562                 coords = params[param_count++] = get_image_coords(ctx, instr, false);
2563                 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2564                 params[param_count++] = i1false; /* r128 */
2565                 params[param_count++] = da ? i1true : i1false;      /* da */
2566                 params[param_count++] = i1false;  /* slc */
2567         }
2568
2569         switch (instr->intrinsic) {
2570         case nir_intrinsic_image_atomic_add:
2571                 atomic_name = "add";
2572                 break;
2573         case nir_intrinsic_image_atomic_min:
2574                 atomic_name = "smin";
2575                 break;
2576         case nir_intrinsic_image_atomic_max:
2577                 atomic_name = "smax";
2578                 break;
2579         case nir_intrinsic_image_atomic_and:
2580                 atomic_name = "and";
2581                 break;
2582         case nir_intrinsic_image_atomic_or:
2583                 atomic_name = "or";
2584                 break;
2585         case nir_intrinsic_image_atomic_xor:
2586                 atomic_name = "xor";
2587                 break;
2588         case nir_intrinsic_image_atomic_exchange:
2589                 atomic_name = "swap";
2590                 break;
2591         case nir_intrinsic_image_atomic_comp_swap:
2592                 atomic_name = "cmpswap";
2593                 break;
2594         default:
2595                 abort();
2596         }
2597         build_int_type_name(LLVMTypeOf(coords),
2598                             coords_type, sizeof(coords_type));
2599
2600         snprintf(intrinsic_name, sizeof(intrinsic_name),
2601                          "%s.%s.%s", base_name, atomic_name, coords_type);
2602         return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
2603 }
2604
2605 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
2606                                      nir_intrinsic_instr *instr)
2607 {
2608         LLVMValueRef res;
2609         LLVMValueRef params[10];
2610         const nir_variable *var = instr->variables[0]->var;
2611         const struct glsl_type *type = instr->variables[0]->var->type;
2612         bool da = glsl_sampler_type_is_array(var->type) ||
2613                   glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
2614         if(instr->variables[0]->deref.child)
2615                 type = instr->variables[0]->deref.child->type;
2616
2617         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
2618                 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
2619         params[0] = ctx->i32zero;
2620         params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2621         params[2] = LLVMConstInt(ctx->i32, 15, false);
2622         params[3] = ctx->i32zero;
2623         params[4] = ctx->i32zero;
2624         params[5] = da ? ctx->i32one : ctx->i32zero;
2625         params[6] = ctx->i32zero;
2626         params[7] = ctx->i32zero;
2627         params[8] = ctx->i32zero;
2628         params[9] = ctx->i32zero;
2629
2630         res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
2631                                   params, 10, AC_FUNC_ATTR_READNONE);
2632
2633         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
2634             glsl_sampler_type_is_array(type)) {
2635                 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
2636                 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
2637                 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
2638                 z = LLVMBuildSDiv(ctx->builder, z, six, "");
2639                 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
2640         }
2641         return res;
2642 }
2643
2644 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2645 {
2646         LLVMValueRef args[1] = {
2647                 LLVMConstInt(ctx->i32, 0xf70, false),
2648         };
2649         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
2650                             ctx->voidt, args, 1, 0);
2651 }
2652
2653 static void emit_barrier(struct nir_to_llvm_context *ctx)
2654 {
2655         // TODO tess
2656         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
2657                             ctx->voidt, NULL, 0, 0);
2658 }
2659
2660 static void emit_discard_if(struct nir_to_llvm_context *ctx,
2661                             nir_intrinsic_instr *instr)
2662 {
2663         LLVMValueRef cond;
2664         ctx->shader_info->fs.can_discard = true;
2665
2666         cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2667                              get_src(ctx, instr->src[0]),
2668                              ctx->i32zero, "");
2669
2670         cond = LLVMBuildSelect(ctx->builder, cond,
2671                                LLVMConstReal(ctx->f32, -1.0f),
2672                                ctx->f32zero, "");
2673         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
2674                             LLVMVoidTypeInContext(ctx->context),
2675                             &cond, 1, 0);
2676 }
2677
2678 static LLVMValueRef
2679 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2680 {
2681         LLVMValueRef result;
2682         LLVMValueRef thread_id = get_thread_id(ctx);
2683         result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2684                               LLVMConstInt(ctx->i32, 0xfc0, false), "");
2685
2686         return LLVMBuildAdd(ctx->builder, result, thread_id, "");
2687 }
2688
2689 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2690                                      nir_intrinsic_instr *instr)
2691 {
2692         LLVMValueRef ptr, result;
2693         int idx = instr->variables[0]->var->data.driver_location;
2694         LLVMValueRef src = get_src(ctx, instr->src[0]);
2695         ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2696
2697         if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
2698                 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2699                 result = LLVMBuildAtomicCmpXchg(ctx->builder,
2700                                                 ptr, src, src1,
2701                                                 LLVMAtomicOrderingSequentiallyConsistent,
2702                                                 LLVMAtomicOrderingSequentiallyConsistent,
2703                                                 false);
2704         } else {
2705                 LLVMAtomicRMWBinOp op;
2706                 switch (instr->intrinsic) {
2707                 case nir_intrinsic_var_atomic_add:
2708                         op = LLVMAtomicRMWBinOpAdd;
2709                         break;
2710                 case nir_intrinsic_var_atomic_umin:
2711                         op = LLVMAtomicRMWBinOpUMin;
2712                         break;
2713                 case nir_intrinsic_var_atomic_umax:
2714                         op = LLVMAtomicRMWBinOpUMax;
2715                         break;
2716                 case nir_intrinsic_var_atomic_imin:
2717                         op = LLVMAtomicRMWBinOpMin;
2718                         break;
2719                 case nir_intrinsic_var_atomic_imax:
2720                         op = LLVMAtomicRMWBinOpMax;
2721                         break;
2722                 case nir_intrinsic_var_atomic_and:
2723                         op = LLVMAtomicRMWBinOpAnd;
2724                         break;
2725                 case nir_intrinsic_var_atomic_or:
2726                         op = LLVMAtomicRMWBinOpOr;
2727                         break;
2728                 case nir_intrinsic_var_atomic_xor:
2729                         op = LLVMAtomicRMWBinOpXor;
2730                         break;
2731                 case nir_intrinsic_var_atomic_exchange:
2732                         op = LLVMAtomicRMWBinOpXchg;
2733                         break;
2734                 default:
2735                         return NULL;
2736                 }
2737
2738                 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2739                                             LLVMAtomicOrderingSequentiallyConsistent,
2740                                             false);
2741         }
2742         return result;
2743 }
2744
2745 #define INTERP_CENTER 0
2746 #define INTERP_CENTROID 1
2747 #define INTERP_SAMPLE 2
2748
2749 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2750                                         enum glsl_interp_mode interp, unsigned location)
2751 {
2752         switch (interp) {
2753         case INTERP_MODE_FLAT:
2754         default:
2755                 return NULL;
2756         case INTERP_MODE_SMOOTH:
2757         case INTERP_MODE_NONE:
2758                 if (location == INTERP_CENTER)
2759                         return ctx->persp_center;
2760                 else if (location == INTERP_CENTROID)
2761                         return ctx->persp_centroid;
2762                 else if (location == INTERP_SAMPLE)
2763                         return ctx->persp_sample;
2764                 break;
2765         case INTERP_MODE_NOPERSPECTIVE:
2766                 if (location == INTERP_CENTER)
2767                         return ctx->linear_center;
2768                 else if (location == INTERP_CENTROID)
2769                         return ctx->linear_centroid;
2770                 else if (location == INTERP_SAMPLE)
2771                         return ctx->linear_sample;
2772                 break;
2773         }
2774         return NULL;
2775 }
2776
2777 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2778                                          LLVMValueRef sample_id)
2779 {
2780         /* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
2781         LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2782         LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2783         LLVMValueRef result[2];
2784
2785         result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2786         result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2787
2788         return build_gather_values(ctx, result, 2);
2789 }
2790
2791 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2792 {
2793         LLVMValueRef values[2];
2794
2795         values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2796         values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2797         return build_gather_values(ctx, values, 2);
2798 }
2799
2800 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2801                                  nir_intrinsic_instr *instr)
2802 {
2803         LLVMValueRef result[2];
2804         LLVMValueRef interp_param, attr_number;
2805         unsigned location;
2806         unsigned chan;
2807         LLVMValueRef src_c0, src_c1;
2808         const char *intr_name;
2809         LLVMValueRef src0;
2810         int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2811         switch (instr->intrinsic) {
2812         case nir_intrinsic_interp_var_at_centroid:
2813                 location = INTERP_CENTROID;
2814                 break;
2815         case nir_intrinsic_interp_var_at_sample:
2816         case nir_intrinsic_interp_var_at_offset:
2817                 location = INTERP_SAMPLE;
2818                 src0 = get_src(ctx, instr->src[0]);
2819                 break;
2820         default:
2821                 break;
2822         }
2823
2824         if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2825                 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2826                 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
2827         } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2828                 LLVMValueRef sample_position;
2829                 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2830
2831                 /* fetch sample ID */
2832                 sample_position = load_sample_position(ctx, src0);
2833
2834                 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2835                 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2836                 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2837                 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2838         }
2839         interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2840         attr_number = LLVMConstInt(ctx->i32, input_index, false);
2841
2842         if (location == INTERP_SAMPLE) {
2843                 LLVMValueRef ij_out[2];
2844                 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2845
2846                 /*
2847                  * take the I then J parameters, and the DDX/Y for it, and
2848                  * calculate the IJ inputs for the interpolator.
2849                  * temp1 = ddx * offset/sample.x + I;
2850                  * interp_param.I = ddy * offset/sample.y + temp1;
2851                  * temp1 = ddx * offset/sample.x + J;
2852                  * interp_param.J = ddy * offset/sample.y + temp1;
2853                  */
2854                 for (unsigned i = 0; i < 2; i++) {
2855                         LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2856                         LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2857                         LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2858                                                                       ddxy_out, ix_ll, "");
2859                         LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2860                                                                       ddxy_out, iy_ll, "");
2861                         LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2862                                                                          interp_param, ix_ll, "");
2863                         LLVMValueRef temp1, temp2;
2864
2865                         interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2866                                                      ctx->f32, "");
2867
2868                         temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2869                         temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2870
2871                         temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2872                         temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
2873
2874                         ij_out[i] = LLVMBuildBitCast(ctx->builder,
2875                                                      temp2, ctx->i32, "");
2876                 }
2877                 interp_param = build_gather_values(ctx, ij_out, 2);
2878
2879         }
2880         intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
2881         for (chan = 0; chan < 2; chan++) {
2882                 LLVMValueRef args[4];
2883                 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2884
2885                 args[0] = llvm_chan;
2886                 args[1] = attr_number;
2887                 args[2] = ctx->prim_mask;
2888                 args[3] = interp_param;
2889                 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
2890                                                    ctx->f32, args, args[3] ? 4 : 3,
2891                                                    AC_FUNC_ATTR_READNONE);
2892         }
2893         return build_gather_values(ctx, result, 2);
2894 }
2895
2896 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
2897                             nir_intrinsic_instr *instr)
2898 {
2899         LLVMValueRef result = NULL;
2900
2901         switch (instr->intrinsic) {
2902         case nir_intrinsic_load_work_group_id: {
2903                 result = ctx->workgroup_ids;
2904                 break;
2905         }
2906         case nir_intrinsic_load_base_vertex: {
2907                 result = ctx->base_vertex;
2908                 break;
2909         }
2910         case nir_intrinsic_load_vertex_id_zero_base: {
2911                 result = ctx->vertex_id;
2912                 break;
2913         }
2914         case nir_intrinsic_load_local_invocation_id: {
2915                 result = ctx->local_invocation_ids;
2916                 break;
2917         }
2918         case nir_intrinsic_load_base_instance:
2919                 result = ctx->start_instance;
2920                 break;
2921         case nir_intrinsic_load_sample_id:
2922                 ctx->shader_info->fs.force_persample = true;
2923                 result = unpack_param(ctx, ctx->ancillary, 8, 4);
2924                 break;
2925         case nir_intrinsic_load_sample_pos:
2926                 ctx->shader_info->fs.force_persample = true;
2927                 result = load_sample_pos(ctx);
2928                 break;
2929         case nir_intrinsic_load_front_face:
2930                 result = ctx->front_face;
2931                 break;
2932         case nir_intrinsic_load_instance_id:
2933                 result = ctx->instance_id;
2934                 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
2935                                             ctx->shader_info->vs.vgpr_comp_cnt);
2936                 break;
2937         case nir_intrinsic_load_num_work_groups:
2938                 result = ctx->num_work_groups;
2939                 break;
2940         case nir_intrinsic_load_local_invocation_index:
2941                 result = visit_load_local_invocation_index(ctx);
2942                 break;
2943         case nir_intrinsic_load_push_constant:
2944                 result = visit_load_push_constant(ctx, instr);
2945                 break;
2946         case nir_intrinsic_vulkan_resource_index:
2947                 result = visit_vulkan_resource_index(ctx, instr);
2948                 break;
2949         case nir_intrinsic_store_ssbo:
2950                 visit_store_ssbo(ctx, instr);
2951                 break;
2952         case nir_intrinsic_load_ssbo:
2953                 result = visit_load_buffer(ctx, instr);
2954                 break;
2955         case nir_intrinsic_ssbo_atomic_add:
2956         case nir_intrinsic_ssbo_atomic_imin:
2957         case nir_intrinsic_ssbo_atomic_umin:
2958         case nir_intrinsic_ssbo_atomic_imax:
2959         case nir_intrinsic_ssbo_atomic_umax:
2960         case nir_intrinsic_ssbo_atomic_and:
2961         case nir_intrinsic_ssbo_atomic_or:
2962         case nir_intrinsic_ssbo_atomic_xor:
2963         case nir_intrinsic_ssbo_atomic_exchange:
2964         case nir_intrinsic_ssbo_atomic_comp_swap:
2965                 result = visit_atomic_ssbo(ctx, instr);
2966                 break;
2967         case nir_intrinsic_load_ubo:
2968                 result = visit_load_ubo_buffer(ctx, instr);
2969                 break;
2970         case nir_intrinsic_get_buffer_size:
2971                 result = visit_get_buffer_size(ctx, instr);
2972                 break;
2973         case nir_intrinsic_load_var:
2974                 result = visit_load_var(ctx, instr);
2975                 break;
2976         case nir_intrinsic_store_var:
2977                 visit_store_var(ctx, instr);
2978                 break;
2979         case nir_intrinsic_image_load:
2980                 result = visit_image_load(ctx, instr);
2981                 break;
2982         case nir_intrinsic_image_store:
2983                 visit_image_store(ctx, instr);
2984                 break;
2985         case nir_intrinsic_image_atomic_add:
2986         case nir_intrinsic_image_atomic_min:
2987         case nir_intrinsic_image_atomic_max:
2988         case nir_intrinsic_image_atomic_and:
2989         case nir_intrinsic_image_atomic_or:
2990         case nir_intrinsic_image_atomic_xor:
2991         case nir_intrinsic_image_atomic_exchange:
2992         case nir_intrinsic_image_atomic_comp_swap:
2993                 result = visit_image_atomic(ctx, instr);
2994                 break;
2995         case nir_intrinsic_image_size:
2996                 result = visit_image_size(ctx, instr);
2997                 break;
2998         case nir_intrinsic_discard:
2999                 ctx->shader_info->fs.can_discard = true;
3000                 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
3001                                     LLVMVoidTypeInContext(ctx->context),
3002                                     NULL, 0, 0);
3003                 break;
3004         case nir_intrinsic_discard_if:
3005                 emit_discard_if(ctx, instr);
3006                 break;
3007         case nir_intrinsic_memory_barrier:
3008                 emit_waitcnt(ctx);
3009                 break;
3010         case nir_intrinsic_barrier:
3011                 emit_barrier(ctx);
3012                 break;
3013         case nir_intrinsic_var_atomic_add:
3014         case nir_intrinsic_var_atomic_imin:
3015         case nir_intrinsic_var_atomic_umin:
3016         case nir_intrinsic_var_atomic_imax:
3017         case nir_intrinsic_var_atomic_umax:
3018         case nir_intrinsic_var_atomic_and:
3019         case nir_intrinsic_var_atomic_or:
3020         case nir_intrinsic_var_atomic_xor:
3021         case nir_intrinsic_var_atomic_exchange:
3022         case nir_intrinsic_var_atomic_comp_swap:
3023                 result = visit_var_atomic(ctx, instr);
3024                 break;
3025         case nir_intrinsic_interp_var_at_centroid:
3026         case nir_intrinsic_interp_var_at_sample:
3027         case nir_intrinsic_interp_var_at_offset:
3028                 result = visit_interp(ctx, instr);
3029                 break;
3030         default:
3031                 fprintf(stderr, "Unknown intrinsic: ");
3032                 nir_print_instr(&instr->instr, stderr);
3033                 fprintf(stderr, "\n");
3034                 break;
3035         }
3036         if (result) {
3037                 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3038         }
3039 }
3040
3041 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3042                                           nir_deref_var *deref,
3043                                           enum desc_type desc_type)
3044 {
3045         unsigned desc_set = deref->var->data.descriptor_set;
3046         LLVMValueRef list = ctx->descriptor_sets[desc_set];
3047         struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3048         struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
3049         unsigned offset = binding->offset;
3050         unsigned stride = binding->size;
3051         unsigned type_size;
3052         LLVMBuilderRef builder = ctx->builder;
3053         LLVMTypeRef type;
3054         LLVMValueRef index = NULL;
3055
3056         assert(deref->var->data.binding < layout->binding_count);
3057
3058         switch (desc_type) {
3059         case DESC_IMAGE:
3060                 type = ctx->v8i32;
3061                 type_size = 32;
3062                 break;
3063         case DESC_FMASK:
3064                 type = ctx->v8i32;
3065                 offset += 32;
3066                 type_size = 32;
3067                 break;
3068         case DESC_SAMPLER:
3069                 type = ctx->v4i32;
3070                 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
3071                         offset += 64;
3072
3073                 type_size = 16;
3074                 break;
3075         case DESC_BUFFER:
3076                 type = ctx->v4i32;
3077                 type_size = 16;
3078                 break;
3079         default:
3080                 unreachable("invalid desc_type\n");
3081         }
3082
3083         if (deref->deref.child) {
3084                 nir_deref_array *child = (nir_deref_array*)deref->deref.child;
3085
3086                 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3087                 offset += child->base_offset * stride;
3088                 if (child->deref_array_type == nir_deref_array_type_indirect) {
3089                         index = get_src(ctx, child->indirect);
3090                 }
3091         }
3092
3093         assert(stride % type_size == 0);
3094
3095         if (!index)
3096                 index = ctx->i32zero;
3097
3098         index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
3099
3100         list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0));
3101         list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
3102
3103         return build_indexed_load_const(ctx, list, index);
3104 }
3105
3106 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3107                                struct ac_tex_info *tinfo,
3108                                nir_tex_instr *instr,
3109                                nir_texop op,
3110                                LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3111                                LLVMValueRef *param, unsigned count,
3112                                unsigned dmask)
3113 {
3114         int num_args;
3115         unsigned is_rect = 0;
3116         bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
3117
3118         if (op == nir_texop_lod)
3119                 da = false;
3120         /* Pad to power of two vector */
3121         while (count < util_next_power_of_two(count))
3122                 param[count++] = LLVMGetUndef(ctx->i32);
3123
3124         if (count > 1)
3125                 tinfo->args[0] = build_gather_values(ctx, param, count);
3126         else
3127                 tinfo->args[0] = param[0];
3128
3129         tinfo->args[1] = res_ptr;
3130         num_args = 2;
3131
3132         if (op == nir_texop_txf ||
3133             op == nir_texop_txf_ms ||
3134             op == nir_texop_query_levels ||
3135             op == nir_texop_texture_samples ||
3136             op == nir_texop_txs)
3137                 tinfo->dst_type = ctx->v4i32;
3138         else {
3139                 tinfo->dst_type = ctx->v4f32;
3140                 tinfo->args[num_args++] = samp_ptr;
3141         }
3142
3143         if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
3144                 tinfo->args[0] = res_ptr;
3145                 tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
3146                 tinfo->args[2] = param[0];
3147                 tinfo->arg_count = 3;
3148                 return;
3149         }
3150
3151         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
3152         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
3153         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
3154         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
3155         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
3156         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
3157         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
3158         tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
3159
3160         tinfo->arg_count = num_args;
3161 }
3162
3163 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3164  *
3165  * SI-CI:
3166  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3167  *   filtering manually. The driver sets img7 to a mask clearing
3168  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3169  *     s_and_b32 samp0, samp0, img7
3170  *
3171  * VI:
3172  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
3173  */
3174 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
3175                                            LLVMValueRef res, LLVMValueRef samp)
3176 {
3177         LLVMBuilderRef builder = ctx->builder;
3178         LLVMValueRef img7, samp0;
3179
3180         if (ctx->options->chip_class >= VI)
3181                 return samp;
3182
3183         img7 = LLVMBuildExtractElement(builder, res,
3184                                        LLVMConstInt(ctx->i32, 7, 0), "");
3185         samp0 = LLVMBuildExtractElement(builder, samp,
3186                                         LLVMConstInt(ctx->i32, 0, 0), "");
3187         samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3188         return LLVMBuildInsertElement(builder, samp, samp0,
3189                                       LLVMConstInt(ctx->i32, 0, 0), "");
3190 }
3191
3192 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3193                            nir_tex_instr *instr,
3194                            LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3195                            LLVMValueRef *fmask_ptr)
3196 {
3197         if (instr->sampler_dim  == GLSL_SAMPLER_DIM_BUF)
3198                 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3199         else
3200                 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3201         if (samp_ptr) {
3202                 if (instr->sampler)
3203                         *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3204                 else
3205                         *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3206                 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3207                         *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3208         }
3209         if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3210                                              instr->op == nir_texop_samples_identical))
3211                 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3212 }
3213
3214 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
3215 {
3216         LLVMValueRef result = NULL;
3217         struct ac_tex_info tinfo = { 0 };
3218         unsigned dmask = 0xf;
3219         LLVMValueRef address[16];
3220         LLVMValueRef coords[5];
3221         LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
3222         LLVMValueRef bias = NULL, offsets = NULL;
3223         LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
3224         LLVMValueRef ddx = NULL, ddy = NULL;
3225         LLVMValueRef derivs[6];
3226         unsigned chan, count = 0;
3227         unsigned const_src = 0, num_deriv_comp = 0;
3228
3229         tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3230
3231         for (unsigned i = 0; i < instr->num_srcs; i++) {
3232                 switch (instr->src[i].src_type) {
3233                 case nir_tex_src_coord:
3234                         coord = get_src(ctx, instr->src[i].src);
3235                         break;
3236                 case nir_tex_src_projector:
3237                         break;
3238                 case nir_tex_src_comparator:
3239                         comparator = get_src(ctx, instr->src[i].src);
3240                         break;
3241                 case nir_tex_src_offset:
3242                         offsets = get_src(ctx, instr->src[i].src);
3243                         const_src = i;
3244                         break;
3245                 case nir_tex_src_bias:
3246                         bias = get_src(ctx, instr->src[i].src);
3247                         break;
3248                 case nir_tex_src_lod:
3249                         lod = get_src(ctx, instr->src[i].src);
3250                         break;
3251                 case nir_tex_src_ms_index:
3252                         sample_index = get_src(ctx, instr->src[i].src);
3253                         break;
3254                 case nir_tex_src_ms_mcs:
3255                         break;
3256                 case nir_tex_src_ddx:
3257                         ddx = get_src(ctx, instr->src[i].src);
3258                         num_deriv_comp = instr->src[i].src.ssa->num_components;
3259                         break;
3260                 case nir_tex_src_ddy:
3261                         ddy = get_src(ctx, instr->src[i].src);
3262                         break;
3263                 case nir_tex_src_texture_offset:
3264                 case nir_tex_src_sampler_offset:
3265                 case nir_tex_src_plane:
3266                 default:
3267                         break;
3268                 }
3269         }
3270
3271         if (instr->op == nir_texop_texture_samples) {
3272                 LLVMValueRef res, samples, is_msaa;
3273                 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
3274                 samples = LLVMBuildExtractElement(ctx->builder, res,
3275                                                   LLVMConstInt(ctx->i32, 3, false), "");
3276                 is_msaa = LLVMBuildLShr(ctx->builder, samples,
3277                                         LLVMConstInt(ctx->i32, 28, false), "");
3278                 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
3279                                        LLVMConstInt(ctx->i32, 0xe, false), "");
3280                 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
3281                                         LLVMConstInt(ctx->i32, 0xe, false), "");
3282
3283                 samples = LLVMBuildLShr(ctx->builder, samples,
3284                                         LLVMConstInt(ctx->i32, 16, false), "");
3285                 samples = LLVMBuildAnd(ctx->builder, samples,
3286                                        LLVMConstInt(ctx->i32, 0xf, false), "");
3287                 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
3288                                        samples, "");
3289                 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
3290                                           ctx->i32one, "");
3291                 result = samples;
3292                 goto write_result;
3293         }
3294
3295         if (coord)
3296                 for (chan = 0; chan < instr->coord_components; chan++)
3297                         coords[chan] = llvm_extract_elem(ctx, coord, chan);
3298
3299         if (offsets && instr->op != nir_texop_txf) {
3300                 LLVMValueRef offset[3], pack;
3301                 for (chan = 0; chan < 3; ++chan)
3302                         offset[chan] = ctx->i32zero;
3303
3304                 tinfo.has_offset = true;
3305                 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
3306                         offset[chan] = llvm_extract_elem(ctx, offsets, chan);
3307                         offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
3308                                                     LLVMConstInt(ctx->i32, 0x3f, false), "");
3309                         if (chan)
3310                                 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
3311                                                             LLVMConstInt(ctx->i32, chan * 8, false), "");
3312                 }
3313                 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
3314                 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
3315                 address[count++] = pack;
3316
3317         }
3318         /* pack LOD bias value */
3319         if (instr->op == nir_texop_txb && bias) {
3320                 address[count++] = bias;
3321         }
3322
3323         /* Pack depth comparison value */
3324         if (instr->is_shadow && comparator) {
3325                 address[count++] = llvm_extract_elem(ctx, comparator, 0);
3326         }
3327
3328         /* pack derivatives */
3329         if (ddx || ddy) {
3330                 switch (instr->sampler_dim) {
3331                 case GLSL_SAMPLER_DIM_3D:
3332                 case GLSL_SAMPLER_DIM_CUBE:
3333                         num_deriv_comp = 3;
3334                         break;
3335                 case GLSL_SAMPLER_DIM_2D:
3336                 default:
3337                         num_deriv_comp = 2;
3338                         break;
3339                 case GLSL_SAMPLER_DIM_1D:
3340                         num_deriv_comp = 1;
3341                         break;
3342                 }
3343
3344                 for (unsigned i = 0; i < num_deriv_comp; i++) {
3345                         derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
3346                         derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
3347                 }
3348         }
3349
3350         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
3351                 for (chan = 0; chan < instr->coord_components; chan++)
3352                         coords[chan] = to_float(ctx, coords[chan]);
3353                 if (instr->coord_components == 3)
3354                         coords[3] = LLVMGetUndef(ctx->f32);
3355                 ac_prepare_cube_coords(&ctx->ac,
3356                         instr->op == nir_texop_txd, instr->is_array,
3357                         coords, derivs);
3358                 if (num_deriv_comp)
3359                         num_deriv_comp--;
3360         }
3361
3362         if (ddx || ddy) {
3363                 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
3364                         address[count++] = derivs[i];
3365         }
3366
3367         /* Pack texture coordinates */
3368         if (coord) {
3369                 address[count++] = coords[0];
3370                 if (instr->coord_components > 1)
3371                         address[count++] = coords[1];
3372                 if (instr->coord_components > 2) {
3373                         /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
3374                         if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
3375                                 coords[2] = to_float(ctx, coords[2]);
3376                                 coords[2] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coords[2],
3377                                                                 1, 0);
3378                                 coords[2] = to_integer(ctx, coords[2]);
3379                         }
3380                         address[count++] = coords[2];
3381                 }
3382         }
3383
3384         /* Pack LOD */
3385         if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
3386                 address[count++] = lod;
3387         } else if (instr->op == nir_texop_txf_ms && sample_index) {
3388                 address[count++] = sample_index;
3389         } else if(instr->op == nir_texop_txs) {
3390                 count = 0;
3391                 if (lod)
3392                         address[count++] = lod;
3393                 else
3394                         address[count++] = ctx->i32zero;
3395         }
3396
3397         for (chan = 0; chan < count; chan++) {
3398                 address[chan] = LLVMBuildBitCast(ctx->builder,
3399                                                  address[chan], ctx->i32, "");
3400         }
3401
3402         if (instr->op == nir_texop_samples_identical) {
3403                 LLVMValueRef txf_address[4];
3404                 struct ac_tex_info txf_info = { 0 };
3405                 unsigned txf_count = count;
3406                 memcpy(txf_address, address, sizeof(txf_address));
3407
3408                 if (!instr->is_array)
3409                         txf_address[2] = ctx->i32zero;
3410                 txf_address[3] = ctx->i32zero;
3411
3412                 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3413                                    fmask_ptr, NULL,
3414                                    txf_address, txf_count, 0xf);
3415
3416                 result = build_tex_intrinsic(ctx, instr, &txf_info);
3417
3418                 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3419                 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
3420                 goto write_result;
3421         }
3422
3423         /* Adjust the sample index according to FMASK.
3424          *
3425          * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3426          * which is the identity mapping. Each nibble says which physical sample
3427          * should be fetched to get that sample.
3428          *
3429          * For example, 0x11111100 means there are only 2 samples stored and
3430          * the second sample covers 3/4 of the pixel. When reading samples 0
3431          * and 1, return physical sample 0 (determined by the first two 0s
3432          * in FMASK), otherwise return physical sample 1.
3433          *
3434          * The sample index should be adjusted as follows:
3435          *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
3436          */
3437         if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
3438                 LLVMValueRef txf_address[4];
3439                 struct ac_tex_info txf_info = { 0 };
3440                 unsigned txf_count = count;
3441                 memcpy(txf_address, address, sizeof(txf_address));
3442
3443                 if (!instr->is_array)
3444                         txf_address[2] = ctx->i32zero;
3445                 txf_address[3] = ctx->i32zero;
3446
3447                 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3448                                    fmask_ptr, NULL,
3449                                    txf_address, txf_count, 0xf);
3450
3451                 result = build_tex_intrinsic(ctx, instr, &txf_info);
3452                 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3453                 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3454
3455                 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3456                                                              result,
3457                                                              ctx->i32zero, "");
3458
3459                 unsigned sample_chan = instr->is_array ? 3 : 2;
3460
3461                 LLVMValueRef sample_index4 =
3462                         LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
3463                 LLVMValueRef shifted_fmask =
3464                         LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3465                 LLVMValueRef final_sample =
3466                         LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3467
3468                 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3469                  * resource descriptor is 0 (invalid),
3470                  */
3471                 LLVMValueRef fmask_desc =
3472                         LLVMBuildBitCast(ctx->builder, fmask_ptr,
3473                                          ctx->v8i32, "");
3474
3475                 LLVMValueRef fmask_word1 =
3476                         LLVMBuildExtractElement(ctx->builder, fmask_desc,
3477                                                 ctx->i32one, "");
3478
3479                 LLVMValueRef word1_is_nonzero =
3480                         LLVMBuildICmp(ctx->builder, LLVMIntNE,
3481                                       fmask_word1, ctx->i32zero, "");
3482
3483                 /* Replace the MSAA sample index. */
3484                 address[sample_chan] =
3485                         LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3486                                         final_sample, address[sample_chan], "");
3487         }
3488
3489         if (offsets && instr->op == nir_texop_txf) {
3490                 nir_const_value *const_offset =
3491                         nir_src_as_const_value(instr->src[const_src].src);
3492                 int num_offsets = instr->src[const_src].src.ssa->num_components;
3493                 assert(const_offset);
3494                 num_offsets = MIN2(num_offsets, instr->coord_components);
3495                 if (num_offsets > 2)
3496                         address[2] = LLVMBuildAdd(ctx->builder,
3497                                                   address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
3498                 if (num_offsets > 1)
3499                         address[1] = LLVMBuildAdd(ctx->builder,
3500                                                   address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
3501                 address[0] = LLVMBuildAdd(ctx->builder,
3502                                           address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
3503
3504         }
3505
3506         /* TODO TG4 support */
3507         if (instr->op == nir_texop_tg4) {
3508                 if (instr->is_shadow)
3509                         dmask = 1;
3510                 else
3511                         dmask = 1 << instr->component;
3512         }
3513         set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
3514                            res_ptr, samp_ptr, address, count, dmask);
3515
3516         result = build_tex_intrinsic(ctx, instr, &tinfo);
3517
3518         if (instr->op == nir_texop_query_levels)
3519                 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
3520         else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
3521                 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3522         else if (instr->op == nir_texop_txs &&
3523                  instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
3524                  instr->is_array) {
3525                 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3526                 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3527                 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
3528                 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3529                 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
3530         } else if (instr->dest.ssa.num_components != 4)
3531                 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
3532
3533 write_result:
3534         if (result) {
3535                 assert(instr->dest.is_ssa);
3536                 result = to_integer(ctx, result);
3537                 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3538         }
3539 }
3540
3541
3542 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3543 {
3544         LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3545         LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3546
3547         _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3548         _mesa_hash_table_insert(ctx->phis, instr, result);
3549 }
3550
3551 static void visit_post_phi(struct nir_to_llvm_context *ctx,
3552                            nir_phi_instr *instr,
3553                            LLVMValueRef llvm_phi)
3554 {
3555         nir_foreach_phi_src(src, instr) {
3556                 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3557                 LLVMValueRef llvm_src = get_src(ctx, src->src);
3558
3559                 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3560         }
3561 }
3562
3563 static void phi_post_pass(struct nir_to_llvm_context *ctx)
3564 {
3565         struct hash_entry *entry;
3566         hash_table_foreach(ctx->phis, entry) {
3567                 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3568                                (LLVMValueRef)entry->data);
3569         }
3570 }
3571
3572
3573 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3574                             nir_ssa_undef_instr *instr)
3575 {
3576         unsigned num_components = instr->def.num_components;
3577         LLVMValueRef undef;
3578
3579         if (num_components == 1)
3580                 undef = LLVMGetUndef(ctx->i32);
3581         else {
3582                 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3583         }
3584         _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3585 }
3586
3587 static void visit_jump(struct nir_to_llvm_context *ctx,
3588                        nir_jump_instr *instr)
3589 {
3590         switch (instr->type) {
3591         case nir_jump_break:
3592                 LLVMBuildBr(ctx->builder, ctx->break_block);
3593                 LLVMClearInsertionPosition(ctx->builder);
3594                 break;
3595         case nir_jump_continue:
3596                 LLVMBuildBr(ctx->builder, ctx->continue_block);
3597                 LLVMClearInsertionPosition(ctx->builder);
3598                 break;
3599         default:
3600                 fprintf(stderr, "Unknown NIR jump instr: ");
3601                 nir_print_instr(&instr->instr, stderr);
3602                 fprintf(stderr, "\n");
3603                 abort();
3604         }
3605 }
3606
3607 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3608                           struct exec_list *list);
3609
3610 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
3611 {
3612         LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
3613         nir_foreach_instr(instr, block)
3614         {
3615                 switch (instr->type) {
3616                 case nir_instr_type_alu:
3617                         visit_alu(ctx, nir_instr_as_alu(instr));
3618                         break;
3619                 case nir_instr_type_load_const:
3620                         visit_load_const(ctx, nir_instr_as_load_const(instr));
3621                         break;
3622                 case nir_instr_type_intrinsic:
3623                         visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3624                         break;
3625                 case nir_instr_type_tex:
3626                         visit_tex(ctx, nir_instr_as_tex(instr));
3627                         break;
3628                 case nir_instr_type_phi:
3629                         visit_phi(ctx, nir_instr_as_phi(instr));
3630                         break;
3631                 case nir_instr_type_ssa_undef:
3632                         visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3633                         break;
3634                 case nir_instr_type_jump:
3635                         visit_jump(ctx, nir_instr_as_jump(instr));
3636                         break;
3637                 default:
3638                         fprintf(stderr, "Unknown NIR instr type: ");
3639                         nir_print_instr(instr, stderr);
3640                         fprintf(stderr, "\n");
3641                         abort();
3642                 }
3643         }
3644
3645         _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3646 }
3647
3648 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
3649 {
3650         LLVMValueRef value = get_src(ctx, if_stmt->condition);
3651
3652         LLVMBasicBlockRef merge_block =
3653             LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3654         LLVMBasicBlockRef if_block =
3655             LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3656         LLVMBasicBlockRef else_block = merge_block;
3657         if (!exec_list_is_empty(&if_stmt->else_list))
3658                 else_block = LLVMAppendBasicBlockInContext(
3659                     ctx->context, ctx->main_function, "");
3660
3661         LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
3662                                           LLVMConstInt(ctx->i32, 0, false), "");
3663         LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
3664
3665         LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3666         visit_cf_list(ctx, &if_stmt->then_list);
3667         if (LLVMGetInsertBlock(ctx->builder))
3668                 LLVMBuildBr(ctx->builder, merge_block);
3669
3670         if (!exec_list_is_empty(&if_stmt->else_list)) {
3671                 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
3672                 visit_cf_list(ctx, &if_stmt->else_list);
3673                 if (LLVMGetInsertBlock(ctx->builder))
3674                         LLVMBuildBr(ctx->builder, merge_block);
3675         }
3676
3677         LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
3678 }
3679
3680 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
3681 {
3682         LLVMBasicBlockRef continue_parent = ctx->continue_block;
3683         LLVMBasicBlockRef break_parent = ctx->break_block;
3684
3685         ctx->continue_block =
3686             LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3687         ctx->break_block =
3688             LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3689
3690         LLVMBuildBr(ctx->builder, ctx->continue_block);
3691         LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
3692         visit_cf_list(ctx, &loop->body);
3693
3694         if (LLVMGetInsertBlock(ctx->builder))
3695                 LLVMBuildBr(ctx->builder, ctx->continue_block);
3696         LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
3697
3698         ctx->continue_block = continue_parent;
3699         ctx->break_block = break_parent;
3700 }
3701
3702 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3703                           struct exec_list *list)
3704 {
3705         foreach_list_typed(nir_cf_node, node, node, list)
3706         {
3707                 switch (node->type) {
3708                 case nir_cf_node_block:
3709                         visit_block(ctx, nir_cf_node_as_block(node));
3710                         break;
3711
3712                 case nir_cf_node_if:
3713                         visit_if(ctx, nir_cf_node_as_if(node));
3714                         break;
3715
3716                 case nir_cf_node_loop:
3717                         visit_loop(ctx, nir_cf_node_as_loop(node));
3718                         break;
3719
3720                 default:
3721                         assert(0);
3722                 }
3723         }
3724 }
3725
3726 static void
3727 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
3728                      struct nir_variable *variable)
3729 {
3730         LLVMValueRef t_list_ptr = ctx->vertex_buffers;
3731         LLVMValueRef t_offset;
3732         LLVMValueRef t_list;
3733         LLVMValueRef args[3];
3734         LLVMValueRef input;
3735         LLVMValueRef buffer_index;
3736         int index = variable->data.location - VERT_ATTRIB_GENERIC0;
3737         int idx = variable->data.location;
3738         unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
3739
3740         variable->data.driver_location = idx * 4;
3741
3742         if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
3743                 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
3744                                             ctx->start_instance, "");
3745                 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3746                                             ctx->shader_info->vs.vgpr_comp_cnt);
3747         } else
3748                 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
3749                                             ctx->base_vertex, "");
3750
3751         for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
3752                 t_offset = LLVMConstInt(ctx->i32, index + i, false);
3753
3754                 t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
3755                 args[0] = t_list;
3756                 args[1] = LLVMConstInt(ctx->i32, 0, false);
3757                 args[2] = buffer_index;
3758                 input = ac_emit_llvm_intrinsic(&ctx->ac,
3759                         "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
3760                         AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3761
3762                 for (unsigned chan = 0; chan < 4; chan++) {
3763                         LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3764                         ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
3765                                 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
3766                                                         input, llvm_chan, ""));
3767                 }
3768         }
3769 }
3770
3771
3772 static void interp_fs_input(struct nir_to_llvm_context *ctx,
3773                             unsigned attr,
3774                             LLVMValueRef interp_param,
3775                             LLVMValueRef prim_mask,
3776                             LLVMValueRef result[4])
3777 {
3778         const char *intr_name;
3779         LLVMValueRef attr_number;
3780         unsigned chan;
3781
3782         attr_number = LLVMConstInt(ctx->i32, attr, false);
3783
3784         /* fs.constant returns the param from the middle vertex, so it's not
3785          * really useful for flat shading. It's meant to be used for custom
3786          * interpolation (but the intrinsic can't fetch from the other two
3787          * vertices).
3788          *
3789          * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
3790          * to do the right thing. The only reason we use fs.constant is that
3791          * fs.interp cannot be used on integers, because they can be equal
3792          * to NaN.
3793          */
3794         intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
3795
3796         for (chan = 0; chan < 4; chan++) {
3797                 LLVMValueRef args[4];
3798                 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3799
3800                 args[0] = llvm_chan;
3801                 args[1] = attr_number;
3802                 args[2] = prim_mask;
3803                 args[3] = interp_param;
3804                 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
3805                                                    ctx->f32, args, args[3] ? 4 : 3,
3806                                                   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3807         }
3808 }
3809
3810 static void
3811 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
3812                      struct nir_variable *variable)
3813 {
3814         int idx = variable->data.location;
3815         unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3816         LLVMValueRef interp;
3817
3818         variable->data.driver_location = idx * 4;
3819         ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
3820
3821         if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
3822                 unsigned interp_type;
3823                 if (variable->data.sample) {
3824                         interp_type = INTERP_SAMPLE;
3825                         ctx->shader_info->fs.force_persample = true;
3826                 } else if (variable->data.centroid)
3827                         interp_type = INTERP_CENTROID;
3828                 else
3829                         interp_type = INTERP_CENTER;
3830
3831                 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
3832         } else
3833                 interp = NULL;
3834
3835         for (unsigned i = 0; i < attrib_count; ++i)
3836                 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
3837
3838 }
3839
3840 static void
3841 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
3842                          struct nir_variable *variable)
3843 {
3844         switch (ctx->stage) {
3845         case MESA_SHADER_VERTEX:
3846                 handle_vs_input_decl(ctx, variable);
3847                 break;
3848         case MESA_SHADER_FRAGMENT:
3849                 handle_fs_input_decl(ctx, variable);
3850                 break;
3851         default:
3852                 break;
3853         }
3854
3855 }
3856
3857 static void
3858 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
3859                      struct nir_shader *nir)
3860 {
3861         unsigned index = 0;
3862         for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
3863                 LLVMValueRef interp_param;
3864                 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
3865
3866                 if (!(ctx->input_mask & (1ull << i)))
3867                         continue;
3868
3869                 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
3870                         interp_param = *inputs;
3871                         interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
3872                                         inputs);
3873
3874                         if (!interp_param)
3875                                 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
3876                         ++index;
3877                 } else if (i == VARYING_SLOT_POS) {
3878                         for(int i = 0; i < 3; ++i)
3879                                 inputs[i] = ctx->frag_pos[i];
3880
3881                         inputs[3] = emit_fdiv(ctx, ctx->f32one, ctx->frag_pos[3]);
3882                 }
3883         }
3884         ctx->shader_info->fs.num_interp = index;
3885         if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
3886                 ctx->shader_info->fs.has_pcoord = true;
3887         ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
3888 }
3889
3890 static LLVMValueRef
3891 ac_build_alloca(struct nir_to_llvm_context *ctx,
3892                 LLVMTypeRef type,
3893                 const char *name)
3894 {
3895         LLVMBuilderRef builder = ctx->builder;
3896         LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
3897         LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
3898         LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
3899         LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
3900         LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
3901         LLVMValueRef res;
3902
3903         if (first_instr) {
3904                 LLVMPositionBuilderBefore(first_builder, first_instr);
3905         } else {
3906                 LLVMPositionBuilderAtEnd(first_builder, first_block);
3907         }
3908
3909         res = LLVMBuildAlloca(first_builder, type, name);
3910         LLVMBuildStore(builder, LLVMConstNull(type), res);
3911
3912         LLVMDisposeBuilder(first_builder);
3913
3914         return res;
3915 }
3916
3917 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
3918                                           LLVMTypeRef type,
3919                                           const char *name)
3920 {
3921         LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
3922         LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
3923         return ptr;
3924 }
3925
3926 static void
3927 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
3928                           struct nir_variable *variable)
3929 {
3930         int idx = variable->data.location + variable->data.index;
3931         unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3932
3933         variable->data.driver_location = idx * 4;
3934
3935         if (ctx->stage == MESA_SHADER_VERTEX) {
3936
3937                 if (idx == VARYING_SLOT_CLIP_DIST0 ||
3938                     idx == VARYING_SLOT_CULL_DIST0) {
3939                         int length = glsl_get_length(variable->type);
3940                         if (idx == VARYING_SLOT_CLIP_DIST0) {
3941                                 ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
3942                                 ctx->num_clips = length;
3943                         } else if (idx == VARYING_SLOT_CULL_DIST0) {
3944                                 ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
3945                                 ctx->num_culls = length;
3946                         }
3947                         if (length > 4)
3948                                 attrib_count = 2;
3949                         else
3950                                 attrib_count = 1;
3951                 }
3952         }
3953
3954         for (unsigned i = 0; i < attrib_count; ++i) {
3955                 for (unsigned chan = 0; chan < 4; chan++) {
3956                         ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
3957                                        si_build_alloca_undef(ctx, ctx->f32, "");
3958                 }
3959         }
3960         ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
3961 }
3962
3963 static void
3964 setup_locals(struct nir_to_llvm_context *ctx,
3965              struct nir_function *func)
3966 {
3967         int i, j;
3968         ctx->num_locals = 0;
3969         nir_foreach_variable(variable, &func->impl->locals) {
3970                 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3971                 variable->data.driver_location = ctx->num_locals * 4;
3972                 ctx->num_locals += attrib_count;
3973         }
3974         ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
3975         if (!ctx->locals)
3976             return;
3977
3978         for (i = 0; i < ctx->num_locals; i++) {
3979                 for (j = 0; j < 4; j++) {
3980                         ctx->locals[i * 4 + j] =
3981                                 si_build_alloca_undef(ctx, ctx->f32, "temp");
3982                 }
3983         }
3984 }
3985
3986 static LLVMValueRef
3987 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
3988 {
3989         v = to_float(ctx, v);
3990         v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
3991         return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
3992 }
3993
3994
3995 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
3996                                         LLVMValueRef src0, LLVMValueRef src1)
3997 {
3998         LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
3999         LLVMValueRef comp[2];
4000
4001         comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
4002         comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
4003         comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
4004         return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
4005 }
4006
4007 /* Initialize arguments for the shader export intrinsic */
4008 static void
4009 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
4010                          LLVMValueRef *values,
4011                          unsigned target,
4012                          LLVMValueRef *args)
4013 {
4014         /* Default is 0xf. Adjusted below depending on the format. */
4015         args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
4016         /* Specify whether the EXEC mask represents the valid mask */
4017         args[1] = LLVMConstInt(ctx->i32, 0, false);
4018
4019         /* Specify whether this is the last export */
4020         args[2] = LLVMConstInt(ctx->i32, 0, false);
4021         /* Specify the target we are exporting */
4022         args[3] = LLVMConstInt(ctx->i32, target, false);
4023
4024         args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
4025         args[5] = LLVMGetUndef(ctx->f32);
4026         args[6] = LLVMGetUndef(ctx->f32);
4027         args[7] = LLVMGetUndef(ctx->f32);
4028         args[8] = LLVMGetUndef(ctx->f32);
4029
4030         if (!values)
4031                 return;
4032
4033         if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4034                 LLVMValueRef val[4];
4035                 unsigned index = target - V_008DFC_SQ_EXP_MRT;
4036                 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4037                 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4038
4039                 switch(col_format) {
4040                 case V_028714_SPI_SHADER_ZERO:
4041                         args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
4042                         args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
4043                         break;
4044
4045                 case V_028714_SPI_SHADER_32_R:
4046                         args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
4047                         args[5] = values[0];
4048                         break;
4049
4050                 case V_028714_SPI_SHADER_32_GR:
4051                         args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
4052                         args[5] = values[0];
4053                         args[6] = values[1];
4054                         break;
4055
4056                 case V_028714_SPI_SHADER_32_AR:
4057                         args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
4058                         args[5] = values[0];
4059                         args[8] = values[3];
4060                         break;
4061
4062                 case V_028714_SPI_SHADER_FP16_ABGR:
4063                         args[4] = ctx->i32one;
4064
4065                         for (unsigned chan = 0; chan < 2; chan++) {
4066                                 LLVMValueRef pack_args[2] = {
4067                                         values[2 * chan],
4068                                         values[2 * chan + 1]
4069                                 };
4070                                 LLVMValueRef packed;
4071
4072                                 packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
4073                                                              ctx->i32, pack_args, 2,
4074                                                              AC_FUNC_ATTR_READNONE);
4075                                 args[chan + 5] = packed;
4076                         }
4077                         break;
4078
4079                 case V_028714_SPI_SHADER_UNORM16_ABGR:
4080                         for (unsigned chan = 0; chan < 4; chan++) {
4081                                 val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
4082                                 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4083                                                         LLVMConstReal(ctx->f32, 65535), "");
4084                                 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4085                                                         LLVMConstReal(ctx->f32, 0.5), "");
4086                                 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4087                                                         ctx->i32, "");
4088                         }
4089
4090                         args[4] = ctx->i32one;
4091                         args[5] = emit_pack_int16(ctx, val[0], val[1]);
4092                         args[6] = emit_pack_int16(ctx, val[2], val[3]);
4093                         break;
4094
4095                 case V_028714_SPI_SHADER_SNORM16_ABGR:
4096                         for (unsigned chan = 0; chan < 4; chan++) {
4097                                 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4098                                 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4099                                                         LLVMConstReal(ctx->f32, 32767), "");
4100
4101                                 /* If positive, add 0.5, else add -0.5. */
4102                                 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4103                                                 LLVMBuildSelect(ctx->builder,
4104                                                         LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4105                                                                 val[chan], ctx->f32zero, ""),
4106                                                         LLVMConstReal(ctx->f32, 0.5),
4107                                                         LLVMConstReal(ctx->f32, -0.5), ""), "");
4108                                 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4109                         }
4110
4111                         args[4] = ctx->i32one;
4112                         args[5] = emit_pack_int16(ctx, val[0], val[1]);
4113                         args[6] = emit_pack_int16(ctx, val[2], val[3]);
4114                         break;
4115
4116                 case V_028714_SPI_SHADER_UINT16_ABGR: {
4117                         LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4118
4119                         for (unsigned chan = 0; chan < 4; chan++) {
4120                                 val[chan] = to_integer(ctx, values[chan]);
4121                                 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4122                         }
4123
4124                         args[4] = ctx->i32one;
4125                         args[5] = emit_pack_int16(ctx, val[0], val[1]);
4126                         args[6] = emit_pack_int16(ctx, val[2], val[3]);
4127                         break;
4128                 }
4129
4130                 case V_028714_SPI_SHADER_SINT16_ABGR: {
4131                         LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4132                         LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4133
4134                         /* Clamp. */
4135                         for (unsigned chan = 0; chan < 4; chan++) {
4136                                 val[chan] = to_integer(ctx, values[chan]);
4137                                 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4138                                 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4139                         }
4140
4141                         args[4] = ctx->i32one;
4142                         args[5] = emit_pack_int16(ctx, val[0], val[1]);
4143                         args[6] = emit_pack_int16(ctx, val[2], val[3]);
4144                         break;
4145                 }
4146
4147                 default:
4148                 case V_028714_SPI_SHADER_32_ABGR:
4149                         memcpy(&args[5], values, sizeof(values[0]) * 4);
4150                         break;
4151                 }
4152         } else
4153                 memcpy(&args[5], values, sizeof(values[0]) * 4);
4154
4155         for (unsigned i = 5; i < 9; ++i)
4156                 args[i] = to_float(ctx, args[i]);
4157 }
4158
4159 static void
4160 handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
4161 {
4162         uint32_t param_count = 0;
4163         unsigned target;
4164         unsigned pos_idx, num_pos_exports = 0;
4165         LLVMValueRef args[9];
4166         LLVMValueRef pos_args[4][9] = { { 0 } };
4167         LLVMValueRef psize_value = 0;
4168         int i;
4169         const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
4170                                                        (1ull << VARYING_SLOT_CLIP_DIST1) |
4171                                                        (1ull << VARYING_SLOT_CULL_DIST0) |
4172                                                        (1ull << VARYING_SLOT_CULL_DIST1));
4173
4174         if (clip_mask) {
4175                 LLVMValueRef slots[8];
4176                 unsigned j;
4177
4178                 if (ctx->shader_info->vs.cull_dist_mask)
4179                         ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
4180
4181                 i = VARYING_SLOT_CLIP_DIST0;
4182                 for (j = 0; j < ctx->num_clips; j++)
4183                         slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4184                                                                ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4185                 i = VARYING_SLOT_CULL_DIST0;
4186                 for (j = 0; j < ctx->num_culls; j++)
4187                         slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4188                                                                            ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4189
4190                 for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
4191                         slots[i] = LLVMGetUndef(ctx->f32);
4192
4193                 if (ctx->num_clips + ctx->num_culls > 4) {
4194                         target = V_008DFC_SQ_EXP_POS + 3;
4195                         si_llvm_init_export_args(ctx, &slots[4], target, args);
4196                         memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4197                                args, sizeof(args));
4198                 }
4199
4200                 target = V_008DFC_SQ_EXP_POS + 2;
4201                 si_llvm_init_export_args(ctx, &slots[0], target, args);
4202                 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4203                        args, sizeof(args));
4204
4205         }
4206
4207         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4208                 LLVMValueRef values[4];
4209                 if (!(ctx->output_mask & (1ull << i)))
4210                         continue;
4211
4212                 for (unsigned j = 0; j < 4; j++)
4213                         values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4214                                               ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4215
4216                 if (i == VARYING_SLOT_POS) {
4217                         target = V_008DFC_SQ_EXP_POS;
4218                 } else if (i == VARYING_SLOT_CLIP_DIST0 ||
4219                            i == VARYING_SLOT_CLIP_DIST1 ||
4220                            i == VARYING_SLOT_CULL_DIST0 ||
4221                            i == VARYING_SLOT_CULL_DIST1) {
4222                         continue;
4223                 } else if (i == VARYING_SLOT_PSIZ) {
4224                         ctx->shader_info->vs.writes_pointsize = true;
4225                         psize_value = values[0];
4226                         continue;
4227                 } else if (i >= VARYING_SLOT_VAR0) {
4228                         ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
4229                         target = V_008DFC_SQ_EXP_PARAM + param_count;
4230                         param_count++;
4231                 }
4232
4233                 si_llvm_init_export_args(ctx, values, target, args);
4234
4235                 if (target >= V_008DFC_SQ_EXP_POS &&
4236                     target <= (V_008DFC_SQ_EXP_POS + 3)) {
4237                         memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4238                                args, sizeof(args));
4239                 } else {
4240                         ac_emit_llvm_intrinsic(&ctx->ac,
4241                                             "llvm.SI.export",
4242                                             LLVMVoidTypeInContext(ctx->context),
4243                                             args, 9, 0);
4244                 }
4245         }
4246
4247         /* We need to add the position output manually if it's missing. */
4248         if (!pos_args[0][0]) {
4249                 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
4250                 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
4251                 pos_args[0][2] = ctx->i32zero; /* last export? */
4252                 pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
4253                 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
4254                 pos_args[0][5] = ctx->f32zero; /* X */
4255                 pos_args[0][6] = ctx->f32zero; /* Y */
4256                 pos_args[0][7] = ctx->f32zero; /* Z */
4257                 pos_args[0][8] = ctx->f32one;  /* W */
4258         }
4259
4260         if (ctx->shader_info->vs.writes_pointsize == true) {
4261                 pos_args[1][0] = LLVMConstInt(ctx->i32, (ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */
4262                 pos_args[1][1] = ctx->i32zero;  /* EXEC mask */
4263                 pos_args[1][2] = ctx->i32zero;  /* last export? */
4264                 pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
4265                 pos_args[1][4] = ctx->i32zero;  /* COMPR flag */
4266                 pos_args[1][5] = ctx->f32zero; /* X */
4267                 pos_args[1][6] = ctx->f32zero; /* Y */
4268                 pos_args[1][7] = ctx->f32zero; /* Z */
4269                 pos_args[1][8] = ctx->f32zero;  /* W */
4270
4271                 if (ctx->shader_info->vs.writes_pointsize == true)
4272                         pos_args[1][5] = psize_value;
4273         }
4274         for (i = 0; i < 4; i++) {
4275                 if (pos_args[i][0])
4276                         num_pos_exports++;
4277         }
4278
4279         pos_idx = 0;
4280         for (i = 0; i < 4; i++) {
4281                 if (!pos_args[i][0])
4282                         continue;
4283
4284                 /* Specify the target we are exporting */
4285                 pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
4286                 if (pos_idx == num_pos_exports)
4287                         pos_args[i][2] = ctx->i32one;
4288                 ac_emit_llvm_intrinsic(&ctx->ac,
4289                                     "llvm.SI.export",
4290                                     LLVMVoidTypeInContext(ctx->context),
4291                                     pos_args[i], 9, 0);
4292         }
4293
4294         ctx->shader_info->vs.pos_exports = num_pos_exports;
4295         ctx->shader_info->vs.param_exports = param_count;
4296 }
4297
4298 static void
4299 si_export_mrt_color(struct nir_to_llvm_context *ctx,
4300                     LLVMValueRef *color, unsigned param, bool is_last)
4301 {
4302         LLVMValueRef args[9];
4303         /* Export */
4304         si_llvm_init_export_args(ctx, color, param,
4305                                  args);
4306
4307         if (is_last) {
4308                 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4309                 args[2] = ctx->i32one; /* DONE bit */
4310         } else if (args[0] == ctx->i32zero)
4311                 return; /* unnecessary NULL export */
4312
4313         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4314                             ctx->voidt, args, 9, 0);
4315 }
4316
4317 static void
4318 si_export_mrt_z(struct nir_to_llvm_context *ctx,
4319                 LLVMValueRef depth, LLVMValueRef stencil,
4320                 LLVMValueRef samplemask)
4321 {
4322         LLVMValueRef args[9];
4323         unsigned mask = 0;
4324         args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4325         args[2] = ctx->i32one; /* DONE bit */
4326         /* Specify the target we are exporting */
4327         args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4328
4329         args[4] = ctx->i32zero; /* COMP flag */
4330         args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4331         args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4332         args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4333         args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4334
4335         if (depth) {
4336                 args[5] = depth;
4337                 mask |= 0x1;
4338         }
4339
4340         if (stencil) {
4341                 args[6] = stencil;
4342                 mask |= 0x2;
4343         }
4344
4345         if (samplemask) {
4346                 args[7] = samplemask;
4347                 mask |= 0x04;
4348         }
4349
4350         /* SI (except OLAND) has a bug that it only looks
4351          * at the X writemask component. */
4352         if (ctx->options->chip_class == SI &&
4353             ctx->options->family != CHIP_OLAND)
4354                 mask |= 0x01;
4355
4356         args[0] = LLVMConstInt(ctx->i32, mask, false);
4357         ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4358                             ctx->voidt, args, 9, 0);
4359 }
4360
4361 static void
4362 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
4363 {
4364         unsigned index = 0;
4365         LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
4366
4367         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4368                 LLVMValueRef values[4];
4369
4370                 if (!(ctx->output_mask & (1ull << i)))
4371                         continue;
4372
4373                 if (i == FRAG_RESULT_DEPTH) {
4374                         ctx->shader_info->fs.writes_z = true;
4375                         depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
4376                                                             ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4377                 } else if (i == FRAG_RESULT_STENCIL) {
4378                         ctx->shader_info->fs.writes_stencil = true;
4379                         stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
4380                                                               ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4381                 } else {
4382                         bool last = false;
4383                         for (unsigned j = 0; j < 4; j++)
4384                                 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4385                                                                         ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4386
4387                         if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
4388                                 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
4389
4390                         si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
4391                         index++;
4392                 }
4393         }
4394
4395         if (depth || stencil)
4396                 si_export_mrt_z(ctx, depth, stencil, samplemask);
4397         else if (!index)
4398                 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
4399
4400         ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
4401 }
4402
4403 static void
4404 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
4405 {
4406         switch (ctx->stage) {
4407         case MESA_SHADER_VERTEX:
4408                 handle_vs_outputs_post(ctx);
4409                 break;
4410         case MESA_SHADER_FRAGMENT:
4411                 handle_fs_outputs_post(ctx);
4412                 break;
4413         default:
4414                 break;
4415         }
4416 }
4417
4418 static void
4419 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4420                           struct nir_variable *variable, uint32_t *offset, int idx)
4421 {
4422         unsigned size = glsl_count_attribute_slots(variable->type, false);
4423         variable->data.driver_location = *offset;
4424         *offset += size;
4425 }
4426
4427 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
4428 {
4429         LLVMPassManagerRef passmgr;
4430         /* Create the pass manager */
4431         passmgr = LLVMCreateFunctionPassManagerForModule(
4432                                                         ctx->module);
4433
4434         /* This pass should eliminate all the load and store instructions */
4435         LLVMAddPromoteMemoryToRegisterPass(passmgr);
4436
4437         /* Add some optimization passes */
4438         LLVMAddScalarReplAggregatesPass(passmgr);
4439         LLVMAddLICMPass(passmgr);
4440         LLVMAddAggressiveDCEPass(passmgr);
4441         LLVMAddCFGSimplificationPass(passmgr);
4442         LLVMAddInstructionCombiningPass(passmgr);
4443
4444         /* Run the pass */
4445         LLVMInitializeFunctionPassManager(passmgr);
4446         LLVMRunFunctionPassManager(passmgr, ctx->main_function);
4447         LLVMFinalizeFunctionPassManager(passmgr);
4448
4449         LLVMDisposeBuilder(ctx->builder);
4450         LLVMDisposePassManager(passmgr);
4451 }
4452
4453 static
4454 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
4455                                        struct nir_shader *nir,
4456                                        struct ac_shader_variant_info *shader_info,
4457                                        const struct ac_nir_compiler_options *options)
4458 {
4459         struct nir_to_llvm_context ctx = {0};
4460         struct nir_function *func;
4461         unsigned i;
4462         ctx.options = options;
4463         ctx.shader_info = shader_info;
4464         ctx.context = LLVMContextCreate();
4465         ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
4466
4467         ac_llvm_context_init(&ctx.ac, ctx.context);
4468         ctx.ac.module = ctx.module;
4469
4470         ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
4471
4472         memset(shader_info, 0, sizeof(*shader_info));
4473
4474         LLVMSetTarget(ctx.module, "amdgcn--");
4475         setup_types(&ctx);
4476
4477         ctx.builder = LLVMCreateBuilderInContext(ctx.context);
4478         ctx.ac.builder = ctx.builder;
4479         ctx.stage = nir->stage;
4480
4481         for (i = 0; i < AC_UD_MAX_SETS; i++)
4482                 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
4483         for (i = 0; i < AC_UD_MAX_UD; i++)
4484                 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
4485
4486         create_function(&ctx);
4487
4488         if (nir->stage == MESA_SHADER_COMPUTE) {
4489                 int num_shared = 0;
4490                 nir_foreach_variable(variable, &nir->shared)
4491                         num_shared++;
4492                 if (num_shared) {
4493                         int idx = 0;
4494                         uint32_t shared_size = 0;
4495                         LLVMValueRef var;
4496                         LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
4497                         nir_foreach_variable(variable, &nir->shared) {
4498                                 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
4499                                 idx++;
4500                         }
4501
4502                         shared_size *= 4;
4503                         var = LLVMAddGlobalInAddressSpace(ctx.module,
4504                                                           LLVMArrayType(ctx.i8, shared_size),
4505                                                           "compute_lds",
4506                                                           LOCAL_ADDR_SPACE);
4507                         LLVMSetAlignment(var, 4);
4508                         ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
4509                 }
4510         }
4511
4512         nir_foreach_variable(variable, &nir->inputs)
4513                 handle_shader_input_decl(&ctx, variable);
4514
4515         if (nir->stage == MESA_SHADER_FRAGMENT)
4516                 handle_fs_inputs_pre(&ctx, nir);
4517
4518         nir_foreach_variable(variable, &nir->outputs)
4519                 handle_shader_output_decl(&ctx, variable);
4520
4521         ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4522                                            _mesa_key_pointer_equal);
4523         ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4524                                            _mesa_key_pointer_equal);
4525
4526         func = (struct nir_function *)exec_list_get_head(&nir->functions);
4527
4528         setup_locals(&ctx, func);
4529
4530         visit_cf_list(&ctx, &func->impl->body);
4531         phi_post_pass(&ctx);
4532
4533         handle_shader_outputs_post(&ctx);
4534         LLVMBuildRetVoid(ctx.builder);
4535
4536         ac_llvm_finalize_module(&ctx);
4537         free(ctx.locals);
4538         ralloc_free(ctx.defs);
4539         ralloc_free(ctx.phis);
4540
4541         return ctx.module;
4542 }
4543
4544 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
4545 {
4546         unsigned *retval = (unsigned *)context;
4547         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
4548         char *description = LLVMGetDiagInfoDescription(di);
4549
4550         if (severity == LLVMDSError) {
4551                 *retval = 1;
4552                 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
4553                         description);
4554         }
4555
4556         LLVMDisposeMessage(description);
4557 }
4558
4559 static unsigned ac_llvm_compile(LLVMModuleRef M,
4560                                 struct ac_shader_binary *binary,
4561                                 LLVMTargetMachineRef tm)
4562 {
4563         unsigned retval = 0;
4564         char *err;
4565         LLVMContextRef llvm_ctx;
4566         LLVMMemoryBufferRef out_buffer;
4567         unsigned buffer_size;
4568         const char *buffer_data;
4569         LLVMBool mem_err;
4570
4571         /* Setup Diagnostic Handler*/
4572         llvm_ctx = LLVMGetModuleContext(M);
4573
4574         LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
4575                                         &retval);
4576
4577         /* Compile IR*/
4578         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
4579                                                       &err, &out_buffer);
4580
4581         /* Process Errors/Warnings */
4582         if (mem_err) {
4583                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
4584                 free(err);
4585                 retval = 1;
4586                 goto out;
4587         }
4588
4589         /* Extract Shader Code*/
4590         buffer_size = LLVMGetBufferSize(out_buffer);
4591         buffer_data = LLVMGetBufferStart(out_buffer);
4592
4593         ac_elf_read(buffer_data, buffer_size, binary);
4594
4595         /* Clean up */
4596         LLVMDisposeMemoryBuffer(out_buffer);
4597
4598 out:
4599         return retval;
4600 }
4601
4602 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
4603                            struct ac_shader_binary *binary,
4604                            struct ac_shader_config *config,
4605                            struct ac_shader_variant_info *shader_info,
4606                            struct nir_shader *nir,
4607                            const struct ac_nir_compiler_options *options,
4608                            bool dump_shader)
4609 {
4610
4611         LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
4612                                                              options);
4613         if (dump_shader)
4614                 LLVMDumpModule(llvm_module);
4615
4616         memset(binary, 0, sizeof(*binary));
4617         int v = ac_llvm_compile(llvm_module, binary, tm);
4618         if (v) {
4619                 fprintf(stderr, "compile failed\n");
4620         }
4621
4622         if (dump_shader)
4623                 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
4624
4625         ac_shader_binary_read_config(binary, config, 0);
4626
4627         LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4628         LLVMDisposeModule(llvm_module);
4629         LLVMContextDispose(ctx);
4630
4631         if (nir->stage == MESA_SHADER_FRAGMENT) {
4632                 shader_info->num_input_vgprs = 0;
4633                 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
4634                         shader_info->num_input_vgprs += 2;
4635                 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
4636                         shader_info->num_input_vgprs += 2;
4637                 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
4638                         shader_info->num_input_vgprs += 2;
4639                 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
4640                         shader_info->num_input_vgprs += 3;
4641                 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
4642                         shader_info->num_input_vgprs += 2;
4643                 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
4644                         shader_info->num_input_vgprs += 2;
4645                 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
4646                         shader_info->num_input_vgprs += 2;
4647                 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
4648                         shader_info->num_input_vgprs += 1;
4649                 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
4650                         shader_info->num_input_vgprs += 1;
4651                 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
4652                         shader_info->num_input_vgprs += 1;
4653                 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
4654                         shader_info->num_input_vgprs += 1;
4655                 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
4656                         shader_info->num_input_vgprs += 1;
4657                 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
4658                         shader_info->num_input_vgprs += 1;
4659                 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
4660                         shader_info->num_input_vgprs += 1;
4661                 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
4662                         shader_info->num_input_vgprs += 1;
4663                 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
4664                         shader_info->num_input_vgprs += 1;
4665         }
4666         config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
4667
4668         /* +3 for scratch wave offset and VCC */
4669         config->num_sgprs = MAX2(config->num_sgprs,
4670                                  shader_info->num_input_sgprs + 3);
4671         if (nir->stage == MESA_SHADER_COMPUTE) {
4672                 for (int i = 0; i < 3; ++i)
4673                         shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
4674         }
4675
4676         if (nir->stage == MESA_SHADER_FRAGMENT)
4677                 shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
4678 }