/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_util.h"
26 #include "ac_binary.h"
29 #include "../vulkan/radv_descriptor_set.h"
30 #include "util/bitscan.h"
31 #include <llvm-c/Transforms/Scalar.h>
/* Numeric calling-convention IDs handed to LLVMSetFunctionCallConv(). */
enum radeon_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};
/* LLVM address-space numbers used for pointer types in this file. */
#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

/* Number of input/output slots; each slot is stored as 4 channels. */
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
/*
 * Per-shader translation state shared by every helper in this file.
 *
 * NOTE(review): this text is a damaged extraction. Each line carries a stray
 * leading line number, and every short line (including the closing brace and
 * several members that other functions in this file read, e.g. the cached
 * LLVM type handles) is missing. Restore from the pristine file before
 * building.
 */
53 struct nir_to_llvm_context {
54 struct ac_llvm_context ac;
55 const struct ac_nir_compiler_options *options;
56 struct ac_shader_variant_info *shader_info;
/* LLVM objects the shader is emitted into. */
58 LLVMContextRef context;
60 LLVMBuilderRef builder;
61 LLVMValueRef main_function;
/* Hash tables; "defs" is searched by get_src() and get_block(). */
63 struct hash_table *defs;
64 struct hash_table *phis;
/* Function parameters captured by create_function(). */
66 LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
67 LLVMValueRef push_constants;
68 LLVMValueRef num_work_groups;
69 LLVMValueRef workgroup_ids;
70 LLVMValueRef local_invocation_ids;
/* Vertex-stage parameters. */
73 LLVMValueRef vertex_buffers;
74 LLVMValueRef base_vertex;
75 LLVMValueRef start_instance;
76 LLVMValueRef vertex_id;
77 LLVMValueRef rel_auto_id;
78 LLVMValueRef vs_prim_id;
79 LLVMValueRef instance_id;
/* Fragment-stage parameters. */
81 LLVMValueRef prim_mask;
82 LLVMValueRef sample_positions;
83 LLVMValueRef persp_sample, persp_center, persp_centroid;
84 LLVMValueRef linear_sample, linear_center, linear_centroid;
85 LLVMValueRef front_face;
86 LLVMValueRef ancillary;
87 LLVMValueRef frag_pos[4];
89 LLVMBasicBlockRef continue_block;
90 LLVMBasicBlockRef break_block;
/* Constants created once in setup_types(). */
108 LLVMValueRef i32zero;
110 LLVMValueRef f32zero;
112 LLVMValueRef v4f32empty;
/* Metadata kind IDs and nodes created in setup_types(). */
114 unsigned range_md_kind;
115 unsigned uniform_md_kind;
116 unsigned fpmath_md_kind;
117 unsigned invariant_load_md_kind;
118 LLVMValueRef empty_md;
119 LLVMValueRef fpmath_md_2p5_ulp;
120 gl_shader_stage stage;
/* Four entries per slot; see radeon_llvm_reg_index_soa(). */
123 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
124 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
126 LLVMValueRef shared_memory;
128 uint64_t output_mask;
130 LLVMValueRef *locals;
/* When set, emit_ddxy() does not create its LDS scratch global. */
135 bool has_ds_bpermute;
/*
 * NOTE(review): these two members are what remains of a second aggregate
 * whose opening line, closing brace and other members were lost in
 * extraction. Presumably a small argument-bundle struct; confirm against
 * the pristine file.
 */
139 LLVMValueRef args[12];
141 LLVMTypeRef dst_type;
145 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
146 nir_deref_var *deref,
147 enum desc_type desc_type);
/*
 * Flatten a (slot, channel) pair into an index into a 4-channels-per-slot
 * array.
 */
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return (index * 4) + chan;
}
153 static unsigned llvm_get_type_size(LLVMTypeRef type)
155 LLVMTypeKind kind = LLVMGetTypeKind(type);
158 case LLVMIntegerTypeKind:
159 return LLVMGetIntTypeWidth(type) / 8;
160 case LLVMFloatTypeKind:
162 case LLVMPointerTypeKind:
164 case LLVMVectorTypeKind:
165 return LLVMGetVectorSize(type) *
166 llvm_get_type_size(LLVMGetElementType(type));
173 static void set_llvm_calling_convention(LLVMValueRef func,
174 gl_shader_stage stage)
176 enum radeon_llvm_calling_convention calling_conv;
179 case MESA_SHADER_VERTEX:
180 case MESA_SHADER_TESS_CTRL:
181 case MESA_SHADER_TESS_EVAL:
182 calling_conv = RADEON_LLVM_AMDGPU_VS;
184 case MESA_SHADER_GEOMETRY:
185 calling_conv = RADEON_LLVM_AMDGPU_GS;
187 case MESA_SHADER_FRAGMENT:
188 calling_conv = RADEON_LLVM_AMDGPU_PS;
190 case MESA_SHADER_COMPUTE:
191 calling_conv = RADEON_LLVM_AMDGPU_CS;
194 unreachable("Unhandle shader type");
197 LLVMSetFunctionCallConv(func, calling_conv);
/*
 * Add a function named "main" to module, append its "main_body" block and
 * position builder at the end of that block.
 *
 * The return type is a struct built from return_types when num_return_elems
 * is non-zero, and void otherwise. The first sgpr_params parameters receive
 * attributes: BYVAL plus dereferenceable when their bit is set in
 * array_params_mask, INREG otherwise.
 *
 * NOTE(review): damaged extraction. The return-type line, braces, "else"
 * lines, two assignment left-hand sides and the names of three of the four
 * target-dependent attributes are missing; presumably the attribute block is
 * guarded by unsafe_math. Restore from the pristine file.
 */
201 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
202 LLVMBuilderRef builder, LLVMTypeRef *return_types,
203 unsigned num_return_elems, LLVMTypeRef *param_types,
204 unsigned param_count, unsigned array_params_mask,
205 unsigned sgpr_params, bool unsafe_math)
207 LLVMTypeRef main_function_type, ret_type;
208 LLVMBasicBlockRef main_function_body;
210 if (num_return_elems)
211 ret_type = LLVMStructTypeInContext(ctx, return_types,
212 num_return_elems, true);
214 ret_type = LLVMVoidTypeInContext(ctx);
216 /* Setup the function */
218 LLVMFunctionType(ret_type, param_types, param_count, 0);
219 LLVMValueRef main_function =
220 LLVMAddFunction(module, "main", main_function_type);
222 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
223 LLVMPositionBuilderAtEnd(builder, main_function_body);
/* Default convention; create_function() overrides it per stage afterwards. */
225 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
226 for (unsigned i = 0; i < sgpr_params; ++i) {
227 if (array_params_mask & (1 << i)) {
228 LLVMValueRef P = LLVMGetParam(main_function, i);
/* The attribute helper is called with a 1-based parameter index. */
229 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
230 ac_add_attr_dereferenceable(P, UINT64_MAX);
233 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
238 /* These were copied from some LLVM test. */
239 LLVMAddTargetDependentFunctionAttr(main_function,
240 "less-precise-fpmad",
242 LLVMAddTargetDependentFunctionAttr(main_function,
245 LLVMAddTargetDependentFunctionAttr(main_function,
248 LLVMAddTargetDependentFunctionAttr(main_function,
252 return main_function;
255 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
257 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
/*
 * Compute a pointer into ctx->shared_memory at constant offset idx and
 * bitcast it to "pointer to type", keeping the address space of the GEP
 * result.
 *
 * NOTE(review): damaged extraction. The remaining parameter lines (the body
 * uses "idx" and "type"), local declarations, braces and the return
 * statement are missing; restore from the pristine file.
 */
261 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
269 offset = LLVMConstInt(ctx->i32, idx, false);
271 ptr = ctx->shared_memory;
272 ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
273 addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
274 ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
278 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
280 LLVMTypeRef type = LLVMTypeOf(v);
281 if (type == ctx->f32) {
282 return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
283 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
284 LLVMTypeRef elem_type = LLVMGetElementType(type);
285 if (elem_type == ctx->f32) {
286 LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
287 return LLVMBuildBitCast(ctx->builder, v, nt, "");
293 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
295 LLVMTypeRef type = LLVMTypeOf(v);
296 if (type == ctx->i32) {
297 return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
298 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
299 LLVMTypeRef elem_type = LLVMGetElementType(type);
300 if (elem_type == ctx->i32) {
301 LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
302 return LLVMBuildBitCast(ctx->builder, v, nt, "");
308 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
309 LLVMValueRef param, unsigned rshift,
312 LLVMValueRef value = param;
314 value = LLVMBuildLShr(ctx->builder, value,
315 LLVMConstInt(ctx->i32, rshift, false), "");
317 if (rshift + bitwidth < 32) {
318 unsigned mask = (1 << bitwidth) - 1;
319 value = LLVMBuildAnd(ctx->builder, value,
320 LLVMConstInt(ctx->i32, mask, false), "");
325 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
326 LLVMValueRef base_ptr, LLVMValueRef index)
328 LLVMValueRef indices[2] = {
332 return LLVMBuildGEP(ctx->builder, base_ptr,
336 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
337 LLVMValueRef base_ptr, LLVMValueRef index,
340 LLVMValueRef pointer;
341 pointer = build_gep0(ctx, base_ptr, index);
343 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
344 return LLVMBuildLoad(ctx->builder, pointer, "");
347 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
348 LLVMValueRef base_ptr, LLVMValueRef index)
350 LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
351 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
355 static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
357 ud_info->sgpr_idx = sgpr_idx;
358 ud_info->num_sgprs = num_sgprs;
359 ud_info->indirect = false;
360 ud_info->indirect_offset = 0;
363 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
364 int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
366 set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
370 static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
371 uint32_t indirect_offset)
373 ud_info->sgpr_idx = sgpr_idx;
374 ud_info->num_sgprs = num_sgprs;
375 ud_info->indirect = true;
376 ud_info->indirect_offset = indirect_offset;
/*
 * Build the parameter list for the current shader stage, create the LLVM
 * "main" function, account input register counts in shader_info, and store
 * each parameter in the matching ctx field.
 *
 * The parameter order is: one pointer per descriptor set used by this stage,
 * an optional push-constant pointer, then the stage-specific arguments.
 *
 * NOTE(review): damaged extraction. Braces, "break"/"default" lines, the
 * declaration of "i", the resets of arg_idx/user_sgpr_idx before the second
 * pass, the user_sgpr_idx increments and a few assignment left-hand sides
 * are missing. Restore from the pristine file; do not rebuild by hand.
 */
380 static void create_function(struct nir_to_llvm_context *ctx)
382 LLVMTypeRef arg_types[23];
383 unsigned arg_idx = 0;
384 unsigned array_params_mask = 0;
385 unsigned sgpr_count = 0, user_sgpr_count;
387 unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
388 unsigned user_sgpr_idx;
389 bool need_push_constants;
/* Push constants are needed only with a layout that has push-constant
 * bytes or dynamic offsets. */
391 need_push_constants = true;
392 if (!ctx->options->layout)
393 need_push_constants = false;
394 else if (!ctx->options->layout->push_constant_size &&
395 !ctx->options->layout->dynamic_offset_count)
396 need_push_constants = false;
398 /* 1 for each descriptor set */
399 for (unsigned i = 0; i < num_sets; ++i) {
400 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
401 array_params_mask |= (1 << arg_idx);
402 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
406 if (need_push_constants) {
407 /* 1 for push constants and dynamic descriptors */
408 array_params_mask |= (1 << arg_idx);
409 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
/* Stage-specific parameter types; user_sgpr_count and sgpr_count mark the
 * boundaries used by the register accounting below. */
412 switch (ctx->stage) {
413 case MESA_SHADER_COMPUTE:
414 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
415 user_sgpr_count = arg_idx;
416 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
417 arg_types[arg_idx++] = ctx->i32;
418 sgpr_count = arg_idx;
420 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
422 case MESA_SHADER_VERTEX:
423 arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
424 arg_types[arg_idx++] = ctx->i32; // base vertex
425 arg_types[arg_idx++] = ctx->i32; // start instance
426 user_sgpr_count = sgpr_count = arg_idx;
427 arg_types[arg_idx++] = ctx->i32; // vertex id
428 arg_types[arg_idx++] = ctx->i32; // rel auto id
429 arg_types[arg_idx++] = ctx->i32; // vs prim id
430 arg_types[arg_idx++] = ctx->i32; // instance id
432 case MESA_SHADER_FRAGMENT:
433 arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
434 user_sgpr_count = arg_idx;
435 arg_types[arg_idx++] = ctx->i32; /* prim mask */
436 sgpr_count = arg_idx;
437 arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
438 arg_types[arg_idx++] = ctx->v2i32; /* persp center */
439 arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
440 arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
441 arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
442 arg_types[arg_idx++] = ctx->v2i32; /* linear center */
443 arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
444 arg_types[arg_idx++] = ctx->f32; /* line stipple tex */
445 arg_types[arg_idx++] = ctx->f32; /* pos x float */
446 arg_types[arg_idx++] = ctx->f32; /* pos y float */
447 arg_types[arg_idx++] = ctx->f32; /* pos z float */
448 arg_types[arg_idx++] = ctx->f32; /* pos w float */
449 arg_types[arg_idx++] = ctx->i32; /* front face */
450 arg_types[arg_idx++] = ctx->i32; /* ancillary */
451 arg_types[arg_idx++] = ctx->f32; /* sample coverage */
452 arg_types[arg_idx++] = ctx->i32; /* fixed pt */
455 unreachable("Shader stage not implemented");
458 ctx->main_function = create_llvm_function(
459 ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
460 arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
461 set_llvm_calling_convention(ctx->main_function, ctx->stage);
/* Register accounting: each parameter counts as its byte size / 4. */
464 ctx->shader_info->num_input_sgprs = 0;
465 ctx->shader_info->num_input_vgprs = 0;
467 for (i = 0; i < user_sgpr_count; i++)
468 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
470 ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
471 for (; i < sgpr_count; i++)
472 ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
/* Input VGPRs are not counted here for fragment shaders. */
474 if (ctx->stage != MESA_SHADER_FRAGMENT)
475 for (; i < arg_idx; ++i)
476 ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
/* Second pass: fetch the parameters in the order they were declared. */
480 for (unsigned i = 0; i < num_sets; ++i) {
481 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
482 set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
484 ctx->descriptor_sets[i] =
485 LLVMGetParam(ctx->main_function, arg_idx++);
487 ctx->descriptor_sets[i] = NULL;
490 if (need_push_constants) {
491 ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
492 set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
496 switch (ctx->stage) {
497 case MESA_SHADER_COMPUTE:
498 set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
500 ctx->num_work_groups =
501 LLVMGetParam(ctx->main_function, arg_idx++);
503 LLVMGetParam(ctx->main_function, arg_idx++);
505 LLVMGetParam(ctx->main_function, arg_idx++);
506 ctx->local_invocation_ids =
507 LLVMGetParam(ctx->main_function, arg_idx++);
509 case MESA_SHADER_VERTEX:
510 set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
512 ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
513 set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
515 ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
516 ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
517 ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
518 ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
519 ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
520 ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
522 case MESA_SHADER_FRAGMENT:
523 set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
525 ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
526 ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
527 ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
528 ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
529 ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
531 ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
532 ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
533 ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
534 arg_idx++; /* line stipple */
535 ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
536 ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
537 ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
538 ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
539 ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
540 ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
543 unreachable("Shader stage not implemented");
547 static void setup_types(struct nir_to_llvm_context *ctx)
549 LLVMValueRef args[4];
551 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
552 ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
553 ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
554 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
555 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
556 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
557 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
558 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
559 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
560 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
561 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
562 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
563 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
564 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
565 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
567 ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
568 ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
569 ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
570 ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
572 args[0] = ctx->f32zero;
573 args[1] = ctx->f32zero;
574 args[2] = ctx->f32zero;
575 args[3] = ctx->f32one;
576 ctx->v4f32empty = LLVMConstVector(args, 4);
578 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
580 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
581 "invariant.load", 14);
582 ctx->uniform_md_kind =
583 LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
584 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
586 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
588 args[0] = LLVMConstReal(ctx->f32, 2.5);
589 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
592 static int get_llvm_num_components(LLVMValueRef value)
594 LLVMTypeRef type = LLVMTypeOf(value);
595 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
596 ? LLVMGetVectorSize(type)
598 return num_components;
601 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
605 int count = get_llvm_num_components(value);
607 assert(index < count);
611 return LLVMBuildExtractElement(ctx->builder, value,
612 LLVMConstInt(ctx->i32, index, false), "");
615 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
616 LLVMValueRef value, unsigned count)
618 unsigned num_components = get_llvm_num_components(value);
619 if (count == num_components)
622 LLVMValueRef masks[] = {
623 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
624 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
627 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
630 LLVMValueRef swizzle = LLVMConstVector(masks, count);
631 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
635 build_gather_values_extended(struct nir_to_llvm_context *ctx,
636 LLVMValueRef *values,
637 unsigned value_count,
638 unsigned value_stride,
641 LLVMBuilderRef builder = ctx->builder;
646 if (value_count == 1) {
648 return LLVMBuildLoad(builder, values[0], "");
650 } else if (!value_count)
651 unreachable("value_count is 0");
653 for (i = 0; i < value_count; i++) {
654 LLVMValueRef value = values[i * value_stride];
656 value = LLVMBuildLoad(builder, value, "");
659 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
660 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
661 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
668 build_store_values_extended(struct nir_to_llvm_context *ctx,
669 LLVMValueRef *values,
670 unsigned value_count,
671 unsigned value_stride,
674 LLVMBuilderRef builder = ctx->builder;
677 if (value_count == 1) {
678 LLVMBuildStore(builder, vec, values[0]);
682 for (i = 0; i < value_count; i++) {
683 LLVMValueRef ptr = values[i * value_stride];
684 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
685 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
686 LLVMBuildStore(builder, value, ptr);
691 build_gather_values(struct nir_to_llvm_context *ctx,
692 LLVMValueRef *values,
693 unsigned value_count)
695 return build_gather_values_extended(ctx, values, value_count, 1, false);
698 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
701 LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
702 if (def->num_components > 1) {
703 type = LLVMVectorType(type, def->num_components);
708 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
711 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
712 return (LLVMValueRef)entry->data;
716 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
719 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
720 return (LLVMBasicBlockRef)entry->data;
/*
 * Fetch an ALU source and apply its swizzle so the result has
 * num_components components.
 *
 * Visible cases: vector source to scalar result (extractelement), scalar
 * source to vector result (replicate via build_gather_values), otherwise a
 * shufflevector with the swizzle as mask.
 *
 * NOTE(review): damaged extraction. The "nir_alu_src src" parameter line,
 * braces, the ": 1" arm of the component-count ternary, the assignment that
 * sets need_swizzle, trailing call arguments and the return are missing;
 * restore from the pristine file.
 */
723 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
725 unsigned num_components)
727 LLVMValueRef value = get_src(ctx, src.src);
728 bool need_swizzle = false;
731 LLVMTypeRef type = LLVMTypeOf(value);
732 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
733 ? LLVMGetVectorSize(type)
/* A swizzle is needed when any selected component is not the identity. */
736 for (unsigned i = 0; i < num_components; ++i) {
737 assert(src.swizzle[i] < src_components);
738 if (src.swizzle[i] != i)
742 if (need_swizzle || num_components != src_components) {
/* All four swizzle entries are converted; only num_components are used. */
743 LLVMValueRef masks[] = {
744 LLVMConstInt(ctx->i32, src.swizzle[0], false),
745 LLVMConstInt(ctx->i32, src.swizzle[1], false),
746 LLVMConstInt(ctx->i32, src.swizzle[2], false),
747 LLVMConstInt(ctx->i32, src.swizzle[3], false)};
749 if (src_components > 1 && num_components == 1) {
750 value = LLVMBuildExtractElement(ctx->builder, value,
752 } else if (src_components == 1 && num_components > 1) {
753 LLVMValueRef values[] = {value, value, value, value};
754 value = build_gather_values(ctx, values, num_components);
756 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
757 value = LLVMBuildShuffleVector(ctx->builder, value, value,
766 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
767 LLVMIntPredicate pred, LLVMValueRef src0,
770 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
771 return LLVMBuildSelect(ctx->builder, result,
772 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
773 LLVMConstInt(ctx->i32, 0, false), "");
776 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
777 LLVMRealPredicate pred, LLVMValueRef src0,
781 src0 = to_float(ctx, src0);
782 src1 = to_float(ctx, src1);
783 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
784 return LLVMBuildSelect(ctx->builder, result,
785 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
786 LLVMConstInt(ctx->i32, 0, false), "");
789 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
793 LLVMValueRef params[] = {
796 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
799 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
801 LLVMValueRef src0, LLVMValueRef src1)
803 LLVMValueRef params[] = {
807 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
810 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
812 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
814 LLVMValueRef params[] = {
819 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
822 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
823 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
825 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
827 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
830 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
833 LLVMValueRef params[2] = {
836 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
837 * add special code to check for x=0. The reason is that
838 * the LLVM behavior for x=0 is different from what we
841 * The hardware already implements the correct behavior.
843 LLVMConstInt(ctx->i32, 1, false),
845 return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
848 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
851 LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32",
853 AC_FUNC_ATTR_READNONE);
855 /* The HW returns the last bit index from MSB, but NIR wants
856 * the index from LSB. Invert it by doing "31 - msb". */
857 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
860 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
861 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
862 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
863 src0, ctx->i32zero, ""),
864 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
865 src0, all_ones, ""), "");
867 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
870 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
873 LLVMValueRef args[2] = {
877 LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32",
878 ctx->i32, args, ARRAY_SIZE(args),
879 AC_FUNC_ATTR_READNONE);
881 /* The HW returns the last bit index from MSB, but NIR wants
882 * the index from LSB. Invert it by doing "31 - msb". */
883 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
886 return LLVMBuildSelect(ctx->builder,
887 LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
889 LLVMConstInt(ctx->i32, -1, true), msb, "");
892 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
893 LLVMIntPredicate pred,
894 LLVMValueRef src0, LLVMValueRef src1)
896 return LLVMBuildSelect(ctx->builder,
897 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
902 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
905 return emit_minmax_int(ctx, LLVMIntSGT, src0,
906 LLVMBuildNeg(ctx->builder, src0, ""));
909 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
912 LLVMValueRef cmp, val;
914 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
915 val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
916 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
917 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
921 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
924 LLVMValueRef cmp, val;
926 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
927 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
928 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
929 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
933 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
936 const char *intr = "llvm.floor.f32";
937 LLVMValueRef fsrc0 = to_float(ctx, src0);
938 LLVMValueRef params[] = {
941 LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr,
943 AC_FUNC_ATTR_READNONE);
944 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
/*
 * Call an intrinsic returning an {i32, i1} struct and return its i1 member
 * (struct index 1) zero-extended to i32.
 *
 * NOTE(review): damaged extraction. The "intrin" parameter line, the
 * declaration of "res", the trailing arguments of the struct-type call
 * (element count and packed flag), braces and the return are missing;
 * restore from the pristine file.
 */
947 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
949 LLVMValueRef src0, LLVMValueRef src1)
951 LLVMTypeRef ret_type;
952 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
954 LLVMValueRef params[] = { src0, src1 };
955 ret_type = LLVMStructTypeInContext(ctx->context, types,
958 res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type,
959 params, 2, AC_FUNC_ATTR_READNONE);
/* Keep only the i1 member and widen it. */
961 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
962 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
966 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
969 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
972 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
973 LLVMValueRef src0, LLVMValueRef src1)
975 LLVMValueRef dst64, result;
976 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
977 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
979 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
980 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
981 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
985 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
986 LLVMValueRef src0, LLVMValueRef src1)
988 LLVMValueRef dst64, result;
989 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
990 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
992 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
993 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
994 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
998 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
1000 LLVMValueRef srcs[3])
1002 LLVMValueRef result;
1003 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1004 result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
1006 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
/*
 * Bitfield insert: combine three operands as
 *   bfi_args[2] ^ (bfi_args[0] & (bfi_args[1] ^ bfi_args[2]))
 * and return src1 unchanged when src3 equals 32.
 *
 * bfi_args[1] is src1 shifted left by src2. bfi_args[0] is built from
 * nested shift/subtract operations whose operands are not visible here.
 *
 * NOTE(review): damaged extraction. The operands of the mask construction,
 * the assignment of bfi_args[2], comment delimiters, braces and the return
 * are missing; restore from the pristine file.
 */
1010 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
1011 LLVMValueRef src0, LLVMValueRef src1,
1012 LLVMValueRef src2, LLVMValueRef src3)
1014 LLVMValueRef bfi_args[3], result;
1016 bfi_args[0] = LLVMBuildShl(ctx->builder,
1017 LLVMBuildSub(ctx->builder,
1018 LLVMBuildShl(ctx->builder,
1023 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1026 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1029 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1030 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1032 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1033 LLVMBuildAnd(ctx->builder, bfi_args[0],
1034 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1036 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1040 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
1043 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1045 LLVMValueRef comp[2];
1047 src0 = to_float(ctx, src0);
1048 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1049 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1050 for (i = 0; i < 2; i++) {
1051 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1052 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1053 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1056 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1057 comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1062 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1065 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1066 LLVMValueRef temps[2], result, val;
1069 for (i = 0; i < 2; i++) {
1070 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1071 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1072 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1073 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1076 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1078 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1084 * Set range metadata on an instruction. This can only be used on load and
1085 * call instructions. If you know an instruction can only produce the values
1086 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1087 * \p lo is the minimum value inclusive.
1088 * \p hi is the maximum value exclusive.
1090 static void set_range_metadata(struct nir_to_llvm_context *ctx,
1091 LLVMValueRef value, unsigned lo, unsigned hi)
1093 LLVMValueRef range_md, md_args[2];
1094 LLVMTypeRef type = LLVMTypeOf(value);
1095 LLVMContextRef context = LLVMGetTypeContext(type);
1097 md_args[0] = LLVMConstInt(type, lo, false);
1098 md_args[1] = LLVMConstInt(type, hi, false);
1099 range_md = LLVMMDNodeInContext(context, md_args, 2);
1100 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
/*
 * Compute the thread ID by chaining llvm.amdgcn.mbcnt.lo and
 * llvm.amdgcn.mbcnt.hi with an all-ones mask, and annotate the result
 * with range metadata [0, 64).
 */
1103 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
1106 LLVMValueRef tid_args[2];
1107 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
1108 tid_args[1] = ctx->i32zero;
/* the mbcnt.lo result replaces the second argument for the mbcnt.hi call */
1109 tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
1110 "llvm.amdgcn.mbcnt.lo", ctx->i32,
1111 tid_args, 2, AC_FUNC_ATTR_READNONE);
1113 tid = ac_emit_llvm_intrinsic(&ctx->ac,
1114 "llvm.amdgcn.mbcnt.hi", ctx->i32,
1115 tid_args, 2, AC_FUNC_ATTR_READNONE);
1116 set_range_metadata(ctx, tid, 0, 64);
1121 * SI implements derivatives using the local data store (LDS)
1122 * All writes to the LDS happen in all executing threads at
1123 * the same time. TID is the Thread ID for the current
1124 * thread and is a value between 0 and 63, representing
1125 * the thread's position in the wavefront.
1127 * For the pixel shader threads are grouped into quads of four pixels.
1128 * The TIDs of the pixels of a quad are:
1136 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1137 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1138 * the current pixel's column, and masking with 0xfffffffe yields the TID
1139 * of the left pixel of the current pixel's row.
1141 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1142 * adding 2 yields the TID of the pixel below the top pixel.
1144 /* masks for thread ID. */
/* clears bits 0 and 1: TID of the top left pixel of the quad */
1145 #define TID_MASK_TOP_LEFT 0xfffffffc
/* clears bit 1: TID of the top pixel of the current pixel's column */
1146 #define TID_MASK_TOP 0xfffffffd
/* clears bit 0: TID of the left pixel of the current pixel's row */
1147 #define TID_MASK_LEFT 0xfffffffe
/*
 * Emit a screen-space derivative of src0 for the given NIR derivative op.
 *
 * The thread ID is masked to select the reference thread (tl_tid) and an
 * index is added to select the neighbouring thread (trbl_tid). The values
 * held by those two threads are fetched either with
 * llvm.amdgcn.ds.bpermute (when ctx->has_ds_bpermute is set) or through a
 * 64 x i32 array in the local address space, and the result is
 * trbl - tl as f32.
 */
1148 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1152 LLVMValueRef tl, trbl, result;
1153 LLVMValueRef tl_tid, trbl_tid;
1154 LLVMValueRef args[2];
1155 LLVMValueRef thread_id;
1158 ctx->has_ddxy = true;
/* the LDS scratch array is only created once, and only for the non-bpermute path */
1160 if (!ctx->lds && !ctx->has_ds_bpermute)
1161 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1162 LLVMArrayType(ctx->i32, 64),
1163 "ddxy_lds", LOCAL_ADDR_SPACE);
1165 thread_id = get_thread_id(ctx);
/* plain fddx/fddy use the same mask as their _fine variants here */
1166 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1167 mask = TID_MASK_LEFT;
1168 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1169 mask = TID_MASK_TOP;
1171 mask = TID_MASK_TOP_LEFT;
1173 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1174 LLVMConstInt(ctx->i32, mask, false), "");
1175 /* for DDX we want the next X pixel, for DDY the next Y pixel. */
1176 if (op == nir_op_fddx_fine ||
1177 op == nir_op_fddx_coarse ||
1183 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1184 LLVMConstInt(ctx->i32, idx, false), "");
1186 if (ctx->has_ds_bpermute) {
/* the bpermute address argument is the thread ID multiplied by 4 */
1187 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1188 LLVMConstInt(ctx->i32, 4, false), "");
1190 tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1192 AC_FUNC_ATTR_READNONE);
1194 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1195 LLVMConstInt(ctx->i32, 4, false), "");
1196 trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1198 AC_FUNC_ATTR_READNONE);
1200 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
/* each thread stores its own value, then reads the two selected slots back */
1202 store_ptr = build_gep0(ctx, ctx->lds, thread_id);
1203 load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
1204 load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
1206 LLVMBuildStore(ctx->builder, src0, store_ptr);
1207 tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
1208 trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
1210 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1211 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1212 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1217 * this takes an I,J coordinate pair,
1218 * and works out the X and Y derivatives.
1219 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
/*
 * For each of the first two elements of interp_ij, compute the fddx and
 * fddy derivative with emit_ddxy(). The four results are gathered in the
 * order ddx(0), ddx(1), ddy(0), ddy(1).
 */
1221 static LLVMValueRef emit_ddxy_interp(
1222 struct nir_to_llvm_context *ctx,
1223 LLVMValueRef interp_ij)
1225 LLVMValueRef result[4], a;
1228 for (i = 0; i < 2; i++) {
1229 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1230 LLVMConstInt(ctx->i32, i, false), "");
/* X derivatives fill slots 0-1, Y derivatives slots 2-3 */
1231 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1232 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1234 return build_gather_values(ctx, result, 4);
/*
 * Build num / den as a floating-point division. When the result is not a
 * constant, the fpmath metadata ctx->fpmath_md_2p5_ulp is attached to it.
 */
1237 static LLVMValueRef emit_fdiv(struct nir_to_llvm_context *ctx,
1241 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
/* a constant-folded result is not an instruction, so it gets no metadata */
1243 if (!LLVMIsConstant(ret))
1244 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
/*
 * Translate one NIR ALU instruction into LLVM IR.
 *
 * The sources are fetched with get_alu_src(), the opcode is dispatched to
 * the matching LLVM builder call or emit_* helper, and the result is
 * converted with to_integer() and recorded in ctx->defs keyed by the
 * destination SSA def. Unknown opcodes are printed to stderr.
 */
1248 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
1250 LLVMValueRef src[4], result = NULL;
1251 unsigned num_components = instr->dest.dest.ssa.num_components;
1252 unsigned src_components;
1254 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
/* some opcodes override the number of source components; the default is the destination's */
1255 switch (instr->op) {
1261 case nir_op_pack_half_2x16:
1264 case nir_op_unpack_half_2x16:
1268 src_components = num_components;
1271 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1272 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1274 switch (instr->op) {
1280 src[0] = to_float(ctx, src[0]);
1281 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1284 result = LLVMBuildNeg(ctx->builder, src[0], "");
1287 result = LLVMBuildNot(ctx->builder, src[0], "");
1290 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1293 src[0] = to_float(ctx, src[0]);
1294 src[1] = to_float(ctx, src[1]);
1295 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1298 src[0] = to_float(ctx, src[0]);
1299 src[1] = to_float(ctx, src[1]);
1300 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1303 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1306 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1309 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1312 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
/* computed as src0 - src1 * floor(src0 / src1) */
1315 src[0] = to_float(ctx, src[0]);
1316 src[1] = to_float(ctx, src[1]);
1317 result = emit_fdiv(ctx, src[0], src[1]);
1318 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
1319 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1320 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1323 src[0] = to_float(ctx, src[0]);
1324 src[1] = to_float(ctx, src[1]);
1325 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1328 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1331 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1334 src[0] = to_float(ctx, src[0]);
1335 src[1] = to_float(ctx, src[1]);
1336 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1339 src[0] = to_float(ctx, src[0]);
1340 src[1] = to_float(ctx, src[1]);
1341 result = emit_fdiv(ctx, src[0], src[1]);
1344 src[0] = to_float(ctx, src[0]);
1345 result = emit_fdiv(ctx, ctx->f32one, src[0]);
1348 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1351 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1354 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1357 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1360 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1363 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1366 result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
1369 result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
1372 result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
1375 result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
1378 result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
1381 result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
/*
 * Float comparisons: equal, less-than and greater-or-equal must be
 * ordered, i.e. false whenever an operand is NaN. Only not-equal is
 * unordered (true when an operand is NaN).
 */
1384 result = emit_float_cmp(ctx, LLVMRealOEQ, src[0], src[1]);
1387 result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
1390 result = emit_float_cmp(ctx, LLVMRealOLT, src[0], src[1]);
1393 result = emit_float_cmp(ctx, LLVMRealOGE, src[0], src[1]);
1396 result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
1399 result = emit_iabs(ctx, src[0]);
1402 result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
1405 result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
1408 result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
1411 result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
1414 result = emit_isign(ctx, src[0]);
1417 src[0] = to_float(ctx, src[0]);
1418 result = emit_fsign(ctx, src[0]);
1421 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
1424 result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
1427 result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
1429 case nir_op_fround_even:
1430 result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
1433 result = emit_ffract(ctx, src[0]);
1436 result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
1439 result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
1442 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1445 result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
1448 result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
/* reciprocal square root is built as 1.0 / sqrt(x) */
1451 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1452 result = emit_fdiv(ctx, ctx->f32one, result);
1455 result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
1458 result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
1461 result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
1464 result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
1466 case nir_op_ibitfield_extract:
1467 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
1469 case nir_op_ubitfield_extract:
1470 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
1472 case nir_op_bitfield_insert:
1473 result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
1475 case nir_op_bitfield_reverse:
1476 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1478 case nir_op_bit_count:
1479 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
/* all sources are converted to integers and gathered into one vector */
1484 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1485 src[i] = to_integer(ctx, src[i]);
1486 result = build_gather_values(ctx, src, num_components);
1489 src[0] = to_float(ctx, src[0]);
1490 result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
1493 src[0] = to_float(ctx, src[0]);
1494 result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
1497 result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
1500 result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
1503 result = emit_bcsel(ctx, src[0], src[1], src[2]);
1505 case nir_op_find_lsb:
1506 result = emit_find_lsb(ctx, src[0]);
1508 case nir_op_ufind_msb:
1509 result = emit_ufind_msb(ctx, src[0]);
1511 case nir_op_ifind_msb:
1512 result = emit_ifind_msb(ctx, src[0]);
1514 case nir_op_uadd_carry:
1515 result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1517 case nir_op_usub_borrow:
1518 result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
1521 result = emit_b2f(ctx, src[0]);
1523 case nir_op_fquantize2f16:
1524 src[0] = to_float(ctx, src[0]);
1525 result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
1526 /* need to convert back up to f32 */
1527 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1529 case nir_op_umul_high:
1530 result = emit_umul_high(ctx, src[0], src[1]);
1532 case nir_op_imul_high:
1533 result = emit_imul_high(ctx, src[0], src[1]);
1535 case nir_op_pack_half_2x16:
1536 result = emit_pack_half_2x16(ctx, src[0]);
1538 case nir_op_unpack_half_2x16:
1539 result = emit_unpack_half_2x16(ctx, src[0]);
1543 case nir_op_fddx_fine:
1544 case nir_op_fddy_fine:
1545 case nir_op_fddx_coarse:
1546 case nir_op_fddy_coarse:
1547 result = emit_ddxy(ctx, instr->op, src[0]);
1550 fprintf(stderr, "Unknown NIR alu instr: ");
1551 nir_print_instr(&instr->instr, stderr);
1552 fprintf(stderr, "\n");
/* results are always stored in integer form in the defs table */
1557 assert(instr->dest.dest.is_ssa);
1558 result = to_integer(ctx, result);
1559 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
/*
 * Translate a NIR load_const into LLVM integer constants whose width is
 * the def's bit_size. Each component is built from the u32 or u64 value
 * array; other bit sizes are reported as unsupported. Multi-component
 * constants become a constant vector. The value is recorded in ctx->defs
 * keyed by the instruction's def.
 */
1564 static void visit_load_const(struct nir_to_llvm_context *ctx,
1565 nir_load_const_instr *instr)
1567 LLVMValueRef values[4], value = NULL;
1568 LLVMTypeRef element_type =
1569 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1571 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1572 switch (instr->def.bit_size) {
1574 values[i] = LLVMConstInt(element_type,
1575 instr->value.u32[i], false);
1578 values[i] = LLVMConstInt(element_type,
1579 instr->value.u64[i], false);
1583 "unsupported nir load_const bit_size: %d\n",
1584 instr->def.bit_size);
1588 if (instr->def.num_components > 1) {
1589 value = LLVMConstVector(values, instr->def.num_components);
1593 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
/*
 * Bitcast ptr to a pointer to the given type, keeping the address space
 * of the original pointer.
 */
1596 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1599 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1600 return LLVMBuildBitCast(ctx->builder, ptr,
1601 LLVMPointerType(type, addr_space), "");
/*
 * Read the buffer size from element 2 of the descriptor. On VI and newer,
 * when in_elements is set, the size is divided by the stride, which is
 * taken from bits 16..29 of descriptor element 1.
 */
1605 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1608 LLVMBuildExtractElement(ctx->builder, descriptor,
1609 LLVMConstInt(ctx->i32, 2, false), "");
1612 if (ctx->options->chip_class >= VI && in_elements) {
1613 /* On VI, the descriptor contains the size in bytes,
1614 * but TXQ must return the size in elements.
1615 * The stride is always non-zero for resources using TXQ.
1617 LLVMValueRef stride =
1618 LLVMBuildExtractElement(ctx->builder, descriptor,
1619 LLVMConstInt(ctx->i32, 1, false), "");
1620 stride = LLVMBuildLShr(ctx->builder, stride,
1621 LLVMConstInt(ctx->i32, 16, false), "");
1622 stride = LLVMBuildAnd(ctx->builder, stride,
1623 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1625 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1631 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
/*
 * Write the textual name of the integer type into buf (at most bufsize
 * bytes, which must be at least 6). Vector types are written as
 * "v<N>i32", with N taken from LLVMGetVectorSize().
 */
1634 static void build_int_type_name(
1636 char *buf, unsigned bufsize)
1638 assert(bufsize >= 6);
1640 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1641 snprintf(buf, bufsize, "v%ui32",
1642 LLVMGetVectorSize(type));
/*
 * Emit a gather4 for an integer texture with adjusted coordinates.
 *
 * The texture size is queried with llvm.SI.getresinfo.i32, and
 * -0.5 / size is computed for the first two dimensions. That offset is
 * added to the two coordinates starting at coord_vgpr_index in
 * tinfo->args[0]. The intrinsic named by intr_name is then emitted with
 * the updated arguments.
 */
1647 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1648 struct ac_tex_info *tinfo,
1649 nir_tex_instr *instr,
1650 const char *intr_name,
1651 unsigned coord_vgpr_index)
1653 LLVMValueRef coord = tinfo->args[0];
1654 LLVMValueRef half_texel[2];
1659 LLVMValueRef txq_args[10];
1660 int txq_arg_count = 0;
/* arrays and cube maps set the "da" argument of the size query */
1662 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1663 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
1664 txq_args[txq_arg_count++] = tinfo->args[1];
1665 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
1666 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
1667 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1668 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
1669 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1670 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1671 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1672 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1673 size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
1674 txq_args, txq_arg_count,
1675 AC_FUNC_ATTR_READNONE);
/* half_texel[c] = (1.0 / size[c]) * -0.5 */
1677 for (c = 0; c < 2; c++) {
1678 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1679 LLVMConstInt(ctx->i32, c, false), "");
1680 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1681 half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
1682 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1683 LLVMConstReal(ctx->f32, -0.5), "");
/* coordinates are stored as i32, so each is bitcast to f32 for the add and back */
1687 for (c = 0; c < 2; c++) {
1689 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1690 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1691 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1692 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1693 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1694 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1697 tinfo->args[0] = coord;
1698 return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1699 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
/*
 * Build and emit the texture intrinsic for a NIR tex instruction.
 *
 * The intrinsic name is assembled from a base name chosen by instr->op,
 * an optional ".c" suffix for shadow samplers, an infix, an optional ".o"
 * suffix when an offset is present, and the integer type name of
 * tinfo->args[0]. tg4 on an integer sampler is routed through
 * radv_lower_gather4_integer(); everything else is emitted directly.
 */
1703 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1704 nir_tex_instr *instr,
1705 struct ac_tex_info *tinfo)
1707 const char *name = "llvm.SI.image.sample";
1708 const char *infix = "";
1709 char intr_name[127];
1711 bool is_shadow = instr->is_shadow;
1712 bool has_offset = tinfo->has_offset;
1713 switch (instr->op) {
1715 case nir_texop_txf_ms:
1716 case nir_texop_samples_identical:
1717 name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
1718 instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
1719 "llvm.SI.image.load.mip";
1730 name = "llvm.SI.getresinfo";
1732 case nir_texop_query_levels:
1733 name = "llvm.SI.getresinfo";
1736 if (ctx->stage != MESA_SHADER_FRAGMENT)
1743 name = "llvm.SI.gather4";
1747 name = "llvm.SI.getlod";
1755 build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
/* bounded write: the name can never run past intr_name */
1756 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
1757 has_offset ? ".o" : "", type);
1759 if (instr->op == nir_texop_tg4) {
1760 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1761 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
/* the first coordinate comes after the optional offset and compare arguments */
1762 return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1763 (int)has_offset + (int)is_shadow);
1766 return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1767 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
/*
 * Load the v4i32 descriptor selected by a vulkan_resource_index intrinsic.
 *
 * The descriptor set and binding come from the intrinsic; the array index
 * comes from src[0]. For dynamic uniform/storage buffers the descriptor
 * is read relative to ctx->push_constants, starting after the push
 * constant area, with a 16 byte stride. Otherwise it is read from the
 * descriptor set at the binding's offset with the binding's size as
 * stride.
 */
1771 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
1772 nir_intrinsic_instr *instr)
1774 LLVMValueRef index = get_src(ctx, instr->src[0]);
1775 unsigned desc_set = nir_intrinsic_desc_set(instr);
1776 unsigned binding = nir_intrinsic_binding(instr);
1777 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
1778 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
1779 unsigned base_offset = layout->binding[binding].offset;
1780 LLVMValueRef offset, stride;
1782 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1783 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
1784 desc_ptr = ctx->push_constants;
1785 base_offset = ctx->options->layout->push_constant_size;
1786 base_offset += 16 * layout->binding[binding].dynamic_offset_offset;
1787 stride = LLVMConstInt(ctx->i32, 16, false);
1789 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
/* final offset = base_offset + index * stride */
1791 offset = LLVMConstInt(ctx->i32, base_offset, false);
1792 index = LLVMBuildMul(ctx->builder, index, stride, "");
1793 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
1795 desc_ptr = build_gep0(ctx, desc_ptr, offset);
1796 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
1797 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
1799 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
/*
 * Load a push constant: index ctx->push_constants with src[0], cast the
 * pointer to the destination def's type and load through it.
 */
1802 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1803 nir_intrinsic_instr *instr)
1807 ptr = build_gep0(ctx, ctx->push_constants, get_src(ctx, instr->src[0]));
1808 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1810 return LLVMBuildLoad(ctx->builder, ptr, "");
/*
 * Return the size of the buffer whose descriptor is src[0], as reported
 * by get_buffer_size() with in_elements set to false.
 */
1813 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1814 nir_intrinsic_instr *instr)
1816 LLVMValueRef desc = get_src(ctx, instr->src[0]);
1818 return get_buffer_size(ctx, desc, false);
/*
 * Emit buffer stores for a store_ssbo intrinsic.
 *
 * The data (src[0]) is converted to floats and trimmed to the
 * instruction's component count. The write mask is consumed one
 * consecutive range at a time, and each range is stored with the f32,
 * v2f32 or v4f32 variant of llvm.amdgcn.buffer.store at
 * src[2] + start * 4. In fragment shaders, fs.writes_memory is set.
 */
1820 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
1821 nir_intrinsic_instr *instr)
1823 const char *store_name;
1824 LLVMTypeRef data_type = ctx->f32;
1825 unsigned writemask = nir_intrinsic_write_mask(instr);
1826 LLVMValueRef base_data, base_offset;
1827 LLVMValueRef params[6];
1829 if (ctx->stage == MESA_SHADER_FRAGMENT)
1830 ctx->shader_info->fs.writes_memory = true;
/* arguments that stay the same for every emitted store */
1832 params[1] = get_src(ctx, instr->src[1]);
1833 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1834 params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */
1835 params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1837 if (instr->num_components > 1)
1838 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1840 base_data = to_float(ctx, get_src(ctx, instr->src[0]));
1841 base_data = trim_vector(ctx, base_data, instr->num_components);
1842 base_data = LLVMBuildBitCast(ctx->builder, base_data,
1844 base_offset = get_src(ctx, instr->src[2]); /* voffset */
1848 LLVMValueRef offset;
1850 u_bit_scan_consecutive_range(&writemask, &start, &count);
1852 /* Due to an LLVM limitation, split 3-element writes
1853 * into a 2-element and a 1-element write. */
/* the third component is put back into the mask for a later pass */
1855 writemask |= 1 << (start + 2);
1860 store_name = "llvm.amdgcn.buffer.store.v4f32";
1862 } else if (count == 2) {
/* build a v2f32 from components start and start + 1 */
1863 tmp = LLVMBuildExtractElement(ctx->builder,
1864 base_data, LLVMConstInt(ctx->i32, start, false), "");
1865 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
1868 tmp = LLVMBuildExtractElement(ctx->builder,
1869 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
1870 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
1872 store_name = "llvm.amdgcn.buffer.store.v2f32";
1876 if (get_llvm_num_components(base_data) > 1)
1877 data = LLVMBuildExtractElement(ctx->builder, base_data,
1878 LLVMConstInt(ctx->i32, start, false), "");
1881 store_name = "llvm.amdgcn.buffer.store.f32";
/* each component is 4 bytes wide */
1884 offset = base_offset;
1886 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
1890 ac_emit_llvm_intrinsic(&ctx->ac, store_name,
1891 LLVMVoidTypeInContext(ctx->context), params, 6, 0);
/*
 * Emit an llvm.amdgcn.buffer.atomic.* intrinsic for an SSBO atomic.
 *
 * The argument list is: for comp_swap only, element 0 of src[3]; then
 * element 0 of src[2], src[0], a zero vindex, src[1] as voffset and a
 * zero slc flag. The intrinsic name is chosen from the NIR opcode and
 * the call returns an i32. In fragment shaders, fs.writes_memory is set.
 */
1895 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1896 nir_intrinsic_instr *instr)
1899 LLVMValueRef params[6];
1901 if (ctx->stage == MESA_SHADER_FRAGMENT)
1902 ctx->shader_info->fs.writes_memory = true;
1904 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1905 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
1907 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
1908 params[arg_count++] = get_src(ctx, instr->src[0]);
1909 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1910 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1911 params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */
/* map the NIR atomic opcode to the matching buffer atomic intrinsic */
1913 switch (instr->intrinsic) {
1914 case nir_intrinsic_ssbo_atomic_add:
1915 name = "llvm.amdgcn.buffer.atomic.add";
1917 case nir_intrinsic_ssbo_atomic_imin:
1918 name = "llvm.amdgcn.buffer.atomic.smin";
1920 case nir_intrinsic_ssbo_atomic_umin:
1921 name = "llvm.amdgcn.buffer.atomic.umin";
1923 case nir_intrinsic_ssbo_atomic_imax:
1924 name = "llvm.amdgcn.buffer.atomic.smax";
1926 case nir_intrinsic_ssbo_atomic_umax:
1927 name = "llvm.amdgcn.buffer.atomic.umax";
1929 case nir_intrinsic_ssbo_atomic_and:
1930 name = "llvm.amdgcn.buffer.atomic.and";
1932 case nir_intrinsic_ssbo_atomic_or:
1933 name = "llvm.amdgcn.buffer.atomic.or";
1935 case nir_intrinsic_ssbo_atomic_xor:
1936 name = "llvm.amdgcn.buffer.atomic.xor";
1938 case nir_intrinsic_ssbo_atomic_exchange:
1939 name = "llvm.amdgcn.buffer.atomic.swap";
1941 case nir_intrinsic_ssbo_atomic_comp_swap:
1942 name = "llvm.amdgcn.buffer.atomic.cmpswap";
1948 return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
/*
 * Emit an llvm.amdgcn.buffer.load for 1 to 4 f32 components.
 *
 * A 3-component load is emitted as a v4f32 load and trimmed back to three
 * components afterwards. The result is bitcast to the destination def's
 * type.
 */
1951 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
1952 nir_intrinsic_instr *instr)
1954 const char *load_name;
1955 LLVMTypeRef data_type = ctx->f32;
1956 if (instr->num_components == 3)
1957 data_type = LLVMVectorType(ctx->f32, 4);
1958 else if (instr->num_components > 1)
1959 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1961 if (instr->num_components == 4 || instr->num_components == 3)
1962 load_name = "llvm.amdgcn.buffer.load.v4f32";
1963 else if (instr->num_components == 2)
1964 load_name = "llvm.amdgcn.buffer.load.v2f32";
1965 else if (instr->num_components == 1)
1966 load_name = "llvm.amdgcn.buffer.load.f32";
/* arguments: src[0], a zero i32, src[1] and two zero i1 flags */
1970 LLVMValueRef params[] = {
1971 get_src(ctx, instr->src[0]),
1972 LLVMConstInt(ctx->i32, 0, false),
1973 get_src(ctx, instr->src[1]),
1974 LLVMConstInt(ctx->i1, 0, false),
1975 LLVMConstInt(ctx->i1, 0, false),
1979 ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
1981 if (instr->num_components == 3)
1982 ret = trim_vector(ctx, ret, 3);
1984 return LLVMBuildBitCast(ctx->builder, ret,
1985 get_def_type(ctx, &instr->dest.ssa), "");
/*
 * Load a UBO value one f32 component at a time with llvm.SI.load.const.
 * The resource (src[0]) is bitcast to a 16 x i8 vector, and each
 * component's offset is built by adding 4 * i. The components are
 * gathered and bitcast to the destination def's type.
 */
1988 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
1989 nir_intrinsic_instr *instr)
1991 LLVMValueRef results[4], ret;
1992 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
1993 LLVMValueRef offset = get_src(ctx, instr->src[1]);
1995 rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
1997 for (unsigned i = 0; i < instr->num_components; ++i) {
1998 LLVMValueRef params[] = {
2000 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2003 results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
2004 params, 2, AC_FUNC_ATTR_READNONE);
2008 ret = build_gather_values(ctx, results, instr->num_components);
2009 return LLVMBuildBitCast(ctx->builder, ret,
2010 get_def_type(ctx, &instr->dest.ssa), "");
/*
 * Walk a deref chain and compute its offset in attribute slots.
 *
 * Array derefs add base_offset * element size to the constant part, and
 * indirect array derefs also add index * element size to the dynamic
 * part. Struct derefs add the slot counts of all fields before the
 * selected one. The constant part is returned in *const_out. The dynamic
 * part is returned in *indir_out and is NULL when no indirect index was
 * seen. When both parts are present, the constant part is also added
 * into the dynamic value.
 */
2014 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
2015 bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
2017 unsigned const_offset = 0;
2018 LLVMValueRef offset = NULL;
2021 while (tail->child != NULL) {
2022 const struct glsl_type *parent_type = tail->type;
2025 if (tail->deref_type == nir_deref_type_array) {
2026 nir_deref_array *deref_array = nir_deref_as_array(tail);
2027 LLVMValueRef index, stride, local_offset;
2028 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2030 const_offset += size * deref_array->base_offset;
2031 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2034 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2035 index = get_src(ctx, deref_array->indirect);
2036 stride = LLVMConstInt(ctx->i32, size, 0);
2037 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2040 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2042 offset = local_offset;
2043 } else if (tail->deref_type == nir_deref_type_struct) {
2044 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
/* sum the slots of every field that precedes the selected one */
2046 for (unsigned i = 0; i < deref_struct->index; i++) {
2047 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2048 const_offset += glsl_count_attribute_slots(ft, vs_in);
2051 unreachable("unsupported deref type");
2055 if (const_offset && offset)
2056 offset = LLVMBuildAdd(ctx->builder, offset,
2057 LLVMConstInt(ctx->i32, const_offset, 0),
2060 *const_out = const_offset;
2061 *indir_out = offset;
/*
 * Load a NIR variable, dispatching on the variable's mode.
 *
 * The deref offset is split into a constant and an indirect part by
 * radv_get_deref_offset(). Each channel is either extracted from a vector
 * gathered over the variable's slots, or read directly from the
 * per-channel array at idx + chan + const_index * 4. Shared variables
 * are read through a pointer from get_shared_memory_ptr(). The channels
 * are gathered and returned in integer form.
 */
2064 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2065 nir_intrinsic_instr *instr)
2067 LLVMValueRef values[4];
2068 int idx = instr->variables[0]->var->data.driver_location;
2069 int ve = instr->dest.ssa.num_components;
2070 LLVMValueRef indir_index;
2071 unsigned const_index;
2072 switch (instr->variables[0]->var->data.mode) {
2073 case nir_var_shader_in:
/* slot counting for inputs depends on whether this is a vertex shader */
2074 radv_get_deref_offset(ctx, &instr->variables[0]->deref,
2075 ctx->stage == MESA_SHADER_VERTEX,
2076 &const_index, &indir_index);
2077 for (unsigned chan = 0; chan < ve; chan++) {
2079 unsigned count = glsl_count_attribute_slots(
2080 instr->variables[0]->var->type,
2081 ctx->stage == MESA_SHADER_VERTEX);
2082 LLVMValueRef tmp_vec = build_gather_values_extended(
2083 ctx, ctx->inputs + idx + chan, count,
2086 values[chan] = LLVMBuildExtractElement(ctx->builder,
/* inputs are used directly as values, with no load */
2090 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2092 return to_integer(ctx, build_gather_values(ctx, values, ve));
2095 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2096 &const_index, &indir_index);
2097 for (unsigned chan = 0; chan < ve; chan++) {
2099 unsigned count = glsl_count_attribute_slots(
2100 instr->variables[0]->var->type, false);
2101 LLVMValueRef tmp_vec = build_gather_values_extended(
2102 ctx, ctx->locals + idx + chan, count,
2105 values[chan] = LLVMBuildExtractElement(ctx->builder,
/* locals are pointers, so the value is loaded through them */
2109 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2112 return to_integer(ctx, build_gather_values(ctx, values, ve));
2113 case nir_var_shader_out:
2114 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2115 &const_index, &indir_index);
2116 for (unsigned chan = 0; chan < ve; chan++) {
2118 unsigned count = glsl_count_attribute_slots(
2119 instr->variables[0]->var->type, false);
2120 LLVMValueRef tmp_vec = build_gather_values_extended(
2121 ctx, ctx->outputs + idx + chan, count,
2124 values[chan] = LLVMBuildExtractElement(ctx->builder,
2128 values[chan] = LLVMBuildLoad(ctx->builder,
2129 ctx->outputs[idx + chan + const_index * 4],
2133 return to_integer(ctx, build_gather_values(ctx, values, ve));
2134 case nir_var_shared: {
2135 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2136 &const_index, &indir_index);
2137 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2138 LLVMValueRef derived_ptr;
2140 for (unsigned chan = 0; chan < ve; chan++) {
2141 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2143 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2144 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2145 values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
2147 return to_integer(ctx, build_gather_values(ctx, values, ve));
/*
 * Store to a NIR variable, dispatching on the variable's mode.
 *
 * The source (src[0]) is converted to floats, and only channels enabled
 * in the write mask (const_index[0]) are written. With an indirect
 * index, the variable's slots are gathered into a vector, the channel
 * value is inserted at the indirect index and the vector is stored back.
 * Otherwise the value is stored through the per-channel pointer. Shared
 * variables are written through a pointer from get_shared_memory_ptr(),
 * after converting the value to integer form.
 */
2156 visit_store_var(struct nir_to_llvm_context *ctx,
2157 nir_intrinsic_instr *instr)
2159 LLVMValueRef temp_ptr, value;
2160 int idx = instr->variables[0]->var->data.driver_location;
2161 LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2162 int writemask = instr->const_index[0];
2163 LLVMValueRef indir_index;
2164 unsigned const_index;
2165 switch (instr->variables[0]->var->data.mode) {
2166 case nir_var_shader_out:
2167 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2168 &const_index, &indir_index);
2169 for (unsigned chan = 0; chan < 4; chan++) {
2171 if (!(writemask & (1 << chan)))
2173 if (get_llvm_num_components(src) == 1)
2176 value = LLVMBuildExtractElement(ctx->builder, src,
2177 LLVMConstInt(ctx->i32,
/* clip and cull distance outputs are special-cased here */
2181 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2182 instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
2185 unsigned count = glsl_count_attribute_slots(
2186 instr->variables[0]->var->type, false);
2187 LLVMValueRef tmp_vec = build_gather_values_extended(
2188 ctx, ctx->outputs + idx + chan, count,
2191 if (get_llvm_num_components(tmp_vec) > 1) {
2192 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2193 value, indir_index, "");
2196 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2197 count, stride, tmp_vec);
2200 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2202 LLVMBuildStore(ctx->builder, value, temp_ptr);
2207 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2208 &const_index, &indir_index);
2209 for (unsigned chan = 0; chan < 4; chan++) {
2210 if (!(writemask & (1 << chan)))
2213 if (get_llvm_num_components(src) == 1)
2216 value = LLVMBuildExtractElement(ctx->builder, src,
2217 LLVMConstInt(ctx->i32, chan, false), "");
2219 unsigned count = glsl_count_attribute_slots(
2220 instr->variables[0]->var->type, false);
2221 LLVMValueRef tmp_vec = build_gather_values_extended(
2222 ctx, ctx->locals + idx + chan, count,
2225 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2226 value, indir_index, "");
2227 build_store_values_extended(ctx, ctx->locals + idx + chan,
2230 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2232 LLVMBuildStore(ctx->builder, value, temp_ptr);
2236 case nir_var_shared: {
2238 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2239 &const_index, &indir_index);
2241 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2242 LLVMValueRef derived_ptr;
2244 for (unsigned chan = 0; chan < 4; chan++) {
2245 if (!(writemask & (1 << chan)))
2248 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2250 if (get_llvm_num_components(src) == 1)
2253 value = LLVMBuildExtractElement(ctx->builder, src,
2254 LLVMConstInt(ctx->i32,
2259 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2261 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2262 LLVMBuildStore(ctx->builder,
2263 to_integer(ctx, value), derived_ptr);
/*
 * Returns the number of coordinate components for an image of sampler
 * dimension `dim`; `array` selects the larger count for arrayed images.
 * Visible results: 1D -> 1 (2 if array), 2D -> 2 (3 if array),
 * MS -> 3 (4 if array).
 * NOTE(review): the return statements for BUF, 3D/CUBE, RECT/SUBPASS and
 * SUBPASS_MS are not present in this text -- confirm against upstream.
 */
2272 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2275 case GLSL_SAMPLER_DIM_BUF:
2277 case GLSL_SAMPLER_DIM_1D:
2278 return array ? 2 : 1;
2279 case GLSL_SAMPLER_DIM_2D:
2280 return array ? 3 : 2;
2281 case GLSL_SAMPLER_DIM_MS:
2282 return array ? 4 : 3;
2283 case GLSL_SAMPLER_DIM_3D:
2284 case GLSL_SAMPLER_DIM_CUBE:
2286 case GLSL_SAMPLER_DIM_RECT:
2287 case GLSL_SAMPLER_DIM_SUBPASS:
2289 case GLSL_SAMPLER_DIM_SUBPASS_MS:
/*
 * Builds the coordinate operand for an image intrinsic.
 *
 * ctx          - translation context (builder, i32 type, frag_pos).
 * instr        - image intrinsic; src[0] supplies the coordinates and
 *                src[1] is read for the extra coordinate stored at
 *                coords[count].
 * add_frag_pos - presumably enables adding ctx->frag_pos to every
 *                coordinate (the guarding `if` is not visible here).
 *
 * NOTE(review): several short lines (braces, else branches, the `count`
 * adjustments and the final return) appear to be absent from this text.
 */
2297 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
2298 nir_intrinsic_instr *instr, bool add_frag_pos)
2300 const struct glsl_type *type = instr->variables[0]->var->type;
/* Use the type of the deref's child when the variable deref has one. */
2301 if(instr->variables[0]->deref.child)
2302 type = instr->variables[0]->deref.child->type;
2304 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2305 LLVMValueRef coords[4];
/* Constant lane indices 0..3 used when extracting vector elements. */
2306 LLVMValueRef masks[] = {
2307 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2308 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2312 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
/* Both MS and SUBPASS_MS dimensions are flagged as multisampled. */
2313 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
2314 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2316 count = image_type_to_components_count(dim,
2317 glsl_sampler_type_is_array(type));
/* Lane 0 of src0 is extracted when src[0] reports a component count. */
2320 if (instr->src[0].ssa->num_components)
2321 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
/* Split src0 into one scalar per coordinate. */
2328 for (chan = 0; chan < count; ++chan) {
2329 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
/* Add the fragment position, converted from float to unsigned int. */
2333 for (chan = 0; chan < count; ++chan)
2334 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
/* Element 0 of src[1] is placed right after the regular coordinates. */
2337 coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
/* The fourth lane is filled with an undefined i32. */
2342 coords[3] = LLVMGetUndef(ctx->i32);
/* Gather the `count` scalars into the result value. */
2345 res = build_gather_values(ctx, coords, count);
/*
 * Writes a textual name for an LLVM type into buf (bufsize bytes, asserted
 * to be at least 8): "v<N>" for a vector of N elements, and "i<width>",
 * "f32" or "f64" for integer, float and double element types.
 * NOTE(review): the `type` parameter line, the check on snprintf's return
 * value and any statement advancing buf past the vector prefix are not
 * visible in this text -- confirm against upstream.
 */
2350 static void build_type_name_for_intr(
2352 char *buf, unsigned bufsize)
2354 LLVMTypeRef elem_type = type;
2356 assert(bufsize >= 8);
/* Vector types get a "v<element count>" prefix. */
2358 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
2359 int ret = snprintf(buf, bufsize, "v%u",
2360 LLVMGetVectorSize(type));
/* Error report: print the offending type to stderr. */
2362 char *type_name = LLVMPrintTypeToString(type);
2363 fprintf(stderr, "Error building type name for: %s\n",
/* For vectors, the element type decides the remaining part of the name. */
2367 elem_type = LLVMGetElementType(type);
2371 switch (LLVMGetTypeKind(elem_type)) {
2373 case LLVMIntegerTypeKind:
2374 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
2376 case LLVMFloatTypeKind:
2377 snprintf(buf, bufsize, "f32");
2379 case LLVMDoubleTypeKind:
2380 snprintf(buf, bufsize, "f64");
/*
 * Formats the name of an image intrinsic into out_name (out_len bytes).
 *
 * base_name   - intrinsic name prefix, e.g. "llvm.amdgcn.image.load".
 * data_type   - LLVM type of the data operand/result.
 * coords_type - LLVM type of the coordinate operand.
 * rsrc_type   - LLVM type of the resource descriptor operand.
 *
 * With HAVE_LLVM <= 0x0309 the name is "<base_name>.<coords>"; the other
 * visible form is "<base_name>.<data>.<coords>.<rsrc>".  Each suffix comes
 * from build_type_name_for_intr() and is held in an 8-byte buffer.
 */
2385 static void get_image_intr_name(const char *base_name,
2386 LLVMTypeRef data_type,
2387 LLVMTypeRef coords_type,
2388 LLVMTypeRef rsrc_type,
2389 char *out_name, unsigned out_len)
2391 char coords_type_name[8];
2393 build_type_name_for_intr(coords_type, coords_type_name,
2394 sizeof(coords_type_name));
/* Short form: only the coordinate type is appended. */
2396 if (HAVE_LLVM <= 0x0309) {
2397 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
/* Long form: data, coordinate and resource types are all appended. */
2399 char data_type_name[8];
2400 char rsrc_type_name[8];
2402 build_type_name_for_intr(data_type, data_type_name,
2403 sizeof(data_type_name));
2404 build_type_name_for_intr(rsrc_type, rsrc_type_name,
2405 sizeof(rsrc_type_name));
2406 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
2407 data_type_name, coords_type_name, rsrc_type_name);
/*
 * Emits the load for a NIR image_load intrinsic and returns the loaded
 * value passed through to_integer().
 *
 * Buffer images (GLSL_SAMPLER_DIM_BUF) call
 * llvm.amdgcn.buffer.load.format.v4f32 and trim the result to the
 * destination's component count.  Other dimensions call an
 * llvm.amdgcn.image.load.* intrinsic whose name is derived from the
 * operand types by get_image_intr_name().
 * NOTE(review): some short lines (e.g. the remaining params[] assignments
 * and the else branch) are absent from this text.
 */
2411 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
2412 nir_intrinsic_instr *instr)
2414 LLVMValueRef params[7];
2416 char intrinsic_name[64];
2417 const nir_variable *var = instr->variables[0]->var;
2418 const struct glsl_type *type = var->type;
/* Use the type of the deref's child when the variable deref has one. */
2419 if(instr->variables[0]->deref.child)
2420 type = instr->variables[0]->deref.child->type;
2422 type = glsl_without_array(type);
/* Buffer path: index element 0 of src[0] into the buffer descriptor. */
2423 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2424 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2425 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2426 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2427 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2428 params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */
2429 params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */
2430 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
/* Keep only as many components as the NIR destination has. */
2433 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
2434 res = to_integer(ctx, res);
/* Image path: "da" is set for arrayed and cube images. */
2436 bool is_da = glsl_sampler_type_is_array(type) ||
2437 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
/* Subpass images ask get_image_coords() to add the fragment position. */
2438 bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
2439 LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
2440 LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
2441 LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
2443 params[0] = get_image_coords(ctx, instr, add_frag_pos);
2444 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2445 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
/* The trailing flag operands depend on the LLVM version. */
2446 if (HAVE_LLVM <= 0x0309) {
2447 params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */
2452 LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
/* Derive the overloaded intrinsic name from the operand types. */
2459 get_image_intr_name("llvm.amdgcn.image.load",
2460 ctx->v4f32, /* vdata */
2461 LLVMTypeOf(params[0]), /* coords */
2462 LLVMTypeOf(params[1]), /* rsrc */
2463 intrinsic_name, sizeof(intrinsic_name));
2465 res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
2466 params, 7, AC_FUNC_ATTR_READONLY);
2468 return to_integer(ctx, res);
/*
 * Emits the store for a NIR image_store intrinsic.
 *
 * The data to store comes from src[2] (converted with to_float()).
 * Buffer images call llvm.amdgcn.buffer.store.format.v4f32; other
 * dimensions call an llvm.amdgcn.image.store.* intrinsic whose name is
 * derived from the operand types.  In fragment shaders the shader info is
 * flagged as writing memory.
 * NOTE(review): some short lines (e.g. the remaining params[] assignments
 * and the trailing call arguments) are absent from this text.
 */
2471 static void visit_image_store(struct nir_to_llvm_context *ctx,
2472 nir_intrinsic_instr *instr)
2474 LLVMValueRef params[8];
2475 char intrinsic_name[64];
2476 const nir_variable *var = instr->variables[0]->var;
2477 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2478 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2479 const struct glsl_type *type = glsl_without_array(var->type);
/* Record that this fragment shader writes memory. */
2481 if (ctx->stage == MESA_SHADER_FRAGMENT)
2482 ctx->shader_info->fs.writes_memory = true;
/* Buffer path: index element 0 of src[0] into the buffer descriptor. */
2484 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2485 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
2486 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2487 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2488 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2489 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2490 params[4] = i1false; /* glc */
2491 params[5] = i1false; /* slc */
2492 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
/* Image path: "da" is set for arrayed and cube images. */
2495 bool is_da = glsl_sampler_type_is_array(type) ||
2496 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2497 LLVMValueRef da = is_da ? i1true : i1false;
2498 LLVMValueRef glc = i1false;
2499 LLVMValueRef slc = i1false;
2501 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
2502 params[1] = get_image_coords(ctx, instr, false); /* coords */
2503 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2504 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
/* The trailing flag operands depend on the LLVM version. */
2505 if (HAVE_LLVM <= 0x0309) {
2506 params[4] = i1false; /* r128 */
2511 LLVMValueRef lwe = i1false;
/* Derive the overloaded intrinsic name from the operand types. */
2518 get_image_intr_name("llvm.amdgcn.image.store",
2519 LLVMTypeOf(params[0]), /* vdata */
2520 LLVMTypeOf(params[1]), /* coords */
2521 LLVMTypeOf(params[2]), /* rsrc */
2522 intrinsic_name, sizeof(intrinsic_name));
2524 ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
/*
 * Emits an image atomic for the NIR image_atomic_* intrinsics and returns
 * the i32 result of the LLVM intrinsic call.
 *
 * Operands are the data value from src[2] (plus the value from src[3] for
 * comp_swap), followed by five descriptor / coordinate / flag operands in
 * either the buffer or the image path.  The intrinsic name is
 * "llvm.amdgcn.image.atomic.<op>.<coords type>".  In fragment shaders the
 * shader info is flagged as writing memory.
 * NOTE(review): some short lines (braces, `break`s, the "or" name
 * assignment and the default case) are absent from this text.
 */
2530 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
2531 nir_intrinsic_instr *instr)
/* comp_swap pushes 2 data operands + 5 others = 7 values, so 6 slots overflowed. */
2533 LLVMValueRef params[7];
2534 int param_count = 0;
2535 const nir_variable *var = instr->variables[0]->var;
2536 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2537 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2538 const char *base_name = "llvm.amdgcn.image.atomic";
2539 const char *atomic_name;
2540 LLVMValueRef coords;
/*
 * Name buffer: 24 (base_name) + 1 + 7 ("cmpswap") + 1 + 7 (longest string
 * that fits coords_type) + NUL = 41 bytes.  The previous 32 bytes made
 * snprintf() truncate e.g. "llvm.amdgcn.image.atomic.add.v4i32".
 */
2541 char intrinsic_name[41], coords_type[8];
2542 const struct glsl_type *type = glsl_without_array(var->type);
/* Record that this fragment shader writes memory. */
2544 if (ctx->stage == MESA_SHADER_FRAGMENT)
2545 ctx->shader_info->fs.writes_memory = true;
/* Data operand(s): src[2], and src[3] as well for compare-and-swap. */
2547 params[param_count++] = get_src(ctx, instr->src[2]);
2548 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
2549 params[param_count++] = get_src(ctx, instr->src[3]);
/* Buffer path: descriptor, vindex (element 0 of src[0]), voffset, glc, slc. */
2551 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2552 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2553 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2554 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2555 params[param_count++] = ctx->i32zero; /* voffset */
2556 params[param_count++] = i1false; /* glc */
2557 params[param_count++] = i1false; /* slc */
/* Image path: coordinates, descriptor, r128, da (arrayed or cube), slc. */
2559 bool da = glsl_sampler_type_is_array(type) ||
2560 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2562 coords = params[param_count++] = get_image_coords(ctx, instr, false);
2563 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2564 params[param_count++] = i1false; /* r128 */
2565 params[param_count++] = da ? i1true : i1false; /* da */
2566 params[param_count++] = i1false; /* slc */
/* Map the NIR opcode to the operation part of the intrinsic name. */
2569 switch (instr->intrinsic) {
2570 case nir_intrinsic_image_atomic_add:
2571 atomic_name = "add";
2573 case nir_intrinsic_image_atomic_min:
2574 atomic_name = "smin";
2576 case nir_intrinsic_image_atomic_max:
2577 atomic_name = "smax";
2579 case nir_intrinsic_image_atomic_and:
2580 atomic_name = "and";
2582 case nir_intrinsic_image_atomic_or:
2585 case nir_intrinsic_image_atomic_xor:
2586 atomic_name = "xor";
2588 case nir_intrinsic_image_atomic_exchange:
2589 atomic_name = "swap";
2591 case nir_intrinsic_image_atomic_comp_swap:
2592 atomic_name = "cmpswap";
/* The name suffix is the type of the coordinate / vindex operand. */
2597 build_int_type_name(LLVMTypeOf(coords),
2598 coords_type, sizeof(coords_type));
2600 snprintf(intrinsic_name, sizeof(intrinsic_name),
2601 "%s.%s.%s", base_name, atomic_name, coords_type);
2602 return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
/*
 * Emits the size query for a NIR image_size intrinsic.
 *
 * Buffer images return get_buffer_size() of their buffer descriptor.
 * Other images call llvm.SI.getresinfo.i32 with ten operands; for cube
 * array images the third component of the result is divided by 6.
 * NOTE(review): the declaration of `res` and the final return are not
 * present in this text.
 */
2605 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
2606 nir_intrinsic_instr *instr)
2609 LLVMValueRef params[10];
2610 const nir_variable *var = instr->variables[0]->var;
2611 const struct glsl_type *type = instr->variables[0]->var->type;
/* "da" is derived from the variable's own type, before any child deref. */
2612 bool da = glsl_sampler_type_is_array(var->type) ||
2613 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
/* Use the type of the deref's child when the variable deref has one. */
2614 if(instr->variables[0]->deref.child)
2615 type = instr->variables[0]->deref.child->type;
2617 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
2618 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
/* Operands: zero, image descriptor, 15, then flags that are zero except da. */
2619 params[0] = ctx->i32zero;
2620 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2621 params[2] = LLVMConstInt(ctx->i32, 15, false);
2622 params[3] = ctx->i32zero;
2623 params[4] = ctx->i32zero;
2624 params[5] = da ? ctx->i32one : ctx->i32zero;
2625 params[6] = ctx->i32zero;
2626 params[7] = ctx->i32zero;
2627 params[8] = ctx->i32zero;
2628 params[9] = ctx->i32zero;
2630 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
2631 params, 10, AC_FUNC_ATTR_READNONE);
/* Cube arrays: divide component 2 by 6 (signed division). */
2633 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
2634 glsl_sampler_type_is_array(type)) {
2635 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
2636 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
2637 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
2638 z = LLVMBuildSDiv(ctx->builder, z, six, "");
2639 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
/*
 * Emits a call to llvm.amdgcn.s.waitcnt with the immediate 0xf70.
 * NOTE(review): the meaning of the 0xf70 counter encoding is not
 * established by this file -- confirm against the ISA documentation.
 */
2644 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2646 LLVMValueRef args[1] = {
2647 LLVMConstInt(ctx->i32, 0xf70, false),
2649 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
2650 ctx->voidt, args, 1, 0);
/*
 * Emits a call to llvm.amdgcn.s.barrier, which takes no operands and
 * returns nothing.
 */
2653 static void emit_barrier(struct nir_to_llvm_context *ctx)
2656 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
2657 ctx->voidt, NULL, 0, 0);
/*
 * Emits a conditional discard for NIR discard_if.
 *
 * Marks the fragment shader as able to discard, compares src[0] with a
 * not-equal integer comparison, selects -1.0 when that comparison holds,
 * and calls llvm.AMDGPU.kill.
 * NOTE(review): the comparison's right-hand operand, the select's other
 * value and the kill call's arguments are not present in this text.
 */
2660 static void emit_discard_if(struct nir_to_llvm_context *ctx,
2661 nir_intrinsic_instr *instr)
2664 ctx->shader_info->fs.can_discard = true;
2666 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2667 get_src(ctx, instr->src[0]),
2670 cond = LLVMBuildSelect(ctx->builder, cond,
2671 LLVMConstReal(ctx->f32, -1.0f),
2673 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
2674 LLVMVoidTypeInContext(ctx->context),
/*
 * Returns (ctx->tg_size & 0xfc0) + get_thread_id(ctx) as the value for
 * NIR load_local_invocation_index.
 * NOTE(review): presumably the masked bits of tg_size hold the wave's base
 * offset within the workgroup -- confirm against the hardware docs.
 */
2679 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2681 LLVMValueRef result;
2682 LLVMValueRef thread_id = get_thread_id(ctx);
2683 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2684 LLVMConstInt(ctx->i32, 0xfc0, false), "");
2686 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
/*
 * Emits an atomic on a variable for the NIR var_atomic_* intrinsics.
 *
 * The target pointer comes from get_shared_memory_ptr() using the
 * variable's driver_location.  comp_swap is emitted with
 * LLVMBuildAtomicCmpXchg; every other opcode is mapped to an
 * LLVMAtomicRMWBinOp and emitted with LLVMBuildAtomicRMW on src[0].
 * All visible orderings are sequentially consistent.
 * NOTE(review): the `break`s, the default case, some call arguments and
 * the final return are not present in this text.
 */
2689 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2690 nir_intrinsic_instr *instr)
2692 LLVMValueRef ptr, result;
2693 int idx = instr->variables[0]->var->data.driver_location;
2694 LLVMValueRef src = get_src(ctx, instr->src[0]);
2695 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
/* Compare-and-swap additionally reads src[1]. */
2697 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
2698 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2699 result = LLVMBuildAtomicCmpXchg(ctx->builder,
2701 LLVMAtomicOrderingSequentiallyConsistent,
2702 LLVMAtomicOrderingSequentiallyConsistent,
/* Read-modify-write opcodes: umin/umax map to the unsigned LLVM ops, imin/imax to Min/Max. */
2705 LLVMAtomicRMWBinOp op;
2706 switch (instr->intrinsic) {
2707 case nir_intrinsic_var_atomic_add:
2708 op = LLVMAtomicRMWBinOpAdd;
2710 case nir_intrinsic_var_atomic_umin:
2711 op = LLVMAtomicRMWBinOpUMin;
2713 case nir_intrinsic_var_atomic_umax:
2714 op = LLVMAtomicRMWBinOpUMax;
2716 case nir_intrinsic_var_atomic_imin:
2717 op = LLVMAtomicRMWBinOpMin;
2719 case nir_intrinsic_var_atomic_imax:
2720 op = LLVMAtomicRMWBinOpMax;
2722 case nir_intrinsic_var_atomic_and:
2723 op = LLVMAtomicRMWBinOpAnd;
2725 case nir_intrinsic_var_atomic_or:
2726 op = LLVMAtomicRMWBinOpOr;
2728 case nir_intrinsic_var_atomic_xor:
2729 op = LLVMAtomicRMWBinOpXor;
2731 case nir_intrinsic_var_atomic_exchange:
2732 op = LLVMAtomicRMWBinOpXchg;
/* The source value is converted with to_integer() before the RMW. */
2738 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2739 LLVMAtomicOrderingSequentiallyConsistent,
/* Interpolation locations accepted by lookup_interp_param(). */
2745 #define INTERP_CENTER 0
2746 #define INTERP_CENTROID 1
2747 #define INTERP_SAMPLE 2
/*
 * Selects the interpolation parameter value stored in ctx for an
 * interpolation mode and location.
 *
 * interp   - GLSL interpolation mode.
 * location - INTERP_CENTER, INTERP_CENTROID or INTERP_SAMPLE.
 *
 * SMOOTH and NONE return ctx->persp_{center,centroid,sample};
 * NOPERSPECTIVE returns ctx->linear_{center,centroid,sample}.
 * NOTE(review): the FLAT case's return and the fallback return are not
 * present in this text.
 */
2749 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2750 enum glsl_interp_mode interp, unsigned location)
2753 case INTERP_MODE_FLAT:
2756 case INTERP_MODE_SMOOTH:
2757 case INTERP_MODE_NONE:
2758 if (location == INTERP_CENTER)
2759 return ctx->persp_center;
2760 else if (location == INTERP_CENTROID)
2761 return ctx->persp_centroid;
2762 else if (location == INTERP_SAMPLE)
2763 return ctx->persp_sample;
2765 case INTERP_MODE_NOPERSPECTIVE:
2766 if (location == INTERP_CENTER)
2767 return ctx->linear_center;
2768 else if (location == INTERP_CENTROID)
2769 return ctx->linear_centroid;
2770 else if (location == INTERP_SAMPLE)
2771 return ctx->linear_sample;
/*
 * Loads the two-component position of sample `sample_id` from
 * ctx->sample_positions and returns both components gathered into one
 * value.  The components are read at offsets sample_id * 8 and
 * sample_id * 8 + 4.
 */
2777 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2778 LLVMValueRef sample_id)
2780 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
2781 LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2782 LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2783 LLVMValueRef result[2];
2785 result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2786 result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2788 return build_gather_values(ctx, result, 2);
/*
 * Returns the value for NIR load_sample_pos: emit_ffract() applied to
 * ctx->frag_pos[0] and ctx->frag_pos[1], gathered into one
 * two-component value.
 */
2791 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2793 LLVMValueRef values[2];
2795 values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2796 values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2797 return build_gather_values(ctx, values, 2);
/*
 * Emits the interpolation for the NIR
 * interp_var_at_{centroid,sample,offset} intrinsics and returns two
 * interpolated channels gathered into one value.
 *
 * at_centroid looks up the INTERP_CENTROID parameters.  at_sample and
 * at_offset look up the INTERP_SAMPLE parameters and then displace I/J
 * using the derivatives from emit_ddxy_interp(): at_offset takes the
 * displacement from src[0]; at_sample loads the position of sample src[0]
 * and subtracts 0.5 from each component.
 * NOTE(review): several short lines (declarations, `break`s, the default
 * case, closing braces and comment delimiters) are absent from this text.
 */
2800 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2801 nir_intrinsic_instr *instr)
2803 LLVMValueRef result[2];
2804 LLVMValueRef interp_param, attr_number;
2807 LLVMValueRef src_c0, src_c1;
2808 const char *intr_name;
/* Attribute slot relative to the first generic varying. */
2810 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2811 switch (instr->intrinsic) {
2812 case nir_intrinsic_interp_var_at_centroid:
2813 location = INTERP_CENTROID;
2815 case nir_intrinsic_interp_var_at_sample:
2816 case nir_intrinsic_interp_var_at_offset:
2817 location = INTERP_SAMPLE;
2818 src0 = get_src(ctx, instr->src[0]);
/* at_offset: the two displacement components come straight from src0. */
2824 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2825 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2826 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
/* at_sample: displacement is the sample position minus 0.5 per component. */
2827 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2828 LLVMValueRef sample_position;
2829 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2831 /* fetch sample ID */
2832 sample_position = load_sample_position(ctx, src0);
2834 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2835 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2836 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2837 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2839 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2840 attr_number = LLVMConstInt(ctx->i32, input_index, false);
/* Displace I and J for the sample/offset variants. */
2842 if (location == INTERP_SAMPLE) {
2843 LLVMValueRef ij_out[2];
2844 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2847 * take the I then J parameters, and the DDX/Y for it, and
2848 * calculate the IJ inputs for the interpolator.
2849 * temp1 = ddx * offset/sample.x + I;
2850 * interp_param.I = ddy * offset/sample.y + temp1;
2851 * temp1 = ddx * offset/sample.x + J;
2852 * interp_param.J = ddy * offset/sample.y + temp1;
/* ddxy_out lanes i and i + 2 hold the x and y derivatives of parameter i. */
2854 for (unsigned i = 0; i < 2; i++) {
2855 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2856 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2857 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2858 ddxy_out, ix_ll, "");
2859 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2860 ddxy_out, iy_ll, "");
2861 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2862 interp_param, ix_ll, "");
2863 LLVMValueRef temp1, temp2;
2865 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2868 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2869 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2871 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2872 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
/* The displaced parameter is stored back as an i32 bit pattern. */
2874 ij_out[i] = LLVMBuildBitCast(ctx->builder,
2875 temp2, ctx->i32, "");
2877 interp_param = build_gather_values(ctx, ij_out, 2);
/* Without interpolation parameters, the constant intrinsic (3 operands) is used. */
2880 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
2881 for (chan = 0; chan < 2; chan++) {
2882 LLVMValueRef args[4];
2883 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2885 args[0] = llvm_chan;
2886 args[1] = attr_number;
2887 args[2] = ctx->prim_mask;
2888 args[3] = interp_param;
2889 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
2890 ctx->f32, args, args[3] ? 4 : 3,
2891 AC_FUNC_ATTR_READNONE);
2893 return build_gather_values(ctx, result, 2);
/*
 * Translates one NIR intrinsic instruction.
 *
 * Dispatches on instr->intrinsic: system-value loads read the matching
 * ctx field, the remaining opcodes call the dedicated visit_*() / emit_*()
 * helper.  Unknown intrinsics are printed to stderr.  The visible insert
 * stores `result` in ctx->defs keyed by &instr->dest.ssa.
 * NOTE(review): the `break`s, the barrier cases' bodies, the default
 * label and any guard around the hash-table insert are not present in
 * this text.
 */
2896 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
2897 nir_intrinsic_instr *instr)
2899 LLVMValueRef result = NULL;
2901 switch (instr->intrinsic) {
/* System values that map directly to a ctx field. */
2902 case nir_intrinsic_load_work_group_id: {
2903 result = ctx->workgroup_ids;
2906 case nir_intrinsic_load_base_vertex: {
2907 result = ctx->base_vertex;
2910 case nir_intrinsic_load_vertex_id_zero_base: {
2911 result = ctx->vertex_id;
2914 case nir_intrinsic_load_local_invocation_id: {
2915 result = ctx->local_invocation_ids;
2918 case nir_intrinsic_load_base_instance:
2919 result = ctx->start_instance;
/* Sample ID / position loads also force per-sample shading. */
2921 case nir_intrinsic_load_sample_id:
2922 ctx->shader_info->fs.force_persample = true;
2923 result = unpack_param(ctx, ctx->ancillary, 8, 4);
2925 case nir_intrinsic_load_sample_pos:
2926 ctx->shader_info->fs.force_persample = true;
2927 result = load_sample_pos(ctx);
2929 case nir_intrinsic_load_front_face:
2930 result = ctx->front_face;
/* Reading the instance ID raises vs.vgpr_comp_cnt to at least 3. */
2932 case nir_intrinsic_load_instance_id:
2933 result = ctx->instance_id;
2934 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
2935 ctx->shader_info->vs.vgpr_comp_cnt);
2937 case nir_intrinsic_load_num_work_groups:
2938 result = ctx->num_work_groups;
2940 case nir_intrinsic_load_local_invocation_index:
2941 result = visit_load_local_invocation_index(ctx);
/* Push constants, resource indices and buffer access. */
2943 case nir_intrinsic_load_push_constant:
2944 result = visit_load_push_constant(ctx, instr);
2946 case nir_intrinsic_vulkan_resource_index:
2947 result = visit_vulkan_resource_index(ctx, instr);
2949 case nir_intrinsic_store_ssbo:
2950 visit_store_ssbo(ctx, instr);
2952 case nir_intrinsic_load_ssbo:
2953 result = visit_load_buffer(ctx, instr);
2955 case nir_intrinsic_ssbo_atomic_add:
2956 case nir_intrinsic_ssbo_atomic_imin:
2957 case nir_intrinsic_ssbo_atomic_umin:
2958 case nir_intrinsic_ssbo_atomic_imax:
2959 case nir_intrinsic_ssbo_atomic_umax:
2960 case nir_intrinsic_ssbo_atomic_and:
2961 case nir_intrinsic_ssbo_atomic_or:
2962 case nir_intrinsic_ssbo_atomic_xor:
2963 case nir_intrinsic_ssbo_atomic_exchange:
2964 case nir_intrinsic_ssbo_atomic_comp_swap:
2965 result = visit_atomic_ssbo(ctx, instr);
2967 case nir_intrinsic_load_ubo:
2968 result = visit_load_ubo_buffer(ctx, instr);
2970 case nir_intrinsic_get_buffer_size:
2971 result = visit_get_buffer_size(ctx, instr);
/* Variable loads and stores. */
2973 case nir_intrinsic_load_var:
2974 result = visit_load_var(ctx, instr);
2976 case nir_intrinsic_store_var:
2977 visit_store_var(ctx, instr);
/* Image access. */
2979 case nir_intrinsic_image_load:
2980 result = visit_image_load(ctx, instr);
2982 case nir_intrinsic_image_store:
2983 visit_image_store(ctx, instr);
2985 case nir_intrinsic_image_atomic_add:
2986 case nir_intrinsic_image_atomic_min:
2987 case nir_intrinsic_image_atomic_max:
2988 case nir_intrinsic_image_atomic_and:
2989 case nir_intrinsic_image_atomic_or:
2990 case nir_intrinsic_image_atomic_xor:
2991 case nir_intrinsic_image_atomic_exchange:
2992 case nir_intrinsic_image_atomic_comp_swap:
2993 result = visit_image_atomic(ctx, instr);
2995 case nir_intrinsic_image_size:
2996 result = visit_image_size(ctx, instr);
/* Discards mark the fragment shader as able to discard. */
2998 case nir_intrinsic_discard:
2999 ctx->shader_info->fs.can_discard = true;
3000 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
3001 LLVMVoidTypeInContext(ctx->context),
3004 case nir_intrinsic_discard_if:
3005 emit_discard_if(ctx, instr);
3007 case nir_intrinsic_memory_barrier:
3010 case nir_intrinsic_barrier:
/* Atomics on variables. */
3013 case nir_intrinsic_var_atomic_add:
3014 case nir_intrinsic_var_atomic_imin:
3015 case nir_intrinsic_var_atomic_umin:
3016 case nir_intrinsic_var_atomic_imax:
3017 case nir_intrinsic_var_atomic_umax:
3018 case nir_intrinsic_var_atomic_and:
3019 case nir_intrinsic_var_atomic_or:
3020 case nir_intrinsic_var_atomic_xor:
3021 case nir_intrinsic_var_atomic_exchange:
3022 case nir_intrinsic_var_atomic_comp_swap:
3023 result = visit_var_atomic(ctx, instr);
/* Explicit interpolation. */
3025 case nir_intrinsic_interp_var_at_centroid:
3026 case nir_intrinsic_interp_var_at_sample:
3027 case nir_intrinsic_interp_var_at_offset:
3028 result = visit_interp(ctx, instr);
/* Anything else is reported on stderr together with the instruction. */
3031 fprintf(stderr, "Unknown intrinsic: ");
3032 nir_print_instr(&instr->instr, stderr);
3033 fprintf(stderr, "\n");
/* Publish the produced value for later uses of the SSA destination. */
3037 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
/*
 * Loads a descriptor for the variable referenced by `deref`.
 *
 * ctx       - translation context (descriptor set pointers, layout).
 * deref     - variable deref; its descriptor_set and binding select the
 *             binding layout, an array child adds to the offset / index.
 * desc_type - which kind of descriptor to fetch; the per-kind setup of
 *             `type` / `type_size` / `offset` is not visible in this text.
 *
 * The byte offset starts at binding->offset and, for an array child,
 * grows by base_offset * binding->size.  The load index is the child's
 * indirect source (or zero) multiplied by stride / type_size.
 */
3041 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3042 nir_deref_var *deref,
3043 enum desc_type desc_type)
3045 unsigned desc_set = deref->var->data.descriptor_set;
3046 LLVMValueRef list = ctx->descriptor_sets[desc_set];
3047 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3048 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
3049 unsigned offset = binding->offset;
3050 unsigned stride = binding->size;
3052 LLVMBuilderRef builder = ctx->builder;
3054 LLVMValueRef index = NULL;
/* NOTE(review): binding was already dereferenced above this bounds assert. */
3056 assert(deref->var->data.binding < layout->binding_count);
3058 switch (desc_type) {
3070 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
3080 unreachable("invalid desc_type\n");
/* Array of descriptors: constant part goes into offset, indirect part into index. */
3083 if (deref->deref.child) {
3084 nir_deref_array *child = (nir_deref_array*)deref->deref.child;
3086 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3087 offset += child->base_offset * stride;
3088 if (child->deref_array_type == nir_deref_array_type_indirect) {
3089 index = get_src(ctx, child->indirect);
3093 assert(stride % type_size == 0);
3096 index = ctx->i32zero;
/* Convert the element index into units of the descriptor type. */
3098 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
/* Step to the binding's offset in the set, then view it as an array of `type`. */
3100 list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0));
3101 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
3103 return build_indexed_load_const(ctx, list, index);
/*
 * Fills tinfo->args / arg_count / dst_type for a texture instruction.
 *
 * param/count hold the packed address operands; they are padded with
 * undefined i32 values up to a power-of-two count and gathered into
 * args[0] (or passed as the single param[0]).  args[1] is the resource
 * descriptor; the sampler descriptor and the flag operands (dmask, unorm,
 * r128, da, glc, slc, tfe, lwe) follow.  Buffer txf uses only three
 * arguments: resource, zero and param[0].
 * NOTE(review): the `op` and `dmask` parameter lines, several `if`/`else`
 * lines and the early return of the buffer path are absent from this text.
 */
3106 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3107 struct ac_tex_info *tinfo,
3108 nir_tex_instr *instr,
3110 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3111 LLVMValueRef *param, unsigned count,
3115 unsigned is_rect = 0;
/* "da" is set for arrayed and cube textures. */
3116 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
3118 if (op == nir_texop_lod)
3120 /* Pad to power of two vector */
3121 while (count < util_next_power_of_two(count))
3122 param[count++] = LLVMGetUndef(ctx->i32);
3125 tinfo->args[0] = build_gather_values(ctx, param, count);
3127 tinfo->args[0] = param[0];
3129 tinfo->args[1] = res_ptr;
/* Fetches and queries return integers; the other visible type is v4f32. */
3132 if (op == nir_texop_txf ||
3133 op == nir_texop_txf_ms ||
3134 op == nir_texop_query_levels ||
3135 op == nir_texop_texture_samples ||
3136 op == nir_texop_txs)
3137 tinfo->dst_type = ctx->v4i32;
3139 tinfo->dst_type = ctx->v4f32;
3140 tinfo->args[num_args++] = samp_ptr;
/* Buffer texel fetch: resource, zero and the first address operand only. */
3143 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
3144 tinfo->args[0] = res_ptr;
3145 tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
3146 tinfo->args[2] = param[0];
3147 tinfo->arg_count = 3;
/* Trailing flag operands, all i32 constants. */
3151 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
3152 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
3153 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
3154 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
3155 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
3156 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
3157 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
3158 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
3160 tinfo->arg_count = num_args;
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
 *
 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
 * filtering manually. The driver sets img7 to a mask clearing
 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
 *   s_and_b32 samp0, samp0, img7
 *
 * The ANISO_OVERRIDE sampler field enables this fix in TA.
 */
/*
 * Returns a sampler descriptor whose element 0 has been ANDed with
 * element 7 of the resource descriptor `res`.
 * The chip_class >= VI check presumably returns `samp` unchanged (its
 * return statement is not present in this text).
 */
3174 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
3175 LLVMValueRef res, LLVMValueRef samp)
3177 LLVMBuilderRef builder = ctx->builder;
3178 LLVMValueRef img7, samp0;
3180 if (ctx->options->chip_class >= VI)
/* samp[0] &= res[7] */
3183 img7 = LLVMBuildExtractElement(builder, res,
3184 LLVMConstInt(ctx->i32, 7, 0), "");
3185 samp0 = LLVMBuildExtractElement(builder, samp,
3186 LLVMConstInt(ctx->i32, 0, 0), "");
3187 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3188 return LLVMBuildInsertElement(builder, samp, samp0,
3189 LLVMConstInt(ctx->i32, 0, 0), "");
/*
 * Fetches the descriptors a texture instruction needs.
 *
 * res_ptr   - receives the buffer descriptor for GLSL_SAMPLER_DIM_BUF,
 *             otherwise the image descriptor of instr->texture.
 * samp_ptr  - receives the sampler descriptor, taken from instr->sampler
 *             or from instr->texture; for dimensions below
 *             GLSL_SAMPLER_DIM_RECT it is passed through
 *             sici_fix_sampler_aniso().
 * fmask_ptr - when non-NULL, instr->sampler is NULL and the op is txf_ms
 *             or samples_identical, receives the FMASK descriptor.
 * NOTE(review): the conditions choosing between the two sampler sources
 * are not present in this text.
 */
3192 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3193 nir_tex_instr *instr,
3194 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3195 LLVMValueRef *fmask_ptr)
3197 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
3198 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3200 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3203 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3205 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3206 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3207 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3209 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3210 instr->op == nir_texop_samples_identical))
3211 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3214 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
3216 LLVMValueRef result = NULL;
3217 struct ac_tex_info tinfo = { 0 };
3218 unsigned dmask = 0xf;
3219 LLVMValueRef address[16];
3220 LLVMValueRef coords[5];
3221 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
3222 LLVMValueRef bias = NULL, offsets = NULL;
3223 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
3224 LLVMValueRef ddx = NULL, ddy = NULL;
3225 LLVMValueRef derivs[6];
3226 unsigned chan, count = 0;
3227 unsigned const_src = 0, num_deriv_comp = 0;
3229 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3231 for (unsigned i = 0; i < instr->num_srcs; i++) {
3232 switch (instr->src[i].src_type) {
3233 case nir_tex_src_coord:
3234 coord = get_src(ctx, instr->src[i].src);
3236 case nir_tex_src_projector:
3238 case nir_tex_src_comparator:
3239 comparator = get_src(ctx, instr->src[i].src);
3241 case nir_tex_src_offset:
3242 offsets = get_src(ctx, instr->src[i].src);
3245 case nir_tex_src_bias:
3246 bias = get_src(ctx, instr->src[i].src);
3248 case nir_tex_src_lod:
3249 lod = get_src(ctx, instr->src[i].src);
3251 case nir_tex_src_ms_index:
3252 sample_index = get_src(ctx, instr->src[i].src);
3254 case nir_tex_src_ms_mcs:
3256 case nir_tex_src_ddx:
3257 ddx = get_src(ctx, instr->src[i].src);
3258 num_deriv_comp = instr->src[i].src.ssa->num_components;
3260 case nir_tex_src_ddy:
3261 ddy = get_src(ctx, instr->src[i].src);
3263 case nir_tex_src_texture_offset:
3264 case nir_tex_src_sampler_offset:
3265 case nir_tex_src_plane:
3271 if (instr->op == nir_texop_texture_samples) {
3272 LLVMValueRef res, samples, is_msaa;
3273 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
3274 samples = LLVMBuildExtractElement(ctx->builder, res,
3275 LLVMConstInt(ctx->i32, 3, false), "");
3276 is_msaa = LLVMBuildLShr(ctx->builder, samples,
3277 LLVMConstInt(ctx->i32, 28, false), "");
3278 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
3279 LLVMConstInt(ctx->i32, 0xe, false), "");
3280 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
3281 LLVMConstInt(ctx->i32, 0xe, false), "");
3283 samples = LLVMBuildLShr(ctx->builder, samples,
3284 LLVMConstInt(ctx->i32, 16, false), "");
3285 samples = LLVMBuildAnd(ctx->builder, samples,
3286 LLVMConstInt(ctx->i32, 0xf, false), "");
3287 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
3289 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
3296 for (chan = 0; chan < instr->coord_components; chan++)
3297 coords[chan] = llvm_extract_elem(ctx, coord, chan);
3299 if (offsets && instr->op != nir_texop_txf) {
3300 LLVMValueRef offset[3], pack;
3301 for (chan = 0; chan < 3; ++chan)
3302 offset[chan] = ctx->i32zero;
3304 tinfo.has_offset = true;
3305 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
3306 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
3307 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
3308 LLVMConstInt(ctx->i32, 0x3f, false), "");
3310 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
3311 LLVMConstInt(ctx->i32, chan * 8, false), "");
3313 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
3314 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
3315 address[count++] = pack;
3318 /* pack LOD bias value */
3319 if (instr->op == nir_texop_txb && bias) {
3320 address[count++] = bias;
3323 /* Pack depth comparison value */
3324 if (instr->is_shadow && comparator) {
3325 address[count++] = llvm_extract_elem(ctx, comparator, 0);
3328 /* pack derivatives */
3330 switch (instr->sampler_dim) {
3331 case GLSL_SAMPLER_DIM_3D:
3332 case GLSL_SAMPLER_DIM_CUBE:
3335 case GLSL_SAMPLER_DIM_2D:
3339 case GLSL_SAMPLER_DIM_1D:
3344 for (unsigned i = 0; i < num_deriv_comp; i++) {
3345 derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
3346 derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
3350 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
3351 for (chan = 0; chan < instr->coord_components; chan++)
3352 coords[chan] = to_float(ctx, coords[chan]);
3353 if (instr->coord_components == 3)
3354 coords[3] = LLVMGetUndef(ctx->f32);
3355 ac_prepare_cube_coords(&ctx->ac,
3356 instr->op == nir_texop_txd, instr->is_array,
3363 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
3364 address[count++] = derivs[i];
3367 /* Pack texture coordinates */
3369 address[count++] = coords[0];
3370 if (instr->coord_components > 1)
3371 address[count++] = coords[1];
3372 if (instr->coord_components > 2) {
3373 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
3374 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
3375 coords[2] = to_float(ctx, coords[2]);
3376 coords[2] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coords[2],
3378 coords[2] = to_integer(ctx, coords[2]);
3380 address[count++] = coords[2];
3385 if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
3386 address[count++] = lod;
3387 } else if (instr->op == nir_texop_txf_ms && sample_index) {
3388 address[count++] = sample_index;
3389 } else if(instr->op == nir_texop_txs) {
3392 address[count++] = lod;
3394 address[count++] = ctx->i32zero;
3397 for (chan = 0; chan < count; chan++) {
3398 address[chan] = LLVMBuildBitCast(ctx->builder,
3399 address[chan], ctx->i32, "");
3402 if (instr->op == nir_texop_samples_identical) {
3403 LLVMValueRef txf_address[4];
3404 struct ac_tex_info txf_info = { 0 };
3405 unsigned txf_count = count;
3406 memcpy(txf_address, address, sizeof(txf_address));
3408 if (!instr->is_array)
3409 txf_address[2] = ctx->i32zero;
3410 txf_address[3] = ctx->i32zero;
3412 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3414 txf_address, txf_count, 0xf);
3416 result = build_tex_intrinsic(ctx, instr, &txf_info);
3418 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3419 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
3423 /* Adjust the sample index according to FMASK.
3425 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3426 * which is the identity mapping. Each nibble says which physical sample
3427 * should be fetched to get that sample.
3429 * For example, 0x11111100 means there are only 2 samples stored and
3430 * the second sample covers 3/4 of the pixel. When reading samples 0
3431 * and 1, return physical sample 0 (determined by the first two 0s
3432 * in FMASK), otherwise return physical sample 1.
3434 * The sample index should be adjusted as follows:
3435 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3437 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
3438 LLVMValueRef txf_address[4];
3439 struct ac_tex_info txf_info = { 0 };
3440 unsigned txf_count = count;
3441 memcpy(txf_address, address, sizeof(txf_address));
3443 if (!instr->is_array)
3444 txf_address[2] = ctx->i32zero;
3445 txf_address[3] = ctx->i32zero;
3447 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3449 txf_address, txf_count, 0xf);
3451 result = build_tex_intrinsic(ctx, instr, &txf_info);
3452 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3453 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3455 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3459 unsigned sample_chan = instr->is_array ? 3 : 2;
3461 LLVMValueRef sample_index4 =
3462 LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
3463 LLVMValueRef shifted_fmask =
3464 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3465 LLVMValueRef final_sample =
3466 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3468 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3469 * resource descriptor is 0 (invalid),
3471 LLVMValueRef fmask_desc =
3472 LLVMBuildBitCast(ctx->builder, fmask_ptr,
3475 LLVMValueRef fmask_word1 =
3476 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3479 LLVMValueRef word1_is_nonzero =
3480 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3481 fmask_word1, ctx->i32zero, "");
3483 /* Replace the MSAA sample index. */
3484 address[sample_chan] =
3485 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3486 final_sample, address[sample_chan], "");
3489 if (offsets && instr->op == nir_texop_txf) {
3490 nir_const_value *const_offset =
3491 nir_src_as_const_value(instr->src[const_src].src);
3492 int num_offsets = instr->src[const_src].src.ssa->num_components;
3493 assert(const_offset);
3494 num_offsets = MIN2(num_offsets, instr->coord_components);
3495 if (num_offsets > 2)
3496 address[2] = LLVMBuildAdd(ctx->builder,
3497 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
3498 if (num_offsets > 1)
3499 address[1] = LLVMBuildAdd(ctx->builder,
3500 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
3501 address[0] = LLVMBuildAdd(ctx->builder,
3502 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
3506 /* TODO TG4 support */
3507 if (instr->op == nir_texop_tg4) {
3508 if (instr->is_shadow)
3511 dmask = 1 << instr->component;
3513 set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
3514 res_ptr, samp_ptr, address, count, dmask);
3516 result = build_tex_intrinsic(ctx, instr, &tinfo);
3518 if (instr->op == nir_texop_query_levels)
3519 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
3520 else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
3521 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3522 else if (instr->op == nir_texop_txs &&
3523 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
3525 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3526 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3527 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
3528 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3529 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
3530 } else if (instr->dest.ssa.num_components != 4)
3531 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
3535 assert(instr->dest.is_ssa);
3536 result = to_integer(ctx, result);
3537 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3542 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3544 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3545 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3547 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3548 _mesa_hash_table_insert(ctx->phis, instr, result);
3551 static void visit_post_phi(struct nir_to_llvm_context *ctx,
3552 nir_phi_instr *instr,
3553 LLVMValueRef llvm_phi)
3555 nir_foreach_phi_src(src, instr) {
3556 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3557 LLVMValueRef llvm_src = get_src(ctx, src->src);
3559 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3563 static void phi_post_pass(struct nir_to_llvm_context *ctx)
3565 struct hash_entry *entry;
3566 hash_table_foreach(ctx->phis, entry) {
3567 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3568 (LLVMValueRef)entry->data);
3573 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3574 nir_ssa_undef_instr *instr)
3576 unsigned num_components = instr->def.num_components;
3579 if (num_components == 1)
3580 undef = LLVMGetUndef(ctx->i32);
3582 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3584 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3587 static void visit_jump(struct nir_to_llvm_context *ctx,
3588 nir_jump_instr *instr)
3590 switch (instr->type) {
3591 case nir_jump_break:
3592 LLVMBuildBr(ctx->builder, ctx->break_block);
3593 LLVMClearInsertionPosition(ctx->builder);
3595 case nir_jump_continue:
3596 LLVMBuildBr(ctx->builder, ctx->continue_block);
3597 LLVMClearInsertionPosition(ctx->builder);
3600 fprintf(stderr, "Unknown NIR jump instr: ");
3601 nir_print_instr(&instr->instr, stderr);
3602 fprintf(stderr, "\n");
/* Forward declaration: visit_if() and visit_loop() call visit_cf_list() on
 * their nested control-flow lists before its definition appears. */
static void visit_cf_list(struct nir_to_llvm_context *ctx,
			  struct exec_list *list);
3610 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
3612 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
3613 nir_foreach_instr(instr, block)
3615 switch (instr->type) {
3616 case nir_instr_type_alu:
3617 visit_alu(ctx, nir_instr_as_alu(instr));
3619 case nir_instr_type_load_const:
3620 visit_load_const(ctx, nir_instr_as_load_const(instr));
3622 case nir_instr_type_intrinsic:
3623 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3625 case nir_instr_type_tex:
3626 visit_tex(ctx, nir_instr_as_tex(instr));
3628 case nir_instr_type_phi:
3629 visit_phi(ctx, nir_instr_as_phi(instr));
3631 case nir_instr_type_ssa_undef:
3632 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3634 case nir_instr_type_jump:
3635 visit_jump(ctx, nir_instr_as_jump(instr));
3638 fprintf(stderr, "Unknown NIR instr type: ");
3639 nir_print_instr(instr, stderr);
3640 fprintf(stderr, "\n");
3645 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3648 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
3650 LLVMValueRef value = get_src(ctx, if_stmt->condition);
3652 LLVMBasicBlockRef merge_block =
3653 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3654 LLVMBasicBlockRef if_block =
3655 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3656 LLVMBasicBlockRef else_block = merge_block;
3657 if (!exec_list_is_empty(&if_stmt->else_list))
3658 else_block = LLVMAppendBasicBlockInContext(
3659 ctx->context, ctx->main_function, "");
3661 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
3662 LLVMConstInt(ctx->i32, 0, false), "");
3663 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
3665 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3666 visit_cf_list(ctx, &if_stmt->then_list);
3667 if (LLVMGetInsertBlock(ctx->builder))
3668 LLVMBuildBr(ctx->builder, merge_block);
3670 if (!exec_list_is_empty(&if_stmt->else_list)) {
3671 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
3672 visit_cf_list(ctx, &if_stmt->else_list);
3673 if (LLVMGetInsertBlock(ctx->builder))
3674 LLVMBuildBr(ctx->builder, merge_block);
3677 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
3680 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
3682 LLVMBasicBlockRef continue_parent = ctx->continue_block;
3683 LLVMBasicBlockRef break_parent = ctx->break_block;
3685 ctx->continue_block =
3686 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3688 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3690 LLVMBuildBr(ctx->builder, ctx->continue_block);
3691 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
3692 visit_cf_list(ctx, &loop->body);
3694 if (LLVMGetInsertBlock(ctx->builder))
3695 LLVMBuildBr(ctx->builder, ctx->continue_block);
3696 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
3698 ctx->continue_block = continue_parent;
3699 ctx->break_block = break_parent;
3702 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3703 struct exec_list *list)
3705 foreach_list_typed(nir_cf_node, node, node, list)
3707 switch (node->type) {
3708 case nir_cf_node_block:
3709 visit_block(ctx, nir_cf_node_as_block(node));
3712 case nir_cf_node_if:
3713 visit_if(ctx, nir_cf_node_as_if(node));
3716 case nir_cf_node_loop:
3717 visit_loop(ctx, nir_cf_node_as_loop(node));
3727 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
3728 struct nir_variable *variable)
3730 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
3731 LLVMValueRef t_offset;
3732 LLVMValueRef t_list;
3733 LLVMValueRef args[3];
3735 LLVMValueRef buffer_index;
3736 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
3737 int idx = variable->data.location;
3738 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
3740 variable->data.driver_location = idx * 4;
3742 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
3743 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
3744 ctx->start_instance, "");
3745 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3746 ctx->shader_info->vs.vgpr_comp_cnt);
3748 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
3749 ctx->base_vertex, "");
3751 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
3752 t_offset = LLVMConstInt(ctx->i32, index + i, false);
3754 t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
3756 args[1] = LLVMConstInt(ctx->i32, 0, false);
3757 args[2] = buffer_index;
3758 input = ac_emit_llvm_intrinsic(&ctx->ac,
3759 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
3760 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3762 for (unsigned chan = 0; chan < 4; chan++) {
3763 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3764 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
3765 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
3766 input, llvm_chan, ""));
3772 static void interp_fs_input(struct nir_to_llvm_context *ctx,
3774 LLVMValueRef interp_param,
3775 LLVMValueRef prim_mask,
3776 LLVMValueRef result[4])
3778 const char *intr_name;
3779 LLVMValueRef attr_number;
3782 attr_number = LLVMConstInt(ctx->i32, attr, false);
3784 /* fs.constant returns the param from the middle vertex, so it's not
3785 * really useful for flat shading. It's meant to be used for custom
3786 * interpolation (but the intrinsic can't fetch from the other two
3789 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
3790 * to do the right thing. The only reason we use fs.constant is that
3791 * fs.interp cannot be used on integers, because they can be equal
3794 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
3796 for (chan = 0; chan < 4; chan++) {
3797 LLVMValueRef args[4];
3798 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3800 args[0] = llvm_chan;
3801 args[1] = attr_number;
3802 args[2] = prim_mask;
3803 args[3] = interp_param;
3804 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
3805 ctx->f32, args, args[3] ? 4 : 3,
3806 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3811 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
3812 struct nir_variable *variable)
3814 int idx = variable->data.location;
3815 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3816 LLVMValueRef interp;
3818 variable->data.driver_location = idx * 4;
3819 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
3821 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
3822 unsigned interp_type;
3823 if (variable->data.sample) {
3824 interp_type = INTERP_SAMPLE;
3825 ctx->shader_info->fs.force_persample = true;
3826 } else if (variable->data.centroid)
3827 interp_type = INTERP_CENTROID;
3829 interp_type = INTERP_CENTER;
3831 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
3835 for (unsigned i = 0; i < attrib_count; ++i)
3836 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
3841 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
3842 struct nir_variable *variable)
3844 switch (ctx->stage) {
3845 case MESA_SHADER_VERTEX:
3846 handle_vs_input_decl(ctx, variable);
3848 case MESA_SHADER_FRAGMENT:
3849 handle_fs_input_decl(ctx, variable);
3858 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
3859 struct nir_shader *nir)
3862 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
3863 LLVMValueRef interp_param;
3864 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
3866 if (!(ctx->input_mask & (1ull << i)))
3869 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
3870 interp_param = *inputs;
3871 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
3875 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
3877 } else if (i == VARYING_SLOT_POS) {
3878 for(int i = 0; i < 3; ++i)
3879 inputs[i] = ctx->frag_pos[i];
3881 inputs[3] = emit_fdiv(ctx, ctx->f32one, ctx->frag_pos[3]);
3884 ctx->shader_info->fs.num_interp = index;
3885 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
3886 ctx->shader_info->fs.has_pcoord = true;
3887 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
3891 ac_build_alloca(struct nir_to_llvm_context *ctx,
3895 LLVMBuilderRef builder = ctx->builder;
3896 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
3897 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
3898 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
3899 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
3900 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
3904 LLVMPositionBuilderBefore(first_builder, first_instr);
3906 LLVMPositionBuilderAtEnd(first_builder, first_block);
3909 res = LLVMBuildAlloca(first_builder, type, name);
3910 LLVMBuildStore(builder, LLVMConstNull(type), res);
3912 LLVMDisposeBuilder(first_builder);
3917 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
3921 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
3922 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
/*
 * Declare storage for one shader output variable.
 *
 * The slot index is location + index and driver_location becomes slot * 4.
 * For vertex shaders, a variable at CLIP_DIST0 or CULL_DIST0 records its
 * array length as num_clips / num_culls and the matching bit mask in
 * shader_info. Every channel of every slot gets an f32 stack slot from
 * si_build_alloca_undef(), and the slots are marked in ctx->output_mask.
 *
 * NOTE(review): this excerpt is missing short lines (return type, braces,
 * and the original lines between 3945 and 3954), so part of the clip/cull
 * handling is not visible here - confirm against the full file.
 */
3927 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
3928 struct nir_variable *variable)
3930 int idx = variable->data.location + variable->data.index;
3931 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3933 variable->data.driver_location = idx * 4;
3935 if (ctx->stage == MESA_SHADER_VERTEX) {
3937 if (idx == VARYING_SLOT_CLIP_DIST0 ||
3938 idx == VARYING_SLOT_CULL_DIST0) {
3939 int length = glsl_get_length(variable->type);
3940 if (idx == VARYING_SLOT_CLIP_DIST0) {
3941 ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
3942 ctx->num_clips = length;
3943 } else if (idx == VARYING_SLOT_CULL_DIST0) {
3944 ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
3945 ctx->num_culls = length;
/* One alloca per channel (4 per slot) so outputs can be stored and loaded. */
3954 for (unsigned i = 0; i < attrib_count; ++i) {
3955 for (unsigned chan = 0; chan < 4; chan++) {
3956 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
3957 si_build_alloca_undef(ctx, ctx->f32, "");
3960 ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
3964 setup_locals(struct nir_to_llvm_context *ctx,
3965 struct nir_function *func)
3968 ctx->num_locals = 0;
3969 nir_foreach_variable(variable, &func->impl->locals) {
3970 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3971 variable->data.driver_location = ctx->num_locals * 4;
3972 ctx->num_locals += attrib_count;
3974 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
3978 for (i = 0; i < ctx->num_locals; i++) {
3979 for (j = 0; j < 4; j++) {
3980 ctx->locals[i * 4 + j] =
3981 si_build_alloca_undef(ctx, ctx->f32, "temp");
3987 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
3989 v = to_float(ctx, v);
3990 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
3991 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
3995 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
3996 LLVMValueRef src0, LLVMValueRef src1)
3998 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
3999 LLVMValueRef comp[2];
4001 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
4002 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
4003 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
4004 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
/*
 * Fill the nine-element argument array for the shader export intrinsic.
 *
 * Layout as written below: args[0] channel write mask, args[1] EXEC-mask
 * validity flag, args[2] last-export flag, args[3] export target, args[4]
 * COMPR flag, args[5..8] the four channel values. For fragment shader MRT
 * targets the values are converted according to the 4-bit col_format taken
 * from key.fs.col_format for that MRT; formats narrower than 32 bits pack
 * two channels per dword and set COMPR. The final loop converts args[5..8]
 * with to_float().
 *
 * NOTE(review): this excerpt is missing short lines (return type, the
 * target/args parameters, braces, break statements and some call
 * arguments); the comments here only describe what is visible.
 */
4007 /* Initialize arguments for the shader export intrinsic */
4009 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
4010 LLVMValueRef *values,
4014 /* Default is 0xf. Adjusted below depending on the format. */
4015 args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
4016 /* Specify whether the EXEC mask represents the valid mask */
4017 args[1] = LLVMConstInt(ctx->i32, 0, false);
4019 /* Specify whether this is the last export */
4020 args[2] = LLVMConstInt(ctx->i32, 0, false);
4021 /* Specify the target we are exporting */
4022 args[3] = LLVMConstInt(ctx->i32, target, false);
4024 args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
4025 args[5] = LLVMGetUndef(ctx->f32);
4026 args[6] = LLVMGetUndef(ctx->f32);
4027 args[7] = LLVMGetUndef(ctx->f32);
4028 args[8] = LLVMGetUndef(ctx->f32);
4033 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4034 LLVMValueRef val[4];
4035 unsigned index = target - V_008DFC_SQ_EXP_MRT;
/* Each MRT has a 4-bit format field and a 1-bit is_int8 flag in the key. */
4036 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4037 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4039 switch(col_format) {
4040 case V_028714_SPI_SHADER_ZERO:
4041 args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
4042 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
4045 case V_028714_SPI_SHADER_32_R:
4046 args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
4047 args[5] = values[0];
4050 case V_028714_SPI_SHADER_32_GR:
4051 args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
4052 args[5] = values[0];
4053 args[6] = values[1];
4056 case V_028714_SPI_SHADER_32_AR:
4057 args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
4058 args[5] = values[0];
4059 args[8] = values[3];
4062 case V_028714_SPI_SHADER_FP16_ABGR:
4063 args[4] = ctx->i32one;
/* Two channels are packed per dword with llvm.SI.packf16. */
4065 for (unsigned chan = 0; chan < 2; chan++) {
4066 LLVMValueRef pack_args[2] = {
4068 values[2 * chan + 1]
4070 LLVMValueRef packed;
4072 packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
4073 ctx->i32, pack_args, 2,
4074 AC_FUNC_ATTR_READNONE);
4075 args[chan + 5] = packed;
4079 case V_028714_SPI_SHADER_UNORM16_ABGR:
/* Clamp to [0, 1], scale by 65535 and add 0.5 before the unsigned conversion. */
4080 for (unsigned chan = 0; chan < 4; chan++) {
4081 val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
4082 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4083 LLVMConstReal(ctx->f32, 65535), "");
4084 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4085 LLVMConstReal(ctx->f32, 0.5), "");
4086 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4090 args[4] = ctx->i32one;
4091 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4092 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4095 case V_028714_SPI_SHADER_SNORM16_ABGR:
/* Clamp to [-1, 1] and scale by 32767 before the signed conversion. */
4096 for (unsigned chan = 0; chan < 4; chan++) {
4097 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4098 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4099 LLVMConstReal(ctx->f32, 32767), "");
4101 /* If positive, add 0.5, else add -0.5. */
4102 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4103 LLVMBuildSelect(ctx->builder,
4104 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4105 val[chan], ctx->f32zero, ""),
4106 LLVMConstReal(ctx->f32, 0.5),
4107 LLVMConstReal(ctx->f32, -0.5), ""), "");
4108 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4111 args[4] = ctx->i32one;
4112 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4113 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4116 case V_028714_SPI_SHADER_UINT16_ABGR: {
/* The upper bound is 255 for int8 targets and 65535 otherwise. */
4117 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4119 for (unsigned chan = 0; chan < 4; chan++) {
4120 val[chan] = to_integer(ctx, values[chan]);
4121 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4124 args[4] = ctx->i32one;
4125 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4126 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4130 case V_028714_SPI_SHADER_SINT16_ABGR: {
/* NOTE(review): the negative minimum is passed with SignExtend = 0 - confirm the intended constant is produced. */
4131 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4132 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4135 for (unsigned chan = 0; chan < 4; chan++) {
4136 val[chan] = to_integer(ctx, values[chan]);
4137 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4138 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4141 args[4] = ctx->i32one;
4142 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4143 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4148 case V_028714_SPI_SHADER_32_ABGR:
4149 memcpy(&args[5], values, sizeof(values[0]) * 4);
4153 memcpy(&args[5], values, sizeof(values[0]) * 4);
4155 for (unsigned i = 5; i < 9; ++i)
4156 args[i] = to_float(ctx, args[i]);
/*
 * Emit the exports for all vertex shader outputs.
 *
 * Visible steps: clip and cull distances are gathered into an 8-entry slot
 * array and staged in pos_args[2] / pos_args[3]; every output set in
 * ctx->output_mask is loaded from its alloca and either staged in pos_args
 * (position targets) or exported directly; a default position of
 * (0, 0, 0, 1) is staged when none was written; the point size is staged
 * in pos_args[1]; finally the staged position exports are emitted with
 * consecutive targets and the last one flagged. The position and parameter
 * export counts are stored in shader_info.
 *
 * NOTE(review): this excerpt is missing short lines (return type, braces,
 * several declarations, `continue`/`else` lines and intrinsic arguments);
 * the comments here only describe what is visible.
 */
4160 handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
4162 uint32_t param_count = 0;
4164 unsigned pos_idx, num_pos_exports = 0;
4165 LLVMValueRef args[9];
4166 LLVMValueRef pos_args[4][9] = { { 0 } };
4167 LLVMValueRef psize_value = 0;
4169 const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
4170 (1ull << VARYING_SLOT_CLIP_DIST1) |
4171 (1ull << VARYING_SLOT_CULL_DIST0) |
4172 (1ull << VARYING_SLOT_CULL_DIST1));
4175 LLVMValueRef slots[8];
/* The cull mask is shifted up by num_clips; cull values follow the clip values in slots[]. */
4178 if (ctx->shader_info->vs.cull_dist_mask)
4179 ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
4181 i = VARYING_SLOT_CLIP_DIST0;
4182 for (j = 0; j < ctx->num_clips; j++)
4183 slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4184 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4185 i = VARYING_SLOT_CULL_DIST0;
4186 for (j = 0; j < ctx->num_culls; j++)
4187 slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4188 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
/* Unused trailing slots are filled with undef. */
4190 for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
4191 slots[i] = LLVMGetUndef(ctx->f32);
/* More than four distances: the upper four are staged as position export 3. */
4193 if (ctx->num_clips + ctx->num_culls > 4) {
4194 target = V_008DFC_SQ_EXP_POS + 3;
4195 si_llvm_init_export_args(ctx, &slots[4], target, args);
4196 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4197 args, sizeof(args));
4200 target = V_008DFC_SQ_EXP_POS + 2;
4201 si_llvm_init_export_args(ctx, &slots[0], target, args);
4202 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4203 args, sizeof(args));
4207 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4208 LLVMValueRef values[4];
4209 if (!(ctx->output_mask & (1ull << i)))
4212 for (unsigned j = 0; j < 4; j++)
4213 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4214 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4216 if (i == VARYING_SLOT_POS) {
4217 target = V_008DFC_SQ_EXP_POS;
4218 } else if (i == VARYING_SLOT_CLIP_DIST0 ||
4219 i == VARYING_SLOT_CLIP_DIST1 ||
4220 i == VARYING_SLOT_CULL_DIST0 ||
4221 i == VARYING_SLOT_CULL_DIST1) {
4223 } else if (i == VARYING_SLOT_PSIZ) {
4224 ctx->shader_info->vs.writes_pointsize = true;
4225 psize_value = values[0];
4227 } else if (i >= VARYING_SLOT_VAR0) {
4228 ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
4229 target = V_008DFC_SQ_EXP_PARAM + param_count;
4233 si_llvm_init_export_args(ctx, values, target, args);
/* Position targets are staged in pos_args rather than exported here. */
4235 if (target >= V_008DFC_SQ_EXP_POS &&
4236 target <= (V_008DFC_SQ_EXP_POS + 3)) {
4237 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4238 args, sizeof(args));
4240 ac_emit_llvm_intrinsic(&ctx->ac,
4242 LLVMVoidTypeInContext(ctx->context),
4247 /* We need to add the position output manually if it's missing. */
4248 if (!pos_args[0][0]) {
4249 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
4250 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
4251 pos_args[0][2] = ctx->i32zero; /* last export? */
4252 pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
4253 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
4254 pos_args[0][5] = ctx->f32zero; /* X */
4255 pos_args[0][6] = ctx->f32zero; /* Y */
4256 pos_args[0][7] = ctx->f32zero; /* Z */
4257 pos_args[0][8] = ctx->f32one; /* W */
/* The point size goes in the X channel of position export 1. */
4260 if (ctx->shader_info->vs.writes_pointsize == true) {
4261 pos_args[1][0] = LLVMConstInt(ctx->i32, (ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */
4262 pos_args[1][1] = ctx->i32zero; /* EXEC mask */
4263 pos_args[1][2] = ctx->i32zero; /* last export? */
4264 pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
4265 pos_args[1][4] = ctx->i32zero; /* COMPR flag */
4266 pos_args[1][5] = ctx->f32zero; /* X */
4267 pos_args[1][6] = ctx->f32zero; /* Y */
4268 pos_args[1][7] = ctx->f32zero; /* Z */
4269 pos_args[1][8] = ctx->f32zero; /* W */
4271 if (ctx->shader_info->vs.writes_pointsize == true)
4272 pos_args[1][5] = psize_value;
4274 for (i = 0; i < 4; i++) {
4280 for (i = 0; i < 4; i++) {
4281 if (!pos_args[i][0])
4284 /* Specify the target we are exporting */
4285 pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
/* The final staged position export gets the last-export flag. */
4286 if (pos_idx == num_pos_exports)
4287 pos_args[i][2] = ctx->i32one;
4288 ac_emit_llvm_intrinsic(&ctx->ac,
4290 LLVMVoidTypeInContext(ctx->context),
4294 ctx->shader_info->vs.pos_exports = num_pos_exports;
4295 ctx->shader_info->vs.param_exports = param_count;
4299 si_export_mrt_color(struct nir_to_llvm_context *ctx,
4300 LLVMValueRef *color, unsigned param, bool is_last)
4302 LLVMValueRef args[9];
4304 si_llvm_init_export_args(ctx, color, param,
4308 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4309 args[2] = ctx->i32one; /* DONE bit */
4310 } else if (args[0] == ctx->i32zero)
4311 return; /* unnecessary NULL export */
4313 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4314 ctx->voidt, args, 9, 0);
/*
 * Export depth / stencil / sample mask to the MRTZ target.
 *
 * The export is always flagged EXEC-valid and DONE, is uncompressed, and
 * starts with all four channels undefined. The sample mask is written to
 * the B channel and the write mask is stored in args[0].
 *
 * NOTE(review): this excerpt is missing short lines (return type, braces,
 * the `mask` declaration and the original lines 4334-4345 and 4354-4355
 * that compute it), so how depth and stencil are written is not visible
 * here - confirm against the full file.
 */
4318 si_export_mrt_z(struct nir_to_llvm_context *ctx,
4319 LLVMValueRef depth, LLVMValueRef stencil,
4320 LLVMValueRef samplemask)
4322 LLVMValueRef args[9];
4324 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4325 args[2] = ctx->i32one; /* DONE bit */
4326 /* Specify the target we are exporting */
4327 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4329 args[4] = ctx->i32zero; /* COMP flag */
4330 args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4331 args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4332 args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4333 args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4346 args[7] = samplemask;
4350 /* SI (except OLAND) has a bug that it only looks
4351 * at the X writemask component. */
4352 if (ctx->options->chip_class == SI &&
4353 ctx->options->family != CHIP_OLAND)
4356 args[0] = LLVMConstInt(ctx->i32, mask, false);
4357 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4358 ctx->voidt, args, 9, 0);
/*
 * Emit the exports for all fragment shader outputs.
 *
 * Each output set in ctx->output_mask is loaded from its alloca. Depth and
 * stencil results are remembered (and flagged in shader_info) for a later
 * si_export_mrt_z() call; other outputs are exported as color targets via
 * si_export_mrt_color(). fs.output_mask is set to the low `index` bits.
 *
 * NOTE(review): this excerpt is missing short lines (return type, braces,
 * the `index` / `last` declarations, `continue` / `else` lines), so the
 * exact branch structure around the final exports is not visible here.
 */
4362 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
4365 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
4367 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4368 LLVMValueRef values[4];
4370 if (!(ctx->output_mask & (1ull << i)))
4373 if (i == FRAG_RESULT_DEPTH) {
4374 ctx->shader_info->fs.writes_z = true;
4375 depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
4376 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4377 } else if (i == FRAG_RESULT_STENCIL) {
4378 ctx->shader_info->fs.writes_stencil = true;
4379 stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
4380 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4383 for (unsigned j = 0; j < 4; j++)
4384 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4385 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
/* With no depth/stencil write, `last` is true when no output bit above i is set. */
4387 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
4388 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
4390 si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
4395 if (depth || stencil)
4396 si_export_mrt_z(ctx, depth, stencil, samplemask);
4398 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
4400 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
4404 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
4406 switch (ctx->stage) {
4407 case MESA_SHADER_VERTEX:
4408 handle_vs_outputs_post(ctx);
4410 case MESA_SHADER_FRAGMENT:
4411 handle_fs_outputs_post(ctx);
/* Assign a location to one shared (compute) variable.
 *
 * The variable's driver_location is set to the current value of *offset.
 * `size` is the attribute-slot count that glsl_count_attribute_slots()
 * reports for the variable's type.
 *
 * NOTE(review): the statement that consumes `size` (presumably advancing
 * *offset) and any use of `idx` are not visible in this listing — confirm
 * against the full source.
 */
4419 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4420 struct nir_variable *variable, uint32_t *offset, int idx)
4422 unsigned size = glsl_count_attribute_slots(variable->type, false);
4423 variable->data.driver_location = *offset;
/* Run a fixed set of LLVM function passes over ctx->main_function and then
 * release the builder.
 *
 * A function pass manager is created, populated with the passes listed
 * below, initialized, run on ctx->main_function and finalized.  Both
 * ctx->builder and the pass manager are disposed before returning, so
 * ctx->builder must not be used after this call.
 *
 * NOTE(review): the argument passed to
 * LLVMCreateFunctionPassManagerForModule() is on a line missing from this
 * listing.
 */
4427 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
4429 LLVMPassManagerRef passmgr;
4430 /* Create the pass manager */
4431 passmgr = LLVMCreateFunctionPassManagerForModule(
4434 /* This pass should eliminate all the load and store instructions */
4435 LLVMAddPromoteMemoryToRegisterPass(passmgr);
4437 /* Add some optimization passes */
4438 LLVMAddScalarReplAggregatesPass(passmgr);
4439 LLVMAddLICMPass(passmgr);
4440 LLVMAddAggressiveDCEPass(passmgr);
4441 LLVMAddCFGSimplificationPass(passmgr);
4442 LLVMAddInstructionCombiningPass(passmgr);
/* Initialize, run on the single main function, then finalize. */
4445 LLVMInitializeFunctionPassManager(passmgr);
4446 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
4447 LLVMFinalizeFunctionPassManager(passmgr);
/* The builder is released here together with the pass manager. */
4449 LLVMDisposeBuilder(ctx->builder);
4450 LLVMDisposePassManager(passmgr);
/* Translate a NIR shader into a freshly created LLVM module.
 *
 * tm:          target machine (not referenced in the lines visible here).
 * nir:         shader to translate; its stage selects the stage-specific
 *              set-up below.
 * shader_info: output; it is zeroed with memset() first, so anything the
 *              caller stored in it beforehand is discarded.
 * options:     compiler options; chip_class is read to decide
 *              has_ds_bpermute.
 *
 * A new LLVMContext and a module named "shader" are created here.
 * NOTE(review): the return statement is not visible in this listing —
 * presumably ctx.module is returned and the caller owns both the module and
 * its context; confirm against the full source.
 */
4454 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
4455 struct nir_shader *nir,
4456 struct ac_shader_variant_info *shader_info,
4457 const struct ac_nir_compiler_options *options)
4459 struct nir_to_llvm_context ctx = {0};
4460 struct nir_function *func;
4462 ctx.options = options;
4463 ctx.shader_info = shader_info;
4464 ctx.context = LLVMContextCreate();
4465 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
/* The shared ac_llvm_context uses the same LLVM context and module. */
4467 ac_llvm_context_init(&ctx.ac, ctx.context);
4468 ctx.ac.module = ctx.module;
4470 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
/* Start from an all-zero shader_info. */
4472 memset(shader_info, 0, sizeof(*shader_info));
4474 LLVMSetTarget(ctx.module, "amdgcn--");
4477 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
4478 ctx.ac.builder = ctx.builder;
4479 ctx.stage = nir->stage;
/* Mark every descriptor-set and shader-data user SGPR location with -1
 * before create_function() runs. */
4481 for (i = 0; i < AC_UD_MAX_SETS; i++)
4482 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
4483 for (i = 0; i < AC_UD_MAX_UD; i++)
4484 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
4486 create_function(&ctx);
/* Compute shaders: assign locations to the shared variables and create one
 * global i8 array of shared_size elements, aligned to 4, which is exposed
 * as an i8 pointer in LOCAL_ADDR_SPACE through ctx.shared_memory.
 * NOTE(review): the body of the first loop over nir->shared and the
 * remaining arguments of LLVMAddGlobalInAddressSpace() are not visible. */
4488 if (nir->stage == MESA_SHADER_COMPUTE) {
4490 nir_foreach_variable(variable, &nir->shared)
4494 uint32_t shared_size = 0;
4496 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
4497 nir_foreach_variable(variable, &nir->shared) {
4498 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
4503 var = LLVMAddGlobalInAddressSpace(ctx.module,
4504 LLVMArrayType(ctx.i8, shared_size),
4507 LLVMSetAlignment(var, 4);
4508 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
/* Declare inputs, run the fragment-only input set-up, then declare outputs. */
4512 nir_foreach_variable(variable, &nir->inputs)
4513 handle_shader_input_decl(&ctx, variable);
4515 if (nir->stage == MESA_SHADER_FRAGMENT)
4516 handle_fs_inputs_pre(&ctx, nir);
4518 nir_foreach_variable(variable, &nir->outputs)
4519 handle_shader_output_decl(&ctx, variable);
/* Pointer-keyed hash tables used during translation; both are released
 * with ralloc_free() at the end of this function. */
4521 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4522 _mesa_key_pointer_equal);
4523 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4524 _mesa_key_pointer_equal);
/* Only the first entry of nir->functions is translated. */
4526 func = (struct nir_function *)exec_list_get_head(&nir->functions);
4528 setup_locals(&ctx, func);
4530 visit_cf_list(&ctx, &func->impl->body);
4531 phi_post_pass(&ctx);
/* Emit the stage-specific exports and terminate the function. */
4533 handle_shader_outputs_post(&ctx);
4534 LLVMBuildRetVoid(ctx.builder);
/* Runs the optimization passes and disposes ctx.builder. */
4536 ac_llvm_finalize_module(&ctx);
4538 ralloc_free(ctx.defs);
4539 ralloc_free(ctx.phis);
/* LLVM diagnostic callback.
 *
 * di:      diagnostic reported by LLVM.
 * context: user pointer registered with the handler; it is interpreted as
 *          an `unsigned *`.
 *
 * The description string obtained from LLVM is released with
 * LLVMDisposeMessage() before returning.  Diagnostics of severity
 * LLVMDSError are printed to stderr.
 * NOTE(review): the store through `retval` is not visible in this listing —
 * presumably it is set to a non-zero value on error; confirm.
 */
4544 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
4546 unsigned *retval = (unsigned *)context;
4547 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
4548 char *description = LLVMGetDiagInfoDescription(di);
4550 if (severity == LLVMDSError) {
4552 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
4556 LLVMDisposeMessage(description);
/* Compile an LLVM module to an object file in memory and parse it into
 * `binary`.
 *
 * M:      module to compile.
 * binary: filled in by ac_elf_read() from the emitted object buffer.
 * tm:     target machine used for code emission.
 *
 * ac_diagnostic_handler is installed on the module's LLVM context before
 * emission.  The emitted memory buffer is disposed once it has been read.
 * NOTE(review): the declarations of `err`/`mem_err`, the error branch and
 * the return statement are not visible in this listing — presumably
 * `retval` (initialised to 0) is returned and is non-zero on failure;
 * confirm.
 */
4559 static unsigned ac_llvm_compile(LLVMModuleRef M,
4560 struct ac_shader_binary *binary,
4561 LLVMTargetMachineRef tm)
4563 unsigned retval = 0;
4565 LLVMContextRef llvm_ctx;
4566 LLVMMemoryBufferRef out_buffer;
4567 unsigned buffer_size;
4568 const char *buffer_data;
4571 /* Setup Diagnostic Handler*/
4572 llvm_ctx = LLVMGetModuleContext(M);
4574 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
/* Emit an object file (LLVMObjectFile) into a memory buffer. */
4578 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
4581 /* Process Errors/Warnings */
4583 fprintf(stderr, "%s: %s", __FUNCTION__, err);
4589 /* Extract Shader Code*/
4590 buffer_size = LLVMGetBufferSize(out_buffer);
4591 buffer_data = LLVMGetBufferStart(out_buffer);
4593 ac_elf_read(buffer_data, buffer_size, binary);
/* The ELF contents have been read; release LLVM's buffer. */
4596 LLVMDisposeMemoryBuffer(out_buffer);
/* Compile a NIR shader to a hardware binary and fill in its configuration.
 *
 * tm:          target machine used for translation and code emission.
 * binary:      output; zeroed, then filled by ac_llvm_compile().
 * config:      output; read from the binary by
 *              ac_shader_binary_read_config() and then adjusted below.
 * shader_info: output; filled by ac_translate_nir_to_llvm() and extended
 *              with stage-specific data here.
 * nir:         shader to compile.
 * options:     compiler options forwarded to the translator.
 *
 * The LLVM module and its context are disposed before returning.
 * NOTE(review): the final parameter, the conditions guarding the module
 * dump, the "compile failed" message and the disassembly print are on lines
 * missing from this listing; whether compilation failure stops further
 * processing cannot be determined from here.
 */
4602 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
4603 struct ac_shader_binary *binary,
4604 struct ac_shader_config *config,
4605 struct ac_shader_variant_info *shader_info,
4606 struct nir_shader *nir,
4607 const struct ac_nir_compiler_options *options,
4611 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
4614 LLVMDumpModule(llvm_module);
4616 memset(binary, 0, sizeof(*binary));
4617 int v = ac_llvm_compile(llvm_module, binary, tm);
4619 fprintf(stderr, "compile failed\n");
4623 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
4625 ac_shader_binary_read_config(binary, config, 0);
/* Fetch the context before the module is disposed, then release both. */
4627 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4628 LLVMDisposeModule(llvm_module);
4629 LLVMContextDispose(ctx);
/* Fragment shaders: recompute num_input_vgprs from the enable bits in
 * config->spi_ps_input_addr; each enabled input adds the VGPR count shown
 * on the line following its test. */
4631 if (nir->stage == MESA_SHADER_FRAGMENT) {
4632 shader_info->num_input_vgprs = 0;
4633 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
4634 shader_info->num_input_vgprs += 2;
4635 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
4636 shader_info->num_input_vgprs += 2;
4637 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
4638 shader_info->num_input_vgprs += 2;
4639 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
4640 shader_info->num_input_vgprs += 3;
4641 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
4642 shader_info->num_input_vgprs += 2;
4643 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
4644 shader_info->num_input_vgprs += 2;
4645 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
4646 shader_info->num_input_vgprs += 2;
4647 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
4648 shader_info->num_input_vgprs += 1;
4649 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
4650 shader_info->num_input_vgprs += 1;
4651 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
4652 shader_info->num_input_vgprs += 1;
4653 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
4654 shader_info->num_input_vgprs += 1;
4655 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
4656 shader_info->num_input_vgprs += 1;
4657 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
4658 shader_info->num_input_vgprs += 1;
4659 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
4660 shader_info->num_input_vgprs += 1;
4661 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
4662 shader_info->num_input_vgprs += 1;
4663 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
4664 shader_info->num_input_vgprs += 1;
/* The reported VGPR count is raised to at least the number of input VGPRs. */
4666 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
4668 /* +3 for scratch wave offset and VCC */
4669 config->num_sgprs = MAX2(config->num_sgprs,
4670 shader_info->num_input_sgprs + 3);
/* Compute shaders: copy the three local-size dimensions from the NIR info. */
4671 if (nir->stage == MESA_SHADER_COMPUTE) {
4672 for (int i = 0; i < 3; ++i)
4673 shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
/* Fragment shaders: forward the early-fragment-tests flag from the NIR info. */
4676 if (nir->stage == MESA_SHADER_FRAGMENT)
4677 shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;