OSDN Git Service

draw: try to prevent overflows on index buffers
[android-x86/external-mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27
28 #include "draw_llvm.h"
29
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_logic.h"
36 #include "gallivm/lp_bld_const.h"
37 #include "gallivm/lp_bld_swizzle.h"
38 #include "gallivm/lp_bld_struct.h"
39 #include "gallivm/lp_bld_type.h"
40 #include "gallivm/lp_bld_flow.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_tgsi.h"
43 #include "gallivm/lp_bld_printf.h"
44 #include "gallivm/lp_bld_intr.h"
45 #include "gallivm/lp_bld_init.h"
46 #include "gallivm/lp_bld_type.h"
47 #include "gallivm/lp_bld_pack.h"
48 #include "gallivm/lp_bld_format.h"
49
50 #include "tgsi/tgsi_exec.h"
51 #include "tgsi/tgsi_dump.h"
52
53 #include "util/u_math.h"
54 #include "util/u_pointer.h"
55 #include "util/u_string.h"
56 #include "util/u_simple_list.h"
57
58
/* Set to 1 to make store_aos() emit a runtime printf for each attribute store. */
#define DEBUG_STORE 0
60
61
62 static void
63 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
64                    boolean elts);
65
66
67 struct draw_gs_llvm_iface {
68    struct lp_build_tgsi_gs_iface base;
69
70    struct draw_gs_llvm_variant *variant;
71    LLVMValueRef input;
72 };
73
74 static INLINE const struct draw_gs_llvm_iface *
75 draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
76 {
77    return (const struct draw_gs_llvm_iface *)iface;
78 }
79
80 /**
81  * Create LLVM type for draw_vertex_buffer.
82  */
83 static LLVMTypeRef
84 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
85                          const char *struct_name)
86 {
87    LLVMTargetDataRef target = gallivm->target;
88    LLVMTypeRef dvbuffer_type;
89    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
90    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
91
92    elem_types[DRAW_JIT_DVBUFFER_MAP] =
93       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
94    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
95
96    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
97                                            Elements(elem_types), 0);
98
99 #if HAVE_LLVM < 0x0300
100    LLVMAddTypeName(gallivm->module, struct_name, dvbuffer_type);
101
102    /* Make sure the target's struct layout cache doesn't return
103     * stale/invalid data.
104     */
105    LLVMInvalidateStructLayout(gallivm->target, dvbuffer_type);
106 #endif
107
108    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
109                           target, dvbuffer_type,
110                           DRAW_JIT_DVBUFFER_MAP);
111    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
112                           target, dvbuffer_type,
113                           DRAW_JIT_DVBUFFER_SIZE);
114
115    return dvbuffer_type;
116 }
117
118 /**
119  * Create LLVM type for struct draw_jit_texture
120  */
121 static LLVMTypeRef
122 create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
123 {
124    LLVMTargetDataRef target = gallivm->target;
125    LLVMTypeRef texture_type;
126    LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
127    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
128
129    elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
130    elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
131    elem_types[DRAW_JIT_TEXTURE_DEPTH] =
132    elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
133    elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
134    elem_types[DRAW_JIT_TEXTURE_BASE] =
135       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
136    elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
137    elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
138    elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
139       LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
140
141    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
142                                           Elements(elem_types), 0);
143
144 #if HAVE_LLVM < 0x0300
145    LLVMAddTypeName(gallivm->module, struct_name, texture_type);
146
147    /* Make sure the target's struct layout cache doesn't return
148     * stale/invalid data.
149     */
150    LLVMInvalidateStructLayout(gallivm->target, texture_type);
151 #endif
152
153    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
154                           target, texture_type,
155                           DRAW_JIT_TEXTURE_WIDTH);
156    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
157                           target, texture_type,
158                           DRAW_JIT_TEXTURE_HEIGHT);
159    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
160                           target, texture_type,
161                           DRAW_JIT_TEXTURE_DEPTH);
162    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
163                           target, texture_type,
164                           DRAW_JIT_TEXTURE_FIRST_LEVEL);
165    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
166                           target, texture_type,
167                           DRAW_JIT_TEXTURE_LAST_LEVEL);
168    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
169                           target, texture_type,
170                           DRAW_JIT_TEXTURE_BASE);
171    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
172                           target, texture_type,
173                           DRAW_JIT_TEXTURE_ROW_STRIDE);
174    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
175                           target, texture_type,
176                           DRAW_JIT_TEXTURE_IMG_STRIDE);
177    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
178                           target, texture_type,
179                           DRAW_JIT_TEXTURE_MIP_OFFSETS);
180
181    LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
182
183    return texture_type;
184 }
185
186
187 /**
188  * Create LLVM type for struct draw_jit_sampler
189  */
190 static LLVMTypeRef
191 create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
192 {
193    LLVMTargetDataRef target = gallivm->target;
194    LLVMTypeRef sampler_type;
195    LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
196
197    elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
198    elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
199    elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
200    elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
201       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
202
203    sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
204                                           Elements(elem_types), 0);
205
206 #if HAVE_LLVM < 0x0300
207    LLVMAddTypeName(gallivm->module, struct_name, sampler_type);
208
209    /* Make sure the target's struct layout cache doesn't return
210     * stale/invalid data.
211     */
212    LLVMInvalidateStructLayout(gallivm->target, sampler_type);
213 #endif
214
215    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
216                           target, sampler_type,
217                           DRAW_JIT_SAMPLER_MIN_LOD);
218    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
219                           target, sampler_type,
220                           DRAW_JIT_SAMPLER_MAX_LOD);
221    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
222                           target, sampler_type,
223                           DRAW_JIT_SAMPLER_LOD_BIAS);
224    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
225                           target, sampler_type,
226                           DRAW_JIT_SAMPLER_BORDER_COLOR);
227
228    LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
229
230    return sampler_type;
231 }
232
233
234 /**
235  * Create LLVM type for struct draw_jit_context
236  */
237 static LLVMTypeRef
238 create_jit_context_type(struct gallivm_state *gallivm,
239                         LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
240                         const char *struct_name)
241 {
242    LLVMTargetDataRef target = gallivm->target;
243    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
244    LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
245    LLVMTypeRef context_type;
246
247    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */
248                                  LP_MAX_TGSI_CONST_BUFFERS);
249    elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
250                                                  DRAW_TOTAL_CLIP_PLANES), 0);
251    elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
252    elem_types[3] = LLVMArrayType(texture_type,
253                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
254    elem_types[4] = LLVMArrayType(sampler_type,
255                                  PIPE_MAX_SAMPLERS); /* samplers */
256    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
257                                           Elements(elem_types), 0);
258 #if HAVE_LLVM < 0x0300
259    LLVMAddTypeName(gallivm->module, struct_name, context_type);
260
261    LLVMInvalidateStructLayout(gallivm->target, context_type);
262 #endif
263
264    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
265                           target, context_type, DRAW_JIT_CTX_CONSTANTS);
266    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
267                           target, context_type, DRAW_JIT_CTX_PLANES);
268    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewport,
269                           target, context_type, DRAW_JIT_CTX_VIEWPORT);
270    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
271                           target, context_type,
272                           DRAW_JIT_CTX_TEXTURES);
273    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
274                           target, context_type,
275                           DRAW_JIT_CTX_SAMPLERS);
276    LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
277                         target, context_type);
278
279    return context_type;
280 }
281
282
283 /**
284  * Create LLVM type for struct draw_gs_jit_context
285  */
286 static LLVMTypeRef
287 create_gs_jit_context_type(struct gallivm_state *gallivm,
288                            unsigned vector_length,
289                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
290                            const char *struct_name)
291 {
292    LLVMTargetDataRef target = gallivm->target;
293    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
294    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
295    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
296    LLVMTypeRef context_type;
297
298    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
299                                  LP_MAX_TGSI_CONST_BUFFERS);
300    elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
301                                                  DRAW_TOTAL_CLIP_PLANES), 0);
302    elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
303
304    elem_types[3] = LLVMArrayType(texture_type,
305                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
306    elem_types[4] = LLVMArrayType(sampler_type,
307                                  PIPE_MAX_SAMPLERS); /* samplers */
308    
309    elem_types[5] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
310    elem_types[6] = LLVMPointerType(LLVMVectorType(int_type,
311                                                   vector_length), 0);
312    elem_types[7] = LLVMPointerType(LLVMVectorType(int_type,
313                                                   vector_length), 0);
314
315    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
316                                           Elements(elem_types), 0);
317 #if HAVE_LLVM < 0x0300
318    LLVMAddTypeName(gallivm->module, struct_name, context_type);
319
320    LLVMInvalidateStructLayout(gallivm->target, context_type);
321 #endif
322
323    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
324                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
325    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
326                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
327    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport,
328                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
329    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
330                           target, context_type,
331                           DRAW_GS_JIT_CTX_TEXTURES);
332    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
333                           target, context_type,
334                           DRAW_GS_JIT_CTX_SAMPLERS);
335    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
336                           target, context_type,
337                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
338    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
339                           target, context_type,
340                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
341    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
342                           target, context_type,
343                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
344    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
345                         target, context_type);
346
347    return context_type;
348 }
349
350
351 static LLVMTypeRef
352 create_gs_jit_input_type(struct gallivm_state *gallivm)
353 {
354    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
355    LLVMTypeRef input_array;
356
357    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
358    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
359    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
360    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
361
362    return input_array;
363 }
364
365 /**
366  * Create LLVM type for struct pipe_vertex_buffer
367  */
368 static LLVMTypeRef
369 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
370                               const char *struct_name)
371 {
372    LLVMTargetDataRef target = gallivm->target;
373    LLVMTypeRef elem_types[4];
374    LLVMTypeRef vb_type;
375
376    elem_types[0] =
377    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
378    elem_types[2] =
379    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
380
381    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
382                                      Elements(elem_types), 0);
383 #if HAVE_LLVM < 0x0300
384    LLVMAddTypeName(gallivm->module, struct_name, vb_type);
385
386    LLVMInvalidateStructLayout(gallivm->target, vb_type);
387 #endif
388
389    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
390                           target, vb_type, 0);
391    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
392                           target, vb_type, 1);
393
394    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
395
396    return vb_type;
397 }
398
399
400 /**
401  * Create LLVM type for struct vertex_header;
402  */
403 static LLVMTypeRef
404 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
405 {
406    LLVMTargetDataRef target = gallivm->target;
407    LLVMTypeRef elem_types[4];
408    LLVMTypeRef vertex_header;
409    char struct_name[24];
410
411    util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
412
413    elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
414    elem_types[DRAW_JIT_VERTEX_CLIP]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
415    elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
416    elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
417
418    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
419                                            Elements(elem_types), 0);
420 #if HAVE_LLVM < 0x0300
421    LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
422
423    LLVMInvalidateStructLayout(gallivm->target, vertex_header);
424 #endif
425
426    /* these are bit-fields and we can't take address of them
427       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
428       target, vertex_header,
429       DRAW_JIT_VERTEX_CLIPMASK);
430       LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
431       target, vertex_header,
432       DRAW_JIT_VERTEX_EDGEFLAG);
433       LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
434       target, vertex_header,
435       DRAW_JIT_VERTEX_PAD);
436       LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
437       target, vertex_header,
438       DRAW_JIT_VERTEX_VERTEX_ID);
439    */
440    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
441                           target, vertex_header,
442                           DRAW_JIT_VERTEX_CLIP);
443    LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
444                           target, vertex_header,
445                           DRAW_JIT_VERTEX_PRE_CLIP_POS);
446    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
447                           target, vertex_header,
448                           DRAW_JIT_VERTEX_DATA);
449
450    assert(LLVMABISizeOfType(target, vertex_header) ==
451           offsetof(struct vertex_header, data[data_elems]));
452
453    return vertex_header;
454 }
455
456
457 /**
458  * Create LLVM types for various structures.
459  */
460 static void
461 create_jit_types(struct draw_llvm_variant *variant)
462 {
463    struct gallivm_state *gallivm = variant->gallivm;
464    LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,
465       vb_type;
466
467    texture_type = create_jit_texture_type(gallivm, "texture");
468    sampler_type = create_jit_sampler_type(gallivm, "sampler");
469
470    context_type = create_jit_context_type(gallivm, texture_type, sampler_type,
471                                           "draw_jit_context");
472    variant->context_ptr_type = LLVMPointerType(context_type, 0);
473
474    buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
475    variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
476    
477    vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
478    variant->vb_ptr_type = LLVMPointerType(vb_type, 0);
479 }
480
481
482 static LLVMTypeRef
483 get_context_ptr_type(struct draw_llvm_variant *variant)
484 {
485    if (!variant->context_ptr_type)
486       create_jit_types(variant);
487    return variant->context_ptr_type;
488 }
489
490
491 static LLVMTypeRef
492 get_buffer_ptr_type(struct draw_llvm_variant *variant)
493 {
494    if (!variant->buffer_ptr_type)
495       create_jit_types(variant);
496    return variant->buffer_ptr_type;
497 }
498
499
500 static LLVMTypeRef
501 get_vb_ptr_type(struct draw_llvm_variant *variant)
502 {
503    if (!variant->vb_ptr_type)
504       create_jit_types(variant);
505    return variant->vb_ptr_type;
506 }
507
508 static LLVMTypeRef
509 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
510 {
511    if (!variant->vertex_header_ptr_type)
512       create_jit_types(variant);
513    return variant->vertex_header_ptr_type;
514 }
515
516
517 /**
518  * Create per-context LLVM info.
519  */
520 struct draw_llvm *
521 draw_llvm_create(struct draw_context *draw)
522 {
523    struct draw_llvm *llvm;
524
525    llvm = CALLOC_STRUCT( draw_llvm );
526    if (!llvm)
527       return NULL;
528
529    lp_build_init();
530
531    llvm->draw = draw;
532
533    llvm->nr_variants = 0;
534    make_empty_list(&llvm->vs_variants_list);
535
536    llvm->nr_gs_variants = 0;
537    make_empty_list(&llvm->gs_variants_list);
538
539    return llvm;
540 }
541
542
/**
 * Release the per-context LLVM state.
 *
 * Only the draw_llvm object itself is freed here.
 */
void
draw_llvm_destroy(struct draw_llvm *llvm)
{
   /* XXX free other draw_llvm data? */
   FREE(llvm);
}
552
553
554 /**
555  * Create LLVM-generated code for a vertex shader.
556  */
557 struct draw_llvm_variant *
558 draw_llvm_create_variant(struct draw_llvm *llvm,
559                          unsigned num_inputs,
560                          const struct draw_llvm_variant_key *key)
561 {
562    struct draw_llvm_variant *variant;
563    struct llvm_vertex_shader *shader =
564       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
565    LLVMTypeRef vertex_header;
566
567    variant = MALLOC(sizeof *variant +
568                     shader->variant_key_size -
569                     sizeof variant->key);
570    if (variant == NULL)
571       return NULL;
572
573    variant->llvm = llvm;
574
575    variant->gallivm = gallivm_create();
576
577    create_jit_types(variant);
578
579    memcpy(&variant->key, key, shader->variant_key_size);
580
581    vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);
582
583    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
584
585    draw_llvm_generate(llvm, variant, FALSE);  /* linear */
586    draw_llvm_generate(llvm, variant, TRUE);   /* elts */
587
588    gallivm_compile_module(variant->gallivm);
589
590    variant->jit_func = (draw_jit_vert_func)
591          gallivm_jit_function(variant->gallivm, variant->function);
592
593    variant->jit_func_elts = (draw_jit_vert_func_elts)
594          gallivm_jit_function(variant->gallivm, variant->function_elts);
595
596    variant->shader = shader;
597    variant->list_item_global.base = variant;
598    variant->list_item_local.base = variant;
599    /*variant->no = */shader->variants_created++;
600    variant->list_item_global.base = variant;
601
602    return variant;
603 }
604
605
606 static void
607 generate_vs(struct draw_llvm_variant *variant,
608             LLVMBuilderRef builder,
609             struct lp_type vs_type,
610             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
611             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
612             const struct lp_bld_tgsi_system_values *system_values,
613             LLVMValueRef context_ptr,
614             struct lp_build_sampler_soa *draw_sampler,
615             boolean clamp_vertex_color)
616 {
617    struct draw_llvm *llvm = variant->llvm;
618    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
619    LLVMValueRef consts_ptr = draw_jit_context_vs_constants(variant->gallivm, context_ptr);
620    struct lp_build_sampler_soa *sampler = 0;
621
622    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
623       tgsi_dump(tokens, 0);
624       draw_llvm_dump_variant_key(&variant->key);
625    }
626
627    if (llvm->draw->num_sampler_views && llvm->draw->num_samplers)
628       sampler = draw_sampler;
629
630    lp_build_tgsi_soa(variant->gallivm,
631                      tokens,
632                      vs_type,
633                      NULL /*struct lp_build_mask_context *mask*/,
634                      consts_ptr,
635                      system_values,
636                      inputs,
637                      outputs,
638                      sampler,
639                      &llvm->draw->vs.vertex_shader->info,
640                      NULL);
641
642    {
643       LLVMValueRef out;
644       unsigned chan, attrib;
645       struct lp_build_context bld;
646       struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;
647       lp_build_context_init(&bld, variant->gallivm, vs_type);
648
649       for (attrib = 0; attrib < info->num_outputs; ++attrib) {
650          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
651             if (outputs[attrib][chan]) {
652                switch (info->output_semantic_name[attrib]) {
653                case TGSI_SEMANTIC_COLOR:
654                case TGSI_SEMANTIC_BCOLOR:
655                   if (clamp_vertex_color) {
656                      out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
657                      out = lp_build_clamp(&bld, out, bld.zero, bld.one);
658                      LLVMBuildStore(builder, out, outputs[attrib][chan]);
659                   }
660                   break;
661                case TGSI_SEMANTIC_FOG:
662                   if (chan == 1 || chan == 2)
663                      LLVMBuildStore(builder, bld.zero, outputs[attrib][chan]);
664                   else if (chan == 3)
665                      LLVMBuildStore(builder, bld.one, outputs[attrib][chan]);
666                   break;
667                }
668             }
669          }
670       }
671    }
672 }
673
674 static void
675 generate_fetch(struct gallivm_state *gallivm,
676                LLVMValueRef vbuffers_ptr,
677                LLVMValueRef *res,
678                struct pipe_vertex_element *velem,
679                LLVMValueRef vbuf,
680                LLVMValueRef index,
681                LLVMValueRef instance_id)
682 {
683    const struct util_format_description *format_desc =
684       util_format_description(velem->src_format);
685    LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
686    LLVMBuilderRef builder = gallivm->builder;
687    LLVMValueRef indices =
688       LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
689                    velem->vertex_buffer_index, 0);
690    LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
691                                            &indices, 1, "");
692    LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
693    LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
694    LLVMValueRef map_ptr = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
695    LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
696    LLVMValueRef stride;
697    LLVMValueRef buffer_overflowed;
698    LLVMValueRef temp_ptr =
699       lp_build_alloca(gallivm,
700                       lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
701    struct lp_build_if_state if_ctx;
702
703    if (velem->instance_divisor) {
704       /* array index = instance_id / instance_divisor */
705       index = LLVMBuildUDiv(builder, instance_id,
706                             lp_build_const_int32(gallivm, velem->instance_divisor),
707                             "instance_divisor");
708    }
709
710    stride = LLVMBuildMul(builder, vb_stride, index, "");
711
712    stride = LLVMBuildAdd(builder, stride,
713                          vb_buffer_offset,
714                          "");
715    stride = LLVMBuildAdd(builder, stride,
716                          lp_build_const_int32(gallivm, velem->src_offset),
717                          "");
718
719    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
720                                      stride, buffer_size,
721                                      "buffer_overflowed");
722    /*
723    lp_build_printf(gallivm, "vbuf index = %d, stride is %d\n", indices, stride);
724    lp_build_print_value(gallivm, "   buffer size = ", buffer_size);
725    lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
726    */
727
728    lp_build_if(&if_ctx, gallivm, buffer_overflowed);
729    {
730       LLVMValueRef val =
731          lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
732       LLVMBuildStore(builder, val, temp_ptr);
733    }
734    lp_build_else(&if_ctx);
735    {
736       LLVMValueRef val;
737       map_ptr = LLVMBuildGEP(builder, map_ptr, &stride, 1, "");
738
739       val = lp_build_fetch_rgba_aos(gallivm,
740                                     format_desc,
741                                     lp_float32_vec4_type(),
742                                     map_ptr,
743                                     zero, zero, zero);
744       LLVMBuildStore(builder, val, temp_ptr);
745    }
746    lp_build_endif(&if_ctx);
747
748    *res = LLVMBuildLoad(builder, temp_ptr, "aos");
749 }
750
751 static void
752 convert_to_soa(struct gallivm_state *gallivm,
753                LLVMValueRef (*src_aos)[LP_MAX_VECTOR_WIDTH / 32],
754                LLVMValueRef (*dst_soa)[TGSI_NUM_CHANNELS],
755                unsigned num_attribs, const struct lp_type soa_type)
756 {
757    unsigned i, j, k;
758    struct lp_type aos_channel_type = soa_type;
759
760    debug_assert(TGSI_NUM_CHANNELS == 4);
761    debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0);
762
763    aos_channel_type.length >>= 1;
764
765    for (i = 0; i < num_attribs; ++i) {
766       LLVMValueRef aos_channels[TGSI_NUM_CHANNELS];
767       unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS;
768
769       for (j = 0; j < TGSI_NUM_CHANNELS; ++j) {
770          LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
771
772          assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
773
774          for (k = 0; k < pixels_per_channel; ++k) {
775             channel[k] = src_aos[i][j + TGSI_NUM_CHANNELS * k];
776          }
777
778          aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
779       }
780
781       lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa[i]);
782    }
783 }
784
785
786 static void
787 store_aos(struct gallivm_state *gallivm,
788           LLVMValueRef io_ptr,
789           LLVMValueRef index,
790           LLVMValueRef value)
791 {
792    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
793    LLVMBuilderRef builder = gallivm->builder;
794    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
795    LLVMValueRef indices[3];
796
797    indices[0] = lp_build_const_int32(gallivm, 0);
798    indices[1] = index;
799    indices[2] = lp_build_const_int32(gallivm, 0);
800
801    data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
802    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
803
804 #if DEBUG_STORE
805    lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
806 #endif
807
808    /* Unaligned store due to the vertex header */
809    lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
810 }
811
812 /**
813  * Adjust the mask to architecture endianess. The mask will the store in struct:
814  *
815  * struct vertex_header {
816  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
817  *    unsigned edgeflag:1;
818  *    unsigned have_clipdist:1;
819  *    unsigned vertex_id:16;
820  *    [...]
821  * }
822  *
823  * On little-endian machine nothing needs to done, however on bit-endian machine
824  * the mask's fields need to be adjusted with the algorithm:
825  *
826  * uint32_t reverse (uint32_t x)
827  * {
828  *   return (x >> 16) |              // vertex_id
829  *          ((x & 0x3fff) << 18) |   // clipmask
830  *          ((x & 0x4000) << 3) |    // have_clipdist
831  *          ((x & 0x8000) << 1);     // edgeflag
832  * }
833  */
834 static LLVMValueRef
835 adjust_mask(struct gallivm_state *gallivm,
836             LLVMValueRef mask)
837 {
838 #ifdef PIPE_ARCH_BIG_ENDIAN
839    LLVMBuilderRef builder = gallivm->builder;
840    LLVMValueRef vertex_id;
841    LLVMValueRef clipmask;
842    LLVMValueRef have_clipdist;
843    LLVMValueRef edgeflag;
844
845    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
846    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
847    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
848    have_clipdist = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
849    have_clipdist = LLVMBuildShl(builder, have_clipdist, lp_build_const_int32(gallivm, 3), "");
850    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
851    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");
852
853    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
854    mask = LLVMBuildOr(builder, mask, have_clipdist, "");
855    mask = LLVMBuildOr(builder, mask, edgeflag, "");
856 #endif
857    return mask;
858 }
859
860 static void
861 store_aos_array(struct gallivm_state *gallivm,
862                 struct lp_type soa_type,
863                 LLVMValueRef io_ptr,
864                 LLVMValueRef *indices,
865                 LLVMValueRef* aos,
866                 int attrib,
867                 int num_outputs,
868                 LLVMValueRef clipmask,
869                 boolean have_clipdist)
870 {
871    LLVMBuilderRef builder = gallivm->builder;
872    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
873    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
874    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
875    int vector_length = soa_type.length;
876    int i;
877
878    debug_assert(TGSI_NUM_CHANNELS == 4);
879
880    for (i = 0; i < vector_length; i++) {
881       if (indices) {
882          inds[i] = indices[i];
883       } else {
884          inds[i] = lp_build_const_int32(gallivm, i);
885       }
886       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
887    }
888
889    if (attrib == 0) {
890       /* store vertex header for each of the n vertices */
891       LLVMValueRef val, cliptmp;
892       int vertex_id_pad_edgeflag;
893
894       /* If this assertion fails, it means we need to update the bit twidding
895        * code here.  See struct vertex_header in draw_private.h.
896        */
897       assert(DRAW_TOTAL_CLIP_PLANES==14);
898       /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
899       vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
900       if (have_clipdist)
901          vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
902       val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag);
903       /* OR with the clipmask */
904       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
905       for (i = 0; i < vector_length; i++) {
906          LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);
907          val = LLVMBuildExtractElement(builder, cliptmp, inds[i], "");
908          val = adjust_mask(gallivm, val);
909          LLVMBuildStore(builder, val, id_ptr);
910 #if DEBUG_STORE
911          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
912                          io_ptrs[i], inds[i], val);
913 #endif
914       }
915    }
916
917    /* store for each of the n vertices */
918    for (i = 0; i < vector_length; i++) {
919       store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);
920    }
921 }
922
923
924 static void
925 convert_to_aos(struct gallivm_state *gallivm,
926                LLVMValueRef io,
927                LLVMValueRef *indices,
928                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
929                LLVMValueRef clipmask,
930                int num_outputs,
931                struct lp_type soa_type,
932                boolean have_clipdist)
933 {
934    LLVMBuilderRef builder = gallivm->builder;
935    unsigned chan, attrib, i;
936
937 #if DEBUG_STORE
938    lp_build_printf(gallivm, "   # storing begin\n");
939 #endif
940    for (attrib = 0; attrib < num_outputs; ++attrib) {
941       LLVMValueRef soa[TGSI_NUM_CHANNELS];
942       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
943       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
944          if (outputs[attrib][chan]) {
945             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
946             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
947 #if DEBUG_STORE
948             lp_build_printf(gallivm, "output %d : %d ",
949                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
950                                          attrib, 0),
951                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
952                                          chan, 0));
953             lp_build_print_value(gallivm, "val = ", out);
954 #endif
955             soa[chan] = out;
956          }
957          else {
958             soa[chan] = 0;
959          }
960       }
961
962
963       if (soa_type.length == TGSI_NUM_CHANNELS) {
964          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
965       } else {
966          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
967
968          for (i = 0; i < soa_type.length; ++i) {
969             aos[i] = lp_build_extract_range(gallivm,
970                                             soa[i % TGSI_NUM_CHANNELS],
971                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
972                                             TGSI_NUM_CHANNELS);
973          }
974       }
975
976       store_aos_array(gallivm,
977                       soa_type,
978                       io, indices,
979                       aos,
980                       attrib,
981                       num_outputs,
982                       clipmask, have_clipdist);
983    }
984 #if DEBUG_STORE
985    lp_build_printf(gallivm, "   # storing end\n");
986 #endif
987 }
988
989
990 /**
991  * Stores original vertex positions in clip coordinates
992  */
993 static void
994 store_clip(struct gallivm_state *gallivm,
995            const struct lp_type vs_type,
996            LLVMValueRef io_ptr,
997            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
998            boolean pre_clip_pos, int idx)
999 {
1000    LLVMBuilderRef builder = gallivm->builder;
1001    LLVMValueRef soa[4];
1002    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1003    LLVMValueRef indices[2];
1004    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1005    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1006    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1007    int i, j;
1008
1009    indices[0] =
1010    indices[1] = lp_build_const_int32(gallivm, 0);
1011
1012    for (i = 0; i < vs_type.length; i++) {
1013       inds[i] = lp_build_const_int32(gallivm, i);
1014       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
1015    }
1016
1017    soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
1018    soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
1019    soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
1020    soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
1021
1022    if (!pre_clip_pos) {
1023       for (i = 0; i < vs_type.length; i++) {
1024          clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]);
1025       }
1026    } else {
1027       for (i = 0; i < vs_type.length; i++) {
1028          clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]);
1029       }
1030    }
1031
1032    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1033    for (i = 0; i < vs_type.length; ++i) {
1034       aos[i] = lp_build_extract_range(gallivm,
1035                                       soa[i % TGSI_NUM_CHANNELS],
1036                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1037                                       TGSI_NUM_CHANNELS);
1038    }
1039
1040    for (j = 0; j < vs_type.length; j++) {
1041       LLVMTypeRef  clip_ptr_type = LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0);
1042       LLVMValueRef clip_ptr;
1043
1044       clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
1045       clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
1046
1047       /* Unaligned store */
1048       lp_set_store_alignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1049    }
1050 }
1051
1052
1053 /**
1054  * Transforms the outputs for viewport mapping
1055  */
1056 static void
1057 generate_viewport(struct draw_llvm_variant *variant,
1058                   LLVMBuilderRef builder,
1059                   struct lp_type vs_type,
1060                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1061                   LLVMValueRef context_ptr)
1062 {
1063    int i;
1064    struct gallivm_state *gallivm = variant->gallivm;
1065    struct lp_type f32_type = vs_type;
1066    const unsigned pos = draw_current_shader_position_output(variant->llvm->draw);
1067    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1068    LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
1069    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
1070    LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
1071
1072    /* for 1/w convention*/
1073    out3 = LLVMBuildFDiv(builder, const1, out3, "");
1074    LLVMBuildStore(builder, out3, outputs[pos][3]);
1075
1076    /* Viewport Mapping */
1077    for (i=0; i<3; i++) {
1078       LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
1079       LLVMValueRef scale;
1080       LLVMValueRef trans;
1081       LLVMValueRef scale_i;
1082       LLVMValueRef trans_i;
1083       LLVMValueRef index;
1084
1085       index = lp_build_const_int32(gallivm, i);
1086       scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
1087
1088       index = lp_build_const_int32(gallivm, i+4);
1089       trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
1090
1091       scale = lp_build_broadcast(gallivm, vs_type_llvm,
1092                                  LLVMBuildLoad(builder, scale_i, "scale"));
1093       trans = lp_build_broadcast(gallivm, vs_type_llvm,
1094                                  LLVMBuildLoad(builder, trans_i, "trans"));
1095
1096       /* divide by w */
1097       out = LLVMBuildFMul(builder, out, out3, "");
1098       /* mult by scale */
1099       out = LLVMBuildFMul(builder, out, scale, "");
1100       /* add translation */
1101       out = LLVMBuildFAdd(builder, out, trans, "");
1102
1103       /* store transformed outputs */
1104       LLVMBuildStore(builder, out, outputs[pos][i]);
1105    }
1106
1107 }
1108
1109
1110 /**
1111  * Returns clipmask as nxi32 bitmask for the n vertices
1112  */
1113 static LLVMValueRef
1114 generate_clipmask(struct draw_llvm *llvm,
1115                   struct gallivm_state *gallivm,
1116                   struct lp_type vs_type,
1117                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1118                   boolean clip_xy,
1119                   boolean clip_z,
1120                   boolean clip_user,
1121                   boolean clip_halfz,
1122                   unsigned ucp_enable,
1123                   LLVMValueRef context_ptr,
1124                   boolean *have_clipdist)
1125 {
1126    LLVMBuilderRef builder = gallivm->builder;
1127    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1128    LLVMValueRef test, temp;
1129    LLVMValueRef zero, shift;
1130    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1131    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1132    LLVMValueRef plane1, planes, plane_ptr, sum;
1133    struct lp_type f32_type = vs_type;
1134    struct lp_type i32_type = lp_int_type(vs_type);
1135    const unsigned pos = draw_current_shader_position_output(llvm->draw);
1136    const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
1137    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1138    bool have_cd = false;
1139    unsigned cd[2];
1140
1141    cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0);
1142    cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1);
1143
1144    if (cd[0] != pos || cd[1] != pos)
1145       have_cd = true;
1146
1147    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1148    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1149    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1150    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
1151
1152    /*
1153     * load clipvertex and position from correct locations.
1154     * if they are the same just load them once.
1155     */
1156    pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
1157    pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
1158    pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
1159    pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */
1160
1161    if (clip_user && cv != pos) {
1162       cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
1163       cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
1164       cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
1165       cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
1166    } else {
1167       cv_x = pos_x;
1168       cv_y = pos_y;
1169       cv_z = pos_z;
1170       cv_w = pos_w;
1171    }
1172
1173    /* Cliptest, for hardwired planes */
1174    if (clip_xy) {
1175       /* plane 1 */
1176       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1177       temp = shift;
1178       test = LLVMBuildAnd(builder, test, temp, "");
1179       mask = test;
1180
1181       /* plane 2 */
1182       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1183       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1184       temp = LLVMBuildShl(builder, temp, shift, "");
1185       test = LLVMBuildAnd(builder, test, temp, "");
1186       mask = LLVMBuildOr(builder, mask, test, "");
1187
1188       /* plane 3 */
1189       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1190       temp = LLVMBuildShl(builder, temp, shift, "");
1191       test = LLVMBuildAnd(builder, test, temp, "");
1192       mask = LLVMBuildOr(builder, mask, test, "");
1193
1194       /* plane 4 */
1195       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1196       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1197       temp = LLVMBuildShl(builder, temp, shift, "");
1198       test = LLVMBuildAnd(builder, test, temp, "");
1199       mask = LLVMBuildOr(builder, mask, test, "");
1200    }
1201
1202    if (clip_z) {
1203       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1204       if (clip_halfz) {
1205          /* plane 5 */
1206          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1207          test = LLVMBuildAnd(builder, test, temp, "");
1208          mask = LLVMBuildOr(builder, mask, test, "");
1209       }
1210       else {
1211          /* plane 5 */
1212          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1213          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1214          test = LLVMBuildAnd(builder, test, temp, "");
1215          mask = LLVMBuildOr(builder, mask, test, "");
1216       }
1217       /* plane 6 */
1218       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1219       temp = LLVMBuildShl(builder, temp, shift, "");
1220       test = LLVMBuildAnd(builder, test, temp, "");
1221       mask = LLVMBuildOr(builder, mask, test, "");
1222    }
1223
1224    if (clip_user) {
1225       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
1226       LLVMValueRef indices[3];
1227
1228       /* userclip planes */
1229       while (ucp_enable) {
1230          unsigned plane_idx = ffs(ucp_enable)-1;
1231          ucp_enable &= ~(1 << plane_idx);
1232          plane_idx += 6;
1233
1234          if (have_cd && num_written_clipdistance) {
1235             LLVMValueRef clipdist;
1236             int i;
1237             i = plane_idx - 6;
1238
1239             *have_clipdist = TRUE;
1240             if (i < 4) {
1241                clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
1242             } else {
1243                clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
1244             }
1245             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1246             temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx);
1247             test = LLVMBuildAnd(builder, test, temp, "");
1248             mask = LLVMBuildOr(builder, mask, test, "");
1249          } else {
1250             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1251             indices[0] = lp_build_const_int32(gallivm, 0);
1252             indices[1] = lp_build_const_int32(gallivm, plane_idx);
1253
1254             indices[2] = lp_build_const_int32(gallivm, 0);
1255             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1256             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
1257             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1258             sum = LLVMBuildFMul(builder, planes, cv_x, "");
1259
1260             indices[2] = lp_build_const_int32(gallivm, 1);
1261             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1262             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
1263             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1264             test = LLVMBuildFMul(builder, planes, cv_y, "");
1265             sum = LLVMBuildFAdd(builder, sum, test, "");
1266
1267             indices[2] = lp_build_const_int32(gallivm, 2);
1268             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1269             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
1270             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1271             test = LLVMBuildFMul(builder, planes, cv_z, "");
1272             sum = LLVMBuildFAdd(builder, sum, test, "");
1273
1274             indices[2] = lp_build_const_int32(gallivm, 3);
1275             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1276             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
1277             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1278             test = LLVMBuildFMul(builder, planes, cv_w, "");
1279             sum = LLVMBuildFAdd(builder, sum, test, "");
1280
1281             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1282             temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx);
1283             test = LLVMBuildAnd(builder, test, temp, "");
1284             mask = LLVMBuildOr(builder, mask, test, "");
1285          }
1286       }
1287    }
1288    return mask;
1289 }
1290
1291
1292 /**
1293  * Returns boolean if any clipping has occurred
1294  * Used zero/non-zero i32 value to represent boolean
1295  */
1296 static LLVMValueRef
1297 clipmask_booli32(struct gallivm_state *gallivm,
1298                  const struct lp_type vs_type,
1299                  LLVMValueRef clipmask_bool_ptr)
1300 {
1301    LLVMBuilderRef builder = gallivm->builder;
1302    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
1303    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
1304    LLVMValueRef ret = LLVMConstNull(int32_type);
1305    LLVMValueRef temp;
1306    int i;
1307
1308    /*
1309     * Can do this with log2(vector length) pack instructions and one extract
1310     * (as we don't actually need a or) with sse2 which would be way better.
1311     */
1312    for (i=0; i < vs_type.length; i++) {
1313       temp = LLVMBuildExtractElement(builder, clipmask_bool,
1314                                      lp_build_const_int32(gallivm, i) , "");
1315       ret = LLVMBuildOr(builder, ret, temp, "");
1316    }
1317    return ret;
1318 }
1319
1320 static LLVMValueRef
1321 draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
1322                          struct lp_build_tgsi_context * bld_base,
1323                          boolean is_indirect,
1324                          LLVMValueRef vertex_index,
1325                          LLVMValueRef attrib_index,
1326                          LLVMValueRef swizzle_index)
1327 {
1328    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1329    struct gallivm_state *gallivm = bld_base->base.gallivm;
1330    LLVMBuilderRef builder = gallivm->builder;
1331    LLVMValueRef indices[3];
1332    LLVMValueRef res;
1333    struct lp_type type = bld_base->base.type;
1334
1335    if (is_indirect) {
1336       int i;
1337       res = bld_base->base.zero;
1338       for (i = 0; i < type.length; ++i) {
1339          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1340          LLVMValueRef vert_chan_index = LLVMBuildExtractElement(builder,
1341                                                                 vertex_index, idx, "");
1342          LLVMValueRef channel_vec, value;
1343          indices[0] = vert_chan_index;
1344          indices[1] = attrib_index;
1345          indices[2] = swizzle_index;
1346          
1347          channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1348          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
1349          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1350
1351          res = LLVMBuildInsertElement(builder, res, value, idx, "");
1352       }
1353    } else {
1354       indices[0] = vertex_index;
1355       indices[1] = attrib_index;
1356       indices[2] = swizzle_index;
1357
1358       res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1359       res = LLVMBuildLoad(builder, res, "");
1360    }
1361
1362    return res;
1363 }
1364
1365 static void
1366 draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
1367                          struct lp_build_tgsi_context * bld_base,
1368                          LLVMValueRef (*outputs)[4],
1369                          LLVMValueRef emitted_vertices_vec)
1370 {
1371    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1372    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1373    struct gallivm_state *gallivm = variant->gallivm;
1374    LLVMBuilderRef builder = gallivm->builder;
1375    struct lp_type gs_type = bld_base->base.type;
1376    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1377                                                   lp_int_type(gs_type), 0);
1378    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1379    LLVMValueRef next_prim_offset =
1380       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1381    LLVMValueRef io = variant->io_ptr;
1382    unsigned i;
1383    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1384
1385    for (i = 0; i < gs_type.length; ++i) {
1386       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1387       LLVMValueRef currently_emitted =
1388          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1389       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1390       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1391    }
1392
1393    convert_to_aos(gallivm, io, indices,
1394                   outputs, clipmask,
1395                   gs_info->num_outputs, gs_type,
1396                   FALSE);
1397 }
1398
1399 static void
1400 draw_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
1401                            struct lp_build_tgsi_context * bld_base,
1402                            LLVMValueRef verts_per_prim_vec,
1403                            LLVMValueRef emitted_prims_vec)
1404 {
1405    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1406    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1407    struct gallivm_state *gallivm = variant->gallivm;
1408    LLVMBuilderRef builder = gallivm->builder;
1409    LLVMValueRef prim_lengts_ptr =
1410       draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
1411    unsigned i;
1412
1413    for (i = 0; i < bld_base->base.type.length; ++i) {
1414       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1415       LLVMValueRef prims_emitted =
1416          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1417       LLVMValueRef store_ptr;
1418       LLVMValueRef num_vertices =
1419          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1420
1421       store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
1422       store_ptr = LLVMBuildLoad(builder, store_ptr, "");
1423       store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
1424       LLVMBuildStore(builder, num_vertices, store_ptr);
1425    }
1426 }
1427
1428 static void
1429 draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
1430                       struct lp_build_tgsi_context * bld_base,
1431                       LLVMValueRef total_emitted_vertices_vec,
1432                       LLVMValueRef emitted_prims_vec)
1433 {
1434    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1435    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1436    struct gallivm_state *gallivm = variant->gallivm;
1437    LLVMBuilderRef builder = gallivm->builder;
1438    LLVMValueRef emitted_verts_ptr =
1439       draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
1440    LLVMValueRef emitted_prims_ptr =
1441       draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
1442    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1443    
1444    emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
1445    emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");
1446
1447    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1448    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1449 }
1450
1451 static void
1452 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
1453                    boolean elts)
1454 {
1455    struct gallivm_state *gallivm = variant->gallivm;
1456    LLVMContextRef context = gallivm->context;
1457    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1458    LLVMTypeRef arg_types[9];
1459    unsigned num_arg_types =
1460       elts ? Elements(arg_types) : Elements(arg_types) - 1;
1461    LLVMTypeRef func_type;
1462    LLVMValueRef context_ptr;
1463    LLVMBasicBlockRef block;
1464    LLVMBuilderRef builder;
1465    struct lp_type vs_type;
1466    LLVMValueRef end, start;
1467    LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
1468    LLVMValueRef stride, step, io_itr;
1469    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1470    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1471    LLVMValueRef one = lp_build_const_int32(gallivm, 1);
1472    struct draw_context *draw = llvm->draw;
1473    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1474    unsigned i, j;
1475    struct lp_build_context bld;
1476    struct lp_build_loop_state lp_loop;
1477    const int vector_length = lp_native_vector_width / 32;
1478    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1479    LLVMValueRef fetch_max;
1480    struct lp_build_sampler_soa *sampler = 0;
1481    LLVMValueRef ret, clipmask_bool_ptr;
1482    const struct draw_geometry_shader *gs = draw->gs.geometry_shader;
1483    struct draw_llvm_variant_key *key = &variant->key;
1484    /* If geometry shader is present we need to skip both the viewport
1485     * transformation and clipping otherwise the inputs to the geometry
1486     * shader will be incorrect.
1487     */
1488    const boolean bypass_viewport = gs || key->bypass_viewport;
1489    const boolean enable_cliptest = !gs && (key->clip_xy ||
1490                                            key->clip_z  ||
1491                                            key->clip_user);
1492    LLVMValueRef variant_func;
1493    const unsigned pos = draw_current_shader_position_output(llvm->draw);
1494    const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
1495    boolean have_clipdist = FALSE;
1496    struct lp_bld_tgsi_system_values system_values;
1497
1498    memset(&system_values, 0, sizeof(system_values));
1499
1500    i = 0;
1501    arg_types[i++] = get_context_ptr_type(variant);       /* context */
1502    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1503    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
1504    if (elts) {
1505       arg_types[i++] = LLVMPointerType(int32_type, 0);/* fetch_elts  */
1506       arg_types[i++] = int32_type;                  /* fetch_elt_max */
1507    } else
1508       arg_types[i++] = int32_type;                  /* start */
1509    arg_types[i++] = int32_type;                     /* fetch_count / count */
1510    arg_types[i++] = int32_type;                     /* stride */
1511    arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
1512    arg_types[i++] = int32_type;                     /* instance_id */
1513
1514    func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
1515
1516    variant_func = LLVMAddFunction(gallivm->module,
1517                                   elts ? "draw_llvm_shader_elts" : "draw_llvm_shader",
1518                                   func_type);
1519
1520    if (elts)
1521       variant->function_elts = variant_func;
1522    else
1523       variant->function = variant_func;
1524
1525    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1526    for (i = 0; i < num_arg_types; ++i)
1527       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1528          LLVMAddAttribute(LLVMGetParam(variant_func, i),
1529                           LLVMNoAliasAttribute);
1530
1531    context_ptr               = LLVMGetParam(variant_func, 0);
1532    io_ptr                    = LLVMGetParam(variant_func, 1);
1533    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
1534    stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
1535    vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
1536    system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
1537
1538    lp_build_name(context_ptr, "context");
1539    lp_build_name(io_ptr, "io");
1540    lp_build_name(vbuffers_ptr, "vbuffers");
1541    lp_build_name(stride, "stride");
1542    lp_build_name(vb_ptr, "vb");
1543    lp_build_name(system_values.instance_id, "instance_id");
1544
1545    if (elts) {
1546       fetch_elts    = LLVMGetParam(variant_func, 3);
1547       fetch_elt_max = LLVMGetParam(variant_func, 4);
1548       fetch_count   = LLVMGetParam(variant_func, 5);
1549       lp_build_name(fetch_elts, "fetch_elts");
1550       lp_build_name(fetch_elt_max, "fetch_elt_max");
1551       lp_build_name(fetch_count, "fetch_count");
1552       start = count = NULL;
1553    }
1554    else {
1555       start        = LLVMGetParam(variant_func, 3);
1556       count        = LLVMGetParam(variant_func, 4);
1557       lp_build_name(start, "start");
1558       lp_build_name(count, "count");
1559       fetch_elts = fetch_count = NULL;
1560    }
1561
1562    /*
1563     * Function body
1564     */
1565
1566    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1567    builder = gallivm->builder;
1568    LLVMPositionBuilderAtEnd(builder, block);
1569
1570    lp_build_context_init(&bld, gallivm, lp_type_int(32));
1571
1572    memset(&vs_type, 0, sizeof vs_type);
1573    vs_type.floating = TRUE; /* floating point values */
1574    vs_type.sign = TRUE;     /* values are signed */
1575    vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
1576    vs_type.width = 32;      /* 32-bit float */
1577    vs_type.length = vector_length;
1578
1579    /* hold temporary "bool" clipmask */
1580    clipmask_bool_ptr = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, vs_type), "");
1581    LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), clipmask_bool_ptr);
1582
1583    /* code generated texture sampling */
1584    sampler = draw_llvm_sampler_soa_create(
1585       draw_llvm_variant_key_samplers(key),
1586       context_ptr);
1587
1588    if (elts) {
1589       start = zero;
1590       end = fetch_count;
1591    }
1592    else {
1593       end = lp_build_add(&bld, start, count);
1594    }
1595
1596    step = lp_build_const_int32(gallivm, vector_length);
1597
1598    fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
1599
1600    lp_build_loop_begin(&lp_loop, gallivm, start);
1601    {
1602       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1603       LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
1604       LLVMValueRef io;
1605       LLVMValueRef clipmask;   /* holds the clipmask value */
1606       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
1607
1608       if (elts)
1609          io_itr = lp_loop.counter;
1610       else
1611          io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1612
1613       io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1614 #if DEBUG_STORE
1615       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
1616                       io_itr, io, lp_loop.counter);
1617 #endif
1618       system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length));
1619       for (i = 0; i < vector_length; ++i) {
1620          LLVMValueRef true_index =
1621             LLVMBuildAdd(builder,
1622                          lp_loop.counter,
1623                          lp_build_const_int32(gallivm, i), "");
1624
1625          /* make sure we're not out of bounds which can happen
1626           * if fetch_count % 4 != 0, because on the last iteration
1627           * a few of the 4 vertex fetches will be out of bounds */
1628          true_index = lp_build_min(&bld, true_index, fetch_max);
1629
1630          system_values.vertex_id = LLVMBuildInsertElement(
1631             gallivm->builder,
1632             system_values.vertex_id, true_index,
1633             lp_build_const_int32(gallivm, i), "");
1634
1635          if (elts) {
1636             LLVMValueRef fetch_ptr;
1637             LLVMValueRef index_overflowed;
1638             LLVMValueRef index_ptr =
1639                lp_build_alloca(
1640                   gallivm,
1641                   lp_build_vec_type(gallivm, lp_type_int(32)), "");
1642             struct lp_build_if_state if_ctx;
1643             index_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
1644                                              true_index, fetch_elt_max,
1645                                              "index_overflowed");
1646             
1647             lp_build_if(&if_ctx, gallivm, index_overflowed);
1648             {
1649                /* Generate maximum possible index so that
1650                 * generate_fetch can treat it just like
1651                 * any other overflow and return zeros.
1652                 * We don't have to worry about the restart
1653                 * primitive index because it has already been 
1654                 * handled
1655                 */
1656                LLVMValueRef val =
1657                   lp_build_const_int32(gallivm, 0xffffffff);
1658                LLVMBuildStore(builder, val, index_ptr);
1659             }
1660             lp_build_else(&if_ctx);
1661             {
1662                LLVMValueRef val;
1663                fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1664                                         &true_index, 1, "");
1665                val = LLVMBuildLoad(builder, fetch_ptr, "");
1666                LLVMBuildStore(builder, val, index_ptr);
1667             }
1668             lp_build_endif(&if_ctx);
1669             true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
1670          }
1671
1672          for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1673             struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1674             LLVMValueRef vb_index =
1675                lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1676             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
1677             generate_fetch(gallivm, vbuffers_ptr,
1678                            &aos_attribs[j][i], velem, vb, true_index,
1679                            system_values.instance_id);
1680          }
1681       }
1682       convert_to_soa(gallivm, aos_attribs, inputs,
1683                      draw->pt.nr_vertex_elements, vs_type);
1684
1685       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
1686       generate_vs(variant,
1687                   builder,
1688                   vs_type,
1689                   outputs,
1690                   ptr_aos,
1691                   &system_values,
1692                   context_ptr,
1693                   sampler,
1694                   key->clamp_vertex_color);
1695
1696       if (pos != -1 && cv != -1) {
1697          /* store original positions in clip before further manipulation */
1698          store_clip(gallivm, vs_type, io, outputs, 0, cv);
1699          store_clip(gallivm, vs_type, io, outputs, 1, pos);
1700
1701          /* do cliptest */
1702          if (enable_cliptest) {
1703             LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
1704             /* allocate clipmask, assign it integer type */
1705             clipmask = generate_clipmask(llvm,
1706                                          gallivm,
1707                                          vs_type,
1708                                          outputs,
1709                                          key->clip_xy,
1710                                          key->clip_z,
1711                                          key->clip_user,
1712                                          key->clip_halfz,
1713                                          key->ucp_enable,
1714                                          context_ptr, &have_clipdist);
1715             temp = LLVMBuildOr(builder, clipmask, temp, "");
1716             /* store temporary clipping boolean value */
1717             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
1718          }
1719          else {
1720             clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
1721          }
1722
1723          /* do viewport mapping */
1724          if (!bypass_viewport) {
1725             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
1726          }
1727       }
1728       else {
1729          clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
1730       }
1731
1732       /* store clipmask in vertex header,
1733        * original positions in clip
1734        * and transformed positions in data
1735        */
1736       convert_to_aos(gallivm, io, NULL, outputs, clipmask,
1737                      vs_info->num_outputs, vs_type,
1738                      have_clipdist);
1739    }
1740
1741    lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
1742
1743    sampler->destroy(sampler);
1744
1745    /* return clipping boolean value for function */
1746    ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr);
1747
1748    LLVMBuildRet(builder, ret);
1749
1750    gallivm_verify_function(gallivm, variant_func);
1751 }
1752
1753
1754 struct draw_llvm_variant_key *
1755 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1756 {
1757    unsigned i;
1758    struct draw_llvm_variant_key *key;
1759    struct draw_sampler_static_state *draw_sampler;
1760
1761    key = (struct draw_llvm_variant_key *)store;
1762
1763    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/
1764
1765    /* Presumably all variants of the shader should have the same
1766     * number of vertex elements - ie the number of shader inputs.
1767     * NOTE: we NEED to store the needed number of needed inputs
1768     * here, not the number of provided elements to match keysize
1769     * (and the offset of sampler state in the key).
1770     */
1771    key->nr_vertex_elements = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
1772    assert(key->nr_vertex_elements <= llvm->draw->pt.nr_vertex_elements);
1773
1774    /* will have to rig this up properly later */
1775    key->clip_xy = llvm->draw->clip_xy;
1776    key->clip_z = llvm->draw->clip_z;
1777    key->clip_user = llvm->draw->clip_user;
1778    key->bypass_viewport = llvm->draw->identity_viewport;
1779    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
1780    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1781    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
1782    key->has_gs = llvm->draw->gs.geometry_shader != NULL;
1783    key->pad1 = 0;
1784
1785    /* All variants of this shader will have the same value for
1786     * nr_samplers.  Not yet trying to compact away holes in the
1787     * sampler array.
1788     */
1789    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1790    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
1791       key->nr_sampler_views =
1792          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
1793    }
1794    else {
1795       key->nr_sampler_views = key->nr_samplers;
1796    }
1797
1798    draw_sampler = draw_llvm_variant_key_samplers(key);
1799
1800    memcpy(key->vertex_element,
1801           llvm->draw->pt.vertex_element,
1802           sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1803
1804    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
1805
1806    for (i = 0 ; i < key->nr_samplers; i++) {
1807       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
1808                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
1809    }
1810    for (i = 0 ; i < key->nr_sampler_views; i++) {
1811       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
1812                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
1813    }
1814
1815    return key;
1816 }
1817
1818
1819 void
1820 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
1821 {
1822    unsigned i;
1823    struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
1824
1825    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
1826    debug_printf("clip_xy = %u\n", key->clip_xy);
1827    debug_printf("clip_z = %u\n", key->clip_z);
1828    debug_printf("clip_user = %u\n", key->clip_user);
1829    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
1830    debug_printf("clip_halfz = %u\n", key->clip_halfz);
1831    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
1832    debug_printf("has_gs = %u\n", key->has_gs);
1833    debug_printf("ucp_enable = %u\n", key->ucp_enable);
1834
1835    for (i = 0 ; i < key->nr_vertex_elements; i++) {
1836       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
1837       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
1838       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
1839       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
1840    }
1841
1842    for (i = 0 ; i < key->nr_sampler_views; i++) {
1843       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
1844    }
1845 }
1846
1847
1848 void
1849 draw_llvm_set_mapped_texture(struct draw_context *draw,
1850                              unsigned shader_stage,
1851                              unsigned sview_idx,
1852                              uint32_t width, uint32_t height, uint32_t depth,
1853                              uint32_t first_level, uint32_t last_level,
1854                              const void *base_ptr,
1855                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
1856                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
1857                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
1858 {
1859    unsigned j;
1860    struct draw_jit_texture *jit_tex;
1861
1862    assert(shader_stage == PIPE_SHADER_VERTEX ||
1863           shader_stage == PIPE_SHADER_GEOMETRY);
1864
1865    if (shader_stage == PIPE_SHADER_VERTEX) {
1866       assert(sview_idx < Elements(draw->llvm->jit_context.textures));
1867
1868       jit_tex = &draw->llvm->jit_context.textures[sview_idx];
1869    } else if (shader_stage == PIPE_SHADER_GEOMETRY) {
1870       assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures));
1871
1872       jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
1873    } else {
1874       assert(0);
1875       return;
1876    }
1877
1878    jit_tex->width = width;
1879    jit_tex->height = height;
1880    jit_tex->depth = depth;
1881    jit_tex->first_level = first_level;
1882    jit_tex->last_level = last_level;
1883    jit_tex->base = base_ptr;
1884
1885    for (j = first_level; j <= last_level; j++) {
1886       jit_tex->mip_offsets[j] = mip_offsets[j];
1887       jit_tex->row_stride[j] = row_stride[j];
1888       jit_tex->img_stride[j] = img_stride[j];
1889    }
1890 }
1891
1892
1893 void
1894 draw_llvm_set_sampler_state(struct draw_context *draw, 
1895                             unsigned shader_type)
1896 {
1897    unsigned i;
1898
1899    if (shader_type == PIPE_SHADER_VERTEX) {
1900       for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
1901          struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
1902
1903          if (draw->samplers[i]) {
1904             const struct pipe_sampler_state *s
1905                = draw->samplers[PIPE_SHADER_VERTEX][i];
1906             jit_sam->min_lod = s->min_lod;
1907             jit_sam->max_lod = s->max_lod;
1908             jit_sam->lod_bias = s->lod_bias;
1909             COPY_4V(jit_sam->border_color, s->border_color.f);
1910          }
1911       }
1912    } else if (shader_type == PIPE_SHADER_GEOMETRY) {
1913       for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
1914          struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
1915
1916          if (draw->samplers[i]) {
1917             const struct pipe_sampler_state *s
1918                = draw->samplers[PIPE_SHADER_GEOMETRY][i];
1919             jit_sam->min_lod = s->min_lod;
1920             jit_sam->max_lod = s->max_lod;
1921             jit_sam->lod_bias = s->lod_bias;
1922             COPY_4V(jit_sam->border_color, s->border_color.f);
1923          }
1924       }
1925    }
1926 }
1927
1928
1929 void
1930 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1931 {
1932    struct draw_llvm *llvm = variant->llvm;
1933
1934    if (variant->function_elts) {
1935       gallivm_free_function(variant->gallivm,
1936                             variant->function_elts, variant->jit_func_elts);
1937    }
1938
1939    if (variant->function) {
1940       gallivm_free_function(variant->gallivm,
1941                             variant->function, variant->jit_func);
1942    }
1943
1944    gallivm_destroy(variant->gallivm);
1945
1946    remove_from_list(&variant->list_item_local);
1947    variant->shader->variants_cached--;
1948    remove_from_list(&variant->list_item_global);
1949    llvm->nr_variants--;
1950    FREE(variant);
1951 }
1952
1953
1954 /**
1955  * Create LLVM types for various structures.
1956  */
1957 static void
1958 create_gs_jit_types(struct draw_gs_llvm_variant *var)
1959 {
1960    struct gallivm_state *gallivm = var->gallivm;
1961    LLVMTypeRef texture_type, sampler_type, context_type;
1962
1963    texture_type = create_jit_texture_type(gallivm, "texture");
1964    sampler_type = create_jit_sampler_type(gallivm, "sampler");
1965
1966    context_type = create_gs_jit_context_type(gallivm,
1967                                              var->shader->base.vector_length,
1968                                              texture_type, sampler_type,
1969                                              "draw_gs_jit_context");
1970    var->context_ptr_type = LLVMPointerType(context_type, 0);
1971
1972    var->input_array_type = create_gs_jit_input_type(gallivm);
1973 }
1974
1975 static LLVMTypeRef
1976 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
1977 {
1978    if (!variant->context_ptr_type)
1979       create_gs_jit_types(variant);
1980    return variant->context_ptr_type;
1981 }
1982
1983 static LLVMValueRef
1984 generate_mask_value(struct draw_gs_llvm_variant *variant,
1985                     struct lp_type gs_type)
1986 {
1987    struct gallivm_state *gallivm = variant->gallivm;
1988    LLVMBuilderRef builder = gallivm->builder;
1989    LLVMValueRef bits[16];
1990    struct lp_type  mask_type = lp_int_type(gs_type);
1991    struct lp_type mask_elem_type = lp_elem_type(mask_type);
1992    LLVMValueRef mask_val = lp_build_const_vec(gallivm,
1993                                               mask_type,
1994                                               0);
1995    unsigned i;
1996
1997    assert(gs_type.length <= Elements(bits));
1998
1999    for (i = gs_type.length; i >= 1; --i) {
2000       int idx = i - 1;
2001       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
2002       bits[idx] = lp_build_compare(gallivm,
2003                                    mask_elem_type, PIPE_FUNC_GEQUAL,
2004                                    variant->num_prims, ind);
2005    }
2006    for (i = 0; i < gs_type.length; ++i) {
2007       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
2008       mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
2009    }
2010    mask_val = lp_build_compare(gallivm,
2011                                mask_type, PIPE_FUNC_NOTEQUAL,
2012                                mask_val,
2013                                lp_build_const_int_vec(gallivm, mask_type, 0));
2014
2015    return mask_val;
2016 }
2017
2018 static void
2019 draw_gs_llvm_generate(struct draw_llvm *llvm,
2020                       struct draw_gs_llvm_variant *variant)
2021 {
2022    struct gallivm_state *gallivm = variant->gallivm;
2023    LLVMContextRef context = gallivm->context;
2024    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2025    LLVMTypeRef arg_types[6];
2026    LLVMTypeRef func_type;
2027    LLVMValueRef variant_func;
2028    LLVMValueRef context_ptr;
2029    LLVMValueRef prim_id_ptr;
2030    LLVMBasicBlockRef block;
2031    LLVMBuilderRef builder;
2032    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2033    struct lp_build_sampler_soa *sampler = 0;
2034    struct lp_build_context bld;
2035    struct lp_bld_tgsi_system_values system_values;
2036    struct lp_type gs_type;
2037    unsigned i;
2038    struct draw_gs_llvm_iface gs_iface;
2039    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2040    LLVMValueRef consts_ptr;
2041    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2042    struct lp_build_mask_context mask;
2043    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2044    unsigned vector_length = variant->shader->base.vector_length;
2045
2046    memset(&system_values, 0, sizeof(system_values));
2047
2048    assert(variant->vertex_header_ptr_type);
2049
2050    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2051    arg_types[1] = variant->input_array_type;           /* input */
2052    arg_types[2] = variant->vertex_header_ptr_type;     /* vertex_header */
2053    arg_types[3] = int32_type;                          /* num_prims */
2054    arg_types[4] = int32_type;                          /* instance_id */
2055    arg_types[5] = LLVMPointerType(
2056       LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
2057
2058    func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
2059
2060    variant_func = LLVMAddFunction(gallivm->module, "draw_geometry_shader",
2061                                   func_type);
2062    variant->function = variant_func;
2063
2064    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2065
2066    for (i = 0; i < Elements(arg_types); ++i)
2067       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2068          LLVMAddAttribute(LLVMGetParam(variant_func, i),
2069                           LLVMNoAliasAttribute);
2070
2071    context_ptr               = LLVMGetParam(variant_func, 0);
2072    input_array               = LLVMGetParam(variant_func, 1);
2073    io_ptr                    = LLVMGetParam(variant_func, 2);
2074    num_prims                 = LLVMGetParam(variant_func, 3);
2075    system_values.instance_id = LLVMGetParam(variant_func, 4);
2076    prim_id_ptr               = LLVMGetParam(variant_func, 5);
2077
2078    lp_build_name(context_ptr, "context");
2079    lp_build_name(input_array, "input");
2080    lp_build_name(io_ptr, "io");
2081    lp_build_name(num_prims, "num_prims");
2082    lp_build_name(system_values.instance_id, "instance_id");
2083    lp_build_name(prim_id_ptr, "prim_id_ptr");
2084
2085    variant->context_ptr = context_ptr;
2086    variant->io_ptr = io_ptr;
2087    variant->num_prims = num_prims;
2088
2089    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2090    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2091    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2092    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2093    gs_iface.input = input_array;
2094    gs_iface.variant = variant;
2095
2096    /*
2097     * Function body
2098     */
2099
2100    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2101    builder = gallivm->builder;
2102    LLVMPositionBuilderAtEnd(builder, block);
2103
2104    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2105
2106    memset(&gs_type, 0, sizeof gs_type);
2107    gs_type.floating = TRUE; /* floating point values */
2108    gs_type.sign = TRUE;     /* values are signed */
2109    gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2110    gs_type.width = 32;      /* 32-bit float */
2111    gs_type.length = vector_length;
2112
2113    consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
2114
2115    /* code generated texture sampling */
2116    sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
2117                                           context_ptr);
2118
2119    mask_val = generate_mask_value(variant, gs_type);
2120    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2121
2122    if (gs_info->uses_primid) {
2123       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");;
2124    }
2125
2126    lp_build_tgsi_soa(variant->gallivm,
2127                      tokens,
2128                      gs_type,
2129                      &mask,
2130                      consts_ptr,
2131                      &system_values,
2132                      NULL,
2133                      outputs,
2134                      sampler,
2135                      &llvm->draw->gs.geometry_shader->info,
2136                      (const struct lp_build_tgsi_gs_iface *)&gs_iface);
2137
2138    sampler->destroy(sampler);
2139
2140    lp_build_mask_end(&mask);
2141
2142    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2143
2144    gallivm_verify_function(gallivm, variant_func);
2145 }
2146
2147
2148 struct draw_gs_llvm_variant *
2149 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2150                             unsigned num_outputs,
2151                             const struct draw_gs_llvm_variant_key *key)
2152 {
2153    struct draw_gs_llvm_variant *variant;
2154    struct llvm_geometry_shader *shader =
2155       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2156    LLVMTypeRef vertex_header;
2157
2158    variant = MALLOC(sizeof *variant +
2159                     shader->variant_key_size -
2160                     sizeof variant->key);
2161    if (variant == NULL)
2162       return NULL;
2163
2164    variant->llvm = llvm;
2165    variant->shader = shader;
2166
2167    variant->gallivm = gallivm_create();
2168
2169    create_gs_jit_types(variant);
2170
2171    memcpy(&variant->key, key, shader->variant_key_size);
2172
2173    vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
2174
2175    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
2176
2177    draw_gs_llvm_generate(llvm, variant);
2178
2179    gallivm_compile_module(variant->gallivm);
2180
2181    variant->jit_func = (draw_gs_jit_func)
2182          gallivm_jit_function(variant->gallivm, variant->function);
2183
2184    variant->list_item_global.base = variant;
2185    variant->list_item_local.base = variant;
2186    /*variant->no = */shader->variants_created++;
2187    variant->list_item_global.base = variant;
2188
2189    return variant;
2190 }
2191
2192 void
2193 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2194 {
2195    struct draw_llvm *llvm = variant->llvm;
2196
2197    if (variant->function) {
2198       gallivm_free_function(variant->gallivm,
2199                             variant->function, variant->jit_func);
2200    }
2201
2202    gallivm_destroy(variant->gallivm);
2203
2204    remove_from_list(&variant->list_item_local);
2205    variant->shader->variants_cached--;
2206    remove_from_list(&variant->list_item_global);
2207    llvm->nr_gs_variants--;
2208    FREE(variant);
2209 }
2210
2211 struct draw_gs_llvm_variant_key *
2212 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2213 {
2214    unsigned i;
2215    struct draw_gs_llvm_variant_key *key;
2216    struct draw_sampler_static_state *draw_sampler;
2217
2218    key = (struct draw_gs_llvm_variant_key *)store;
2219
2220    /* All variants of this shader will have the same value for
2221     * nr_samplers.  Not yet trying to compact away holes in the
2222     * sampler array.
2223     */
2224    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2225    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2226       key->nr_sampler_views =
2227          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2228    }
2229    else {
2230       key->nr_sampler_views = key->nr_samplers;
2231    }
2232
2233    draw_sampler = key->samplers;
2234
2235    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2236
2237    for (i = 0 ; i < key->nr_samplers; i++) {
2238       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2239                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
2240    }
2241    for (i = 0 ; i < key->nr_sampler_views; i++) {
2242       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2243                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
2244    }
2245
2246    return key;
2247 }
2248
2249 void
2250 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
2251 {
2252    unsigned i;
2253    struct draw_sampler_static_state *sampler = key->samplers;
2254
2255    for (i = 0 ; i < key->nr_sampler_views; i++) {
2256       debug_printf("sampler[%i].src_format = %s\n", i,
2257                    util_format_name(sampler[i].texture_state.format));
2258    }
2259 }