OSDN Git Service

gallivm: fix bogus aos path detection
[android-x86/external-mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27
28 /**
29  * @file
30  * Texture sampling -- SoA.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  * @author Brian Paul <brianp@vmware.com>
34  */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63
64
65 /**
66  * Generate code to fetch a texel from a texture at int coords (x, y, z).
67  * The computation depends on whether the texture is 1D, 2D or 3D.
68  * The result, texel, will be float vectors:
69  *   texel[0] = red values
70  *   texel[1] = green values
71  *   texel[2] = blue values
72  *   texel[3] = alpha values
73  */
74 static void
75 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
76                           unsigned sampler_unit,
77                           LLVMValueRef width,
78                           LLVMValueRef height,
79                           LLVMValueRef depth,
80                           LLVMValueRef x,
81                           LLVMValueRef y,
82                           LLVMValueRef z,
83                           LLVMValueRef y_stride,
84                           LLVMValueRef z_stride,
85                           LLVMValueRef data_ptr,
86                           LLVMValueRef mipoffsets,
87                           LLVMValueRef texel_out[4])
88 {
89    const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
90    const unsigned dims = bld->dims;
91    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
92    LLVMBuilderRef builder = bld->gallivm->builder;
93    LLVMValueRef offset;
94    LLVMValueRef i, j;
95    LLVMValueRef use_border = NULL;
96
97    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
98    if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
99                                               static_state->min_img_filter,
100                                               static_state->mag_img_filter)) {
101       LLVMValueRef b1, b2;
102       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
103       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
104       use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
105    }
106
107    if (dims >= 2 &&
108        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
109                                               static_state->min_img_filter,
110                                               static_state->mag_img_filter)) {
111       LLVMValueRef b1, b2;
112       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
113       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
114       if (use_border) {
115          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
116          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
117       }
118       else {
119          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
120       }
121    }
122
123    if (dims == 3 &&
124        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
125                                               static_state->min_img_filter,
126                                               static_state->mag_img_filter)) {
127       LLVMValueRef b1, b2;
128       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
129       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
130       if (use_border) {
131          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
132          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
133       }
134       else {
135          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
136       }
137    }
138
139    /* convert x,y,z coords to linear offset from start of texture, in bytes */
140    lp_build_sample_offset(&bld->int_coord_bld,
141                           bld->format_desc,
142                           x, y, z, y_stride, z_stride,
143                           &offset, &i, &j);
144    if (mipoffsets) {
145       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
146    }
147
148    if (use_border) {
149       /* If we can sample the border color, it means that texcoords may
150        * lie outside the bounds of the texture image.  We need to do
151        * something to prevent reading out of bounds and causing a segfault.
152        *
153        * Simply AND the texture coords with !use_border.  This will cause
154        * coords which are out of bounds to become zero.  Zero's guaranteed
155        * to be inside the texture image.
156        */
157       offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
158    }
159
160    lp_build_fetch_rgba_soa(bld->gallivm,
161                            bld->format_desc,
162                            bld->texel_type,
163                            data_ptr, offset,
164                            i, j,
165                            texel_out);
166
167    /*
168     * Note: if we find an app which frequently samples the texture border
169     * we might want to implement a true conditional here to avoid sampling
170     * the texture whenever possible (since that's quite a bit of code).
171     * Ex:
172     *   if (use_border) {
173     *      texel = border_color;
174     *   }
175     *   else {
176     *      texel = sample_texture(coord);
177     *   }
178     * As it is now, we always sample the texture, then selectively replace
179     * the texel color results with the border color.
180     */
181
182    if (use_border) {
183       /* select texel color or border color depending on use_border. */
184       const struct util_format_description *format_desc = bld->format_desc;
185       int chan;
186       struct lp_type border_type = bld->texel_type;
187       border_type.length = 4;
188       /*
189        * Only replace channels which are actually present. The others should
190        * get optimized away eventually by sampler_view swizzle anyway but it's
191        * easier too.
192        */
193       for (chan = 0; chan < 4; chan++) {
194          unsigned chan_s;
195          /* reverse-map channel... */
196          for (chan_s = 0; chan_s < 4; chan_s++) {
197             if (chan_s == format_desc->swizzle[chan]) {
198                break;
199             }
200          }
201          if (chan_s <= 3) {
202             /* use the already clamped color */
203             LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
204             LLVMValueRef border_chan;
205
206             border_chan = lp_build_extract_broadcast(bld->gallivm,
207                                                      border_type,
208                                                      bld->texel_type,
209                                                      bld->border_color_clamped,
210                                                      idx);
211             texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
212                                               border_chan, texel_out[chan]);
213          }
214       }
215    }
216 }
217
218
219 /**
220  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
221  */
222 static LLVMValueRef
223 lp_build_coord_mirror(struct lp_build_sample_context *bld,
224                       LLVMValueRef coord)
225 {
226    struct lp_build_context *coord_bld = &bld->coord_bld;
227    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
228    LLVMValueRef fract, flr, isOdd;
229
230    lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
231
232    /* isOdd = flr & 1 */
233    isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
234
235    /* make coord positive or negative depending on isOdd */
236    coord = lp_build_set_sign(coord_bld, fract, isOdd);
237
238    /* convert isOdd to float */
239    isOdd = lp_build_int_to_float(coord_bld, isOdd);
240
241    /* add isOdd to coord */
242    coord = lp_build_add(coord_bld, coord, isOdd);
243
244    return coord;
245 }
246
247
248 /**
249  * Helper to compute the first coord and the weight for
250  * linear wrap repeat npot textures
251  */
252 void
253 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
254                                   LLVMValueRef coord_f,
255                                   LLVMValueRef length_i,
256                                   LLVMValueRef length_f,
257                                   LLVMValueRef *coord0_i,
258                                   LLVMValueRef *weight_f)
259 {
260    struct lp_build_context *coord_bld = &bld->coord_bld;
261    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
262    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
263    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
264                                                 int_coord_bld->one);
265    LLVMValueRef mask;
266    /* wrap with normalized floats is just fract */
267    coord_f = lp_build_fract(coord_bld, coord_f);
268    /* mul by size and subtract 0.5 */
269    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
270    coord_f = lp_build_sub(coord_bld, coord_f, half);
271    /*
272     * we avoided the 0.5/length division before the repeat wrap,
273     * now need to fix up edge cases with selects
274     */
275    /* convert to int, compute lerp weight */
276    lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
277    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
278                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
279    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
280 }
281
282
283 /**
284  * Build LLVM code for texture wrap mode for linear filtering.
285  * \param x0_out  returns first integer texcoord
286  * \param x1_out  returns second integer texcoord
287  * \param weight_out  returns linear interpolation weight
288  */
289 static void
290 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
291                             LLVMValueRef coord,
292                             LLVMValueRef length,
293                             LLVMValueRef length_f,
294                             LLVMValueRef offset,
295                             boolean is_pot,
296                             unsigned wrap_mode,
297                             LLVMValueRef *x0_out,
298                             LLVMValueRef *x1_out,
299                             LLVMValueRef *weight_out)
300 {
301    struct lp_build_context *coord_bld = &bld->coord_bld;
302    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
303    LLVMBuilderRef builder = bld->gallivm->builder;
304    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
305    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
306    LLVMValueRef coord0, coord1, weight;
307
308    switch(wrap_mode) {
309    case PIPE_TEX_WRAP_REPEAT:
310       if (is_pot) {
311          /* mul by size and subtract 0.5 */
312          coord = lp_build_mul(coord_bld, coord, length_f);
313          coord = lp_build_sub(coord_bld, coord, half);
314          if (offset) {
315             offset = lp_build_int_to_float(coord_bld, offset);
316             coord = lp_build_add(coord_bld, coord, offset);
317          }
318          /* convert to int, compute lerp weight */
319          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
320          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
321          /* repeat wrap */
322          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
323          coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
324       }
325       else {
326          LLVMValueRef mask;
327          if (offset) {
328             offset = lp_build_int_to_float(coord_bld, offset);
329             offset = lp_build_div(coord_bld, offset, length_f);
330             coord = lp_build_add(coord_bld, coord, offset);
331          }
332          lp_build_coord_repeat_npot_linear(bld, coord,
333                                            length, length_f,
334                                            &coord0, &weight);
335          mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
336                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
337          coord1 = LLVMBuildAnd(builder,
338                                lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
339                                mask, "");
340       }
341       break;
342
343    case PIPE_TEX_WRAP_CLAMP:
344       if (bld->static_sampler_state->normalized_coords) {
345          /* scale coord to length */
346          coord = lp_build_mul(coord_bld, coord, length_f);
347       }
348       if (offset) {
349          offset = lp_build_int_to_float(coord_bld, offset);
350          coord = lp_build_add(coord_bld, coord, offset);
351       }
352
353       /* clamp to [0, length] */
354       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
355
356       coord = lp_build_sub(coord_bld, coord, half);
357
358       /* convert to int, compute lerp weight */
359       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
360       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
361       break;
362
363    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
364       {
365          struct lp_build_context abs_coord_bld = bld->coord_bld;
366          abs_coord_bld.type.sign = FALSE;
367
368          if (bld->static_sampler_state->normalized_coords) {
369             /* mul by tex size */
370             coord = lp_build_mul(coord_bld, coord, length_f);
371          }
372          if (offset) {
373             offset = lp_build_int_to_float(coord_bld, offset);
374             coord = lp_build_add(coord_bld, coord, offset);
375          }
376
377          /* clamp to length max */
378          coord = lp_build_min(coord_bld, coord, length_f);
379          /* subtract 0.5 */
380          coord = lp_build_sub(coord_bld, coord, half);
381          /* clamp to [0, length - 0.5] */
382          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
383          /* convert to int, compute lerp weight */
384          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
385          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
386          /* coord1 = min(coord1, length-1) */
387          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
388          break;
389       }
390
391    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
392       if (bld->static_sampler_state->normalized_coords) {
393          /* scale coord to length */
394          coord = lp_build_mul(coord_bld, coord, length_f);
395       }
396       if (offset) {
397          offset = lp_build_int_to_float(coord_bld, offset);
398          coord = lp_build_add(coord_bld, coord, offset);
399       }
400       /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
401       /* can skip clamp (though might not work for very large coord values */
402       coord = lp_build_sub(coord_bld, coord, half);
403       /* convert to int, compute lerp weight */
404       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
405       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
406       break;
407
408    case PIPE_TEX_WRAP_MIRROR_REPEAT:
409       /* compute mirror function */
410       coord = lp_build_coord_mirror(bld, coord);
411
412       /* scale coord to length */
413       coord = lp_build_mul(coord_bld, coord, length_f);
414       coord = lp_build_sub(coord_bld, coord, half);
415       if (offset) {
416          offset = lp_build_int_to_float(coord_bld, offset);
417          coord = lp_build_add(coord_bld, coord, offset);
418       }
419
420       /* convert to int, compute lerp weight */
421       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
422       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
423
424       /* coord0 = max(coord0, 0) */
425       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
426       /* coord1 = min(coord1, length-1) */
427       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
428       break;
429
430    case PIPE_TEX_WRAP_MIRROR_CLAMP:
431       if (bld->static_sampler_state->normalized_coords) {
432          /* scale coord to length */
433          coord = lp_build_mul(coord_bld, coord, length_f);
434       }
435       if (offset) {
436          offset = lp_build_int_to_float(coord_bld, offset);
437          coord = lp_build_add(coord_bld, coord, offset);
438       }
439       coord = lp_build_abs(coord_bld, coord);
440
441       /* clamp to [0, length] */
442       coord = lp_build_min(coord_bld, coord, length_f);
443
444       coord = lp_build_sub(coord_bld, coord, half);
445
446       /* convert to int, compute lerp weight */
447       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
448       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
449       break;
450
451    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
452       {
453          struct lp_build_context abs_coord_bld = bld->coord_bld;
454          abs_coord_bld.type.sign = FALSE;
455
456          if (bld->static_sampler_state->normalized_coords) {
457             /* scale coord to length */
458             coord = lp_build_mul(coord_bld, coord, length_f);
459          }
460          if (offset) {
461             offset = lp_build_int_to_float(coord_bld, offset);
462             coord = lp_build_add(coord_bld, coord, offset);
463          }
464          coord = lp_build_abs(coord_bld, coord);
465
466          /* clamp to length max */
467          coord = lp_build_min(coord_bld, coord, length_f);
468          /* subtract 0.5 */
469          coord = lp_build_sub(coord_bld, coord, half);
470          /* clamp to [0, length - 0.5] */
471          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
472
473          /* convert to int, compute lerp weight */
474          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
475          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
476          /* coord1 = min(coord1, length-1) */
477          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
478       }
479       break;
480
481    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
482       {
483          if (bld->static_sampler_state->normalized_coords) {
484             /* scale coord to length */
485             coord = lp_build_mul(coord_bld, coord, length_f);
486          }
487          if (offset) {
488             offset = lp_build_int_to_float(coord_bld, offset);
489             coord = lp_build_add(coord_bld, coord, offset);
490          }
491          coord = lp_build_abs(coord_bld, coord);
492
493          /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
494          /* skip clamp - always positive, and other side
495             only potentially matters for very large coords */
496          coord = lp_build_sub(coord_bld, coord, half);
497
498          /* convert to int, compute lerp weight */
499          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
500          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
501       }
502       break;
503
504    default:
505       assert(0);
506       coord0 = NULL;
507       coord1 = NULL;
508       weight = NULL;
509    }
510
511    *x0_out = coord0;
512    *x1_out = coord1;
513    *weight_out = weight;
514 }
515
516
517 /**
518  * Build LLVM code for texture wrap mode for nearest filtering.
519  * \param coord  the incoming texcoord (nominally in [0,1])
520  * \param length  the texture size along one dimension, as int vector
521  * \param length_f  the texture size along one dimension, as float vector
522  * \param offset  texel offset along one dimension (as int vector)
523  * \param is_pot  if TRUE, length is a power of two
524  * \param wrap_mode  one of PIPE_TEX_WRAP_x
525  */
526 static LLVMValueRef
527 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
528                              LLVMValueRef coord,
529                              LLVMValueRef length,
530                              LLVMValueRef length_f,
531                              LLVMValueRef offset,
532                              boolean is_pot,
533                              unsigned wrap_mode)
534 {
535    struct lp_build_context *coord_bld = &bld->coord_bld;
536    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
537    LLVMBuilderRef builder = bld->gallivm->builder;
538    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
539    LLVMValueRef icoord;
540    
541    switch(wrap_mode) {
542    case PIPE_TEX_WRAP_REPEAT:
543       if (is_pot) {
544          coord = lp_build_mul(coord_bld, coord, length_f);
545          icoord = lp_build_ifloor(coord_bld, coord);
546          if (offset) {
547             icoord = lp_build_add(int_coord_bld, icoord, offset);
548          }
549          icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
550       }
551       else {
552           if (offset) {
553              offset = lp_build_int_to_float(coord_bld, offset);
554              offset = lp_build_div(coord_bld, offset, length_f);
555              coord = lp_build_add(coord_bld, coord, offset);
556           }
557           /* take fraction, unnormalize */
558           coord = lp_build_fract_safe(coord_bld, coord);
559           coord = lp_build_mul(coord_bld, coord, length_f);
560           icoord = lp_build_itrunc(coord_bld, coord);
561       }
562       break;
563
564    case PIPE_TEX_WRAP_CLAMP:
565    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
566       if (bld->static_sampler_state->normalized_coords) {
567          /* scale coord to length */
568          coord = lp_build_mul(coord_bld, coord, length_f);
569       }
570
571       /* floor */
572       /* use itrunc instead since we clamp to 0 anyway */
573       icoord = lp_build_itrunc(coord_bld, coord);
574       if (offset) {
575          icoord = lp_build_add(int_coord_bld, icoord, offset);
576       }
577
578       /* clamp to [0, length - 1]. */
579       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
580                               length_minus_one);
581       break;
582
583    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
584       if (bld->static_sampler_state->normalized_coords) {
585          /* scale coord to length */
586          coord = lp_build_mul(coord_bld, coord, length_f);
587       }
588       /* no clamp necessary, border masking will handle this */
589       icoord = lp_build_ifloor(coord_bld, coord);
590       if (offset) {
591          icoord = lp_build_add(int_coord_bld, icoord, offset);
592       }
593       break;
594
595    case PIPE_TEX_WRAP_MIRROR_REPEAT:
596       if (offset) {
597          offset = lp_build_int_to_float(coord_bld, offset);
598          offset = lp_build_div(coord_bld, offset, length_f);
599          coord = lp_build_add(coord_bld, coord, offset);
600       }
601       /* compute mirror function */
602       coord = lp_build_coord_mirror(bld, coord);
603
604       /* scale coord to length */
605       assert(bld->static_sampler_state->normalized_coords);
606       coord = lp_build_mul(coord_bld, coord, length_f);
607
608       /* itrunc == ifloor here */
609       icoord = lp_build_itrunc(coord_bld, coord);
610
611       /* clamp to [0, length - 1] */
612       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
613       break;
614
615    case PIPE_TEX_WRAP_MIRROR_CLAMP:
616    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
617       if (bld->static_sampler_state->normalized_coords) {
618          /* scale coord to length */
619          coord = lp_build_mul(coord_bld, coord, length_f);
620       }
621       if (offset) {
622          offset = lp_build_int_to_float(coord_bld, offset);
623          coord = lp_build_add(coord_bld, coord, offset);
624       }
625       coord = lp_build_abs(coord_bld, coord);
626
627       /* itrunc == ifloor here */
628       icoord = lp_build_itrunc(coord_bld, coord);
629
630       /* clamp to [0, length - 1] */
631       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
632       break;
633
634    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
635       if (bld->static_sampler_state->normalized_coords) {
636          /* scale coord to length */
637          coord = lp_build_mul(coord_bld, coord, length_f);
638       }
639       if (offset) {
640          offset = lp_build_int_to_float(coord_bld, offset);
641          coord = lp_build_add(coord_bld, coord, offset);
642       }
643       coord = lp_build_abs(coord_bld, coord);
644
645       /* itrunc == ifloor here */
646       icoord = lp_build_itrunc(coord_bld, coord);
647       break;
648
649    default:
650       assert(0);
651       icoord = NULL;
652    }
653
654    return icoord;
655 }
656
657
658 /**
659  * Do shadow test/comparison.
660  * \param p shadow ref value
661  * \param texel  the texel to compare against
662  */
663 static LLVMValueRef
664 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
665                             LLVMValueRef p,
666                             LLVMValueRef texel)
667 {
668    struct lp_build_context *texel_bld = &bld->texel_bld;
669    LLVMValueRef res;
670
671    if (0) {
672       //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
673       lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
674    }
675
676    /* result = (p FUNC texel) ? 1 : 0 */
677    /*
678     * honor d3d10 floating point rules here, which state that comparisons
679     * are ordered except NOT_EQUAL which is unordered.
680     */
681    if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
682       res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
683                                  p, texel);
684    }
685    else {
686       res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
687                          p, texel);
688    }
689    return res;
690 }
691
692
693 /**
694  * Generate code to sample a mipmap level with nearest filtering.
695  * If sampling a cube texture, r = cube face in [0,5].
696  */
697 static void
698 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
699                               unsigned sampler_unit,
700                               LLVMValueRef size,
701                               LLVMValueRef row_stride_vec,
702                               LLVMValueRef img_stride_vec,
703                               LLVMValueRef data_ptr,
704                               LLVMValueRef mipoffsets,
705                               LLVMValueRef *coords,
706                               const LLVMValueRef *offsets,
707                               LLVMValueRef colors_out[4])
708 {
709    const unsigned dims = bld->dims;
710    LLVMValueRef width_vec;
711    LLVMValueRef height_vec;
712    LLVMValueRef depth_vec;
713    LLVMValueRef flt_size;
714    LLVMValueRef flt_width_vec;
715    LLVMValueRef flt_height_vec;
716    LLVMValueRef flt_depth_vec;
717    LLVMValueRef x, y = NULL, z = NULL;
718
719    lp_build_extract_image_sizes(bld,
720                                 &bld->int_size_bld,
721                                 bld->int_coord_type,
722                                 size,
723                                 &width_vec, &height_vec, &depth_vec);
724
725    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
726
727    lp_build_extract_image_sizes(bld,
728                                 &bld->float_size_bld,
729                                 bld->coord_type,
730                                 flt_size,
731                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
732
733    /*
734     * Compute integer texcoords.
735     */
736    x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
737                                     flt_width_vec, offsets[0],
738                                     bld->static_texture_state->pot_width,
739                                     bld->static_sampler_state->wrap_s);
740    lp_build_name(x, "tex.x.wrapped");
741
742    if (dims >= 2) {
743       y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
744                                        flt_height_vec, offsets[1],
745                                        bld->static_texture_state->pot_height,
746                                        bld->static_sampler_state->wrap_t);
747       lp_build_name(y, "tex.y.wrapped");
748
749       if (dims == 3) {
750          z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
751                                           flt_depth_vec, offsets[2],
752                                           bld->static_texture_state->pot_depth,
753                                           bld->static_sampler_state->wrap_r);
754          lp_build_name(z, "tex.z.wrapped");
755       }
756    }
757    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
758        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
759        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
760       z = coords[2];
761       lp_build_name(z, "tex.z.layer");
762    }
763
764    /*
765     * Get texture colors.
766     */
767    lp_build_sample_texel_soa(bld, sampler_unit,
768                              width_vec, height_vec, depth_vec,
769                              x, y, z,
770                              row_stride_vec, img_stride_vec,
771                              data_ptr, mipoffsets, colors_out);
772
773    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
774       LLVMValueRef cmpval;
775       cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
776       /* this is really just a AND 1.0, cmpval but llvm is clever enough */
777       colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
778                                       bld->texel_bld.one, bld->texel_bld.zero);
779       colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
780    }
781
782 }
783
784
785 /**
786  * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
787  */
788 static LLVMValueRef
789 lp_build_masklerp(struct lp_build_context *bld,
790                  LLVMValueRef weight,
791                  LLVMValueRef mask0,
792                  LLVMValueRef mask1)
793 {
794    struct gallivm_state *gallivm = bld->gallivm;
795    LLVMBuilderRef builder = gallivm->builder;
796    LLVMValueRef weight2;
797
798    weight2 = lp_build_sub(bld, bld->one, weight);
799    weight = LLVMBuildBitCast(builder, weight,
800                               lp_build_int_vec_type(gallivm, bld->type), "");
801    weight2 = LLVMBuildBitCast(builder, weight2,
802                               lp_build_int_vec_type(gallivm, bld->type), "");
803    weight = LLVMBuildAnd(builder, weight, mask1, "");
804    weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
805    weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
806    weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
807    return lp_build_add(bld, weight, weight2);
808 }
809
810 /**
811  * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
812  */
813 static LLVMValueRef
814 lp_build_masklerp2d(struct lp_build_context *bld,
815                     LLVMValueRef weight0,
816                     LLVMValueRef weight1,
817                     LLVMValueRef mask00,
818                     LLVMValueRef mask01,
819                     LLVMValueRef mask10,
820                     LLVMValueRef mask11)
821 {
822    LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
823    LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
824    return lp_build_lerp(bld, weight1, val0, val1, 0);
825 }
826
827 /**
828  * Generate code to sample a mipmap level with linear filtering.
829  * If sampling a cube texture, r = cube face in [0,5].
830  */
831 static void
832 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
833                              unsigned sampler_unit,
834                              LLVMValueRef size,
835                              LLVMValueRef row_stride_vec,
836                              LLVMValueRef img_stride_vec,
837                              LLVMValueRef data_ptr,
838                              LLVMValueRef mipoffsets,
839                              LLVMValueRef *coords,
840                              const LLVMValueRef *offsets,
841                              LLVMValueRef colors_out[4])
842 {
843    const unsigned dims = bld->dims;
844    LLVMValueRef width_vec;
845    LLVMValueRef height_vec;
846    LLVMValueRef depth_vec;
847    LLVMValueRef flt_size;
848    LLVMValueRef flt_width_vec;
849    LLVMValueRef flt_height_vec;
850    LLVMValueRef flt_depth_vec;
851    LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
852    LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
853    LLVMValueRef neighbors[2][2][4];
854    int chan;
855
856    lp_build_extract_image_sizes(bld,
857                                 &bld->int_size_bld,
858                                 bld->int_coord_type,
859                                 size,
860                                 &width_vec, &height_vec, &depth_vec);
861
862    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
863
864    lp_build_extract_image_sizes(bld,
865                                 &bld->float_size_bld,
866                                 bld->coord_type,
867                                 flt_size,
868                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
869
870    /*
871     * Compute integer texcoords.
872     */
873    lp_build_sample_wrap_linear(bld, coords[0], width_vec,
874                                flt_width_vec, offsets[0],
875                                bld->static_texture_state->pot_width,
876                                bld->static_sampler_state->wrap_s,
877                                &x0, &x1, &s_fpart);
878    lp_build_name(x0, "tex.x0.wrapped");
879    lp_build_name(x1, "tex.x1.wrapped");
880
881    if (dims >= 2) {
882       lp_build_sample_wrap_linear(bld, coords[1], height_vec,
883                                   flt_height_vec, offsets[1],
884                                   bld->static_texture_state->pot_height,
885                                   bld->static_sampler_state->wrap_t,
886                                   &y0, &y1, &t_fpart);
887       lp_build_name(y0, "tex.y0.wrapped");
888       lp_build_name(y1, "tex.y1.wrapped");
889
890       if (dims == 3) {
891          lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
892                                      flt_depth_vec, offsets[2],
893                                      bld->static_texture_state->pot_depth,
894                                      bld->static_sampler_state->wrap_r,
895                                      &z0, &z1, &r_fpart);
896          lp_build_name(z0, "tex.z0.wrapped");
897          lp_build_name(z1, "tex.z1.wrapped");
898       }
899    }
900    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
901        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
902        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
903       z0 = z1 = coords[2];  /* cube face or layer */
904       lp_build_name(z0, "tex.z0.layer");
905       lp_build_name(z1, "tex.z1.layer");
906    }
907
908
909    /*
910     * Get texture colors.
911     */
912    /* get x0/x1 texels */
913    lp_build_sample_texel_soa(bld, sampler_unit,
914                              width_vec, height_vec, depth_vec,
915                              x0, y0, z0,
916                              row_stride_vec, img_stride_vec,
917                              data_ptr, mipoffsets, neighbors[0][0]);
918    lp_build_sample_texel_soa(bld, sampler_unit,
919                              width_vec, height_vec, depth_vec,
920                              x1, y0, z0,
921                              row_stride_vec, img_stride_vec,
922                              data_ptr, mipoffsets, neighbors[0][1]);
923
924    if (dims == 1) {
925       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
926          /* Interpolate two samples from 1D image to produce one color */
927          for (chan = 0; chan < 4; chan++) {
928             colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
929                                              neighbors[0][0][chan],
930                                              neighbors[0][1][chan],
931                                              0);
932          }
933       }
934       else {
935          LLVMValueRef cmpval0, cmpval1;
936          cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
937          cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
938          /* simplified lerp, AND mask with weight and add */
939          colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
940                                            cmpval0, cmpval1);
941          colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
942       }
943    }
944    else {
945       /* 2D/3D texture */
946       LLVMValueRef colors0[4];
947
948       /* get x0/x1 texels at y1 */
949       lp_build_sample_texel_soa(bld, sampler_unit,
950                                 width_vec, height_vec, depth_vec,
951                                 x0, y1, z0,
952                                 row_stride_vec, img_stride_vec,
953                                 data_ptr, mipoffsets, neighbors[1][0]);
954       lp_build_sample_texel_soa(bld, sampler_unit,
955                                 width_vec, height_vec, depth_vec,
956                                 x1, y1, z0,
957                                 row_stride_vec, img_stride_vec,
958                                 data_ptr, mipoffsets, neighbors[1][1]);
959
960       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
961          /* Bilinear interpolate the four samples from the 2D image / 3D slice */
962          for (chan = 0; chan < 4; chan++) {
963             colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
964                                              s_fpart, t_fpart,
965                                              neighbors[0][0][chan],
966                                              neighbors[0][1][chan],
967                                              neighbors[1][0][chan],
968                                              neighbors[1][1][chan],
969                                              0);
970          }
971       }
972       else {
973          LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
974          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
975          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
976          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
977          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
978          colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
979                                           cmpval00, cmpval01, cmpval10, cmpval11);
980          colors0[1] = colors0[2] = colors0[3] = colors0[0];
981       }
982
983       if (dims == 3) {
984          LLVMValueRef neighbors1[2][2][4];
985          LLVMValueRef colors1[4];
986
987          /* get x0/x1/y0/y1 texels at z1 */
988          lp_build_sample_texel_soa(bld, sampler_unit,
989                                    width_vec, height_vec, depth_vec,
990                                    x0, y0, z1,
991                                    row_stride_vec, img_stride_vec,
992                                    data_ptr, mipoffsets, neighbors1[0][0]);
993          lp_build_sample_texel_soa(bld, sampler_unit,
994                                    width_vec, height_vec, depth_vec,
995                                    x1, y0, z1,
996                                    row_stride_vec, img_stride_vec,
997                                    data_ptr, mipoffsets, neighbors1[0][1]);
998          lp_build_sample_texel_soa(bld, sampler_unit,
999                                    width_vec, height_vec, depth_vec,
1000                                    x0, y1, z1,
1001                                    row_stride_vec, img_stride_vec,
1002                                    data_ptr, mipoffsets, neighbors1[1][0]);
1003          lp_build_sample_texel_soa(bld, sampler_unit,
1004                                    width_vec, height_vec, depth_vec,
1005                                    x1, y1, z1,
1006                                    row_stride_vec, img_stride_vec,
1007                                    data_ptr, mipoffsets, neighbors1[1][1]);
1008
1009          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1010             /* Bilinear interpolate the four samples from the second Z slice */
1011             for (chan = 0; chan < 4; chan++) {
1012                colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1013                                                 s_fpart, t_fpart,
1014                                                 neighbors1[0][0][chan],
1015                                                 neighbors1[0][1][chan],
1016                                                 neighbors1[1][0][chan],
1017                                                 neighbors1[1][1][chan],
1018                                                 0);
1019             }
1020             /* Linearly interpolate the two samples from the two 3D slices */
1021             for (chan = 0; chan < 4; chan++) {
1022                colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1023                                                 r_fpart,
1024                                                 colors0[chan], colors1[chan],
1025                                                 0);
1026             }
1027          }
1028          else {
1029             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1030             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1031             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1032             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1033             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1034             colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1035                                              cmpval00, cmpval01, cmpval10, cmpval11);
1036             /* Linearly interpolate the two samples from the two 3D slices */
1037             colors_out[0] = lp_build_lerp(&bld->texel_bld,
1038                                              r_fpart,
1039                                              colors0[0], colors1[0],
1040                                              0);
1041             colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1042          }
1043       }
1044       else {
1045          /* 2D tex */
1046          for (chan = 0; chan < 4; chan++) {
1047             colors_out[chan] = colors0[chan];
1048          }
1049       }
1050    }
1051 }
1052
1053
1054 /**
1055  * Sample the texture/mipmap using given image filter and mip filter.
1056  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1057  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1058  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1059  */
1060 static void
1061 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1062                        unsigned sampler_unit,
1063                        unsigned img_filter,
1064                        unsigned mip_filter,
1065                        LLVMValueRef *coords,
1066                        const LLVMValueRef *offsets,
1067                        LLVMValueRef ilevel0,
1068                        LLVMValueRef ilevel1,
1069                        LLVMValueRef lod_fpart,
1070                        LLVMValueRef *colors_out)
1071 {
1072    LLVMBuilderRef builder = bld->gallivm->builder;
1073    LLVMValueRef size0 = NULL;
1074    LLVMValueRef size1 = NULL;
1075    LLVMValueRef row_stride0_vec = NULL;
1076    LLVMValueRef row_stride1_vec = NULL;
1077    LLVMValueRef img_stride0_vec = NULL;
1078    LLVMValueRef img_stride1_vec = NULL;
1079    LLVMValueRef data_ptr0 = NULL;
1080    LLVMValueRef data_ptr1 = NULL;
1081    LLVMValueRef mipoff0 = NULL;
1082    LLVMValueRef mipoff1 = NULL;
1083    LLVMValueRef colors0[4], colors1[4];
1084    unsigned chan;
1085
1086    /* sample the first mipmap level */
1087    lp_build_mipmap_level_sizes(bld, ilevel0,
1088                                &size0,
1089                                &row_stride0_vec, &img_stride0_vec);
1090    if (bld->num_lods == 1) {
1091       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1092    }
1093    else {
1094       /* This path should work for num_lods 1 too but slightly less efficient */
1095       data_ptr0 = bld->base_ptr;
1096       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1097    }
1098    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1099       lp_build_sample_image_nearest(bld, sampler_unit,
1100                                     size0,
1101                                     row_stride0_vec, img_stride0_vec,
1102                                     data_ptr0, mipoff0, coords, offsets,
1103                                     colors0);
1104    }
1105    else {
1106       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1107       lp_build_sample_image_linear(bld, sampler_unit,
1108                                    size0,
1109                                    row_stride0_vec, img_stride0_vec,
1110                                    data_ptr0, mipoff0, coords, offsets,
1111                                    colors0);
1112    }
1113
1114    /* Store the first level's colors in the output variables */
1115    for (chan = 0; chan < 4; chan++) {
1116        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1117    }
1118
1119    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1120       struct lp_build_if_state if_ctx;
1121       LLVMValueRef need_lerp;
1122
1123       /* need_lerp = lod_fpart > 0 */
1124       if (bld->num_lods == 1) {
1125          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1126                                    lod_fpart, bld->levelf_bld.zero,
1127                                    "need_lerp");
1128       }
1129       else {
1130          /*
1131           * We'll do mip filtering if any of the quads (or individual
1132           * pixel in case of per-pixel lod) need it.
1133           * It might be better to split the vectors here and only fetch/filter
1134           * quads which need it.
1135           */
1136          /*
1137           * We unfortunately need to clamp lod_fpart here since we can get
1138           * negative values which would screw up filtering if not all
1139           * lod_fpart values have same sign.
1140           */
1141          lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
1142                                   bld->levelf_bld.zero);
1143          need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
1144                                       PIPE_FUNC_GREATER,
1145                                       lod_fpart, bld->levelf_bld.zero);
1146          need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
1147       }
1148
1149       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1150       {
1151          /* sample the second mipmap level */
1152          lp_build_mipmap_level_sizes(bld, ilevel1,
1153                                      &size1,
1154                                      &row_stride1_vec, &img_stride1_vec);
1155          if (bld->num_lods == 1) {
1156             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1157          }
1158          else {
1159             data_ptr1 = bld->base_ptr;
1160             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1161          }
1162          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1163             lp_build_sample_image_nearest(bld, sampler_unit,
1164                                           size1,
1165                                           row_stride1_vec, img_stride1_vec,
1166                                           data_ptr1, mipoff1, coords, offsets,
1167                                           colors1);
1168          }
1169          else {
1170             lp_build_sample_image_linear(bld, sampler_unit,
1171                                          size1,
1172                                          row_stride1_vec, img_stride1_vec,
1173                                          data_ptr1, mipoff1, coords, offsets,
1174                                          colors1);
1175          }
1176
1177          /* interpolate samples from the two mipmap levels */
1178
1179          if (bld->num_lods != bld->coord_type.length)
1180             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1181                                                               bld->levelf_bld.type,
1182                                                               bld->texel_bld.type,
1183                                                               lod_fpart);
1184
1185          for (chan = 0; chan < 4; chan++) {
1186             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1187                                           colors0[chan], colors1[chan],
1188                                           0);
1189             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1190          }
1191       }
1192       lp_build_endif(&if_ctx);
1193    }
1194 }
1195
1196
1197 /**
1198  * Build (per-coord) layer value.
1199  * Either clamp layer to valid values or fill in optional out_of_bounds
1200  * value and just return value unclamped.
1201  */
1202 static LLVMValueRef
1203 lp_build_layer_coord(struct lp_build_sample_context *bld,
1204                      unsigned texture_unit,
1205                      LLVMValueRef layer,
1206                      LLVMValueRef *out_of_bounds)
1207 {
1208    LLVMValueRef num_layers;
1209    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1210
1211    num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1212                                           bld->gallivm, texture_unit);
1213
1214    if (out_of_bounds) {
1215       LLVMValueRef out1, out;
1216       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1217       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1218       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1219       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1220       return layer;
1221    }
1222    else {
1223       LLVMValueRef maxlayer;
1224       maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1225       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1226       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1227    }
1228 }
1229
1230
1231 /**
1232  * Calculate cube face, lod, mip levels.
1233  */
1234 static void
1235 lp_build_sample_common(struct lp_build_sample_context *bld,
1236                        unsigned texture_index,
1237                        unsigned sampler_index,
1238                        LLVMValueRef *coords,
1239                        const struct lp_derivatives *derivs, /* optional */
1240                        LLVMValueRef lod_bias, /* optional */
1241                        LLVMValueRef explicit_lod, /* optional */
1242                        LLVMValueRef *lod_ipart,
1243                        LLVMValueRef *lod_fpart,
1244                        LLVMValueRef *ilevel0,
1245                        LLVMValueRef *ilevel1)
1246 {
1247    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1248    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1249    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1250    const unsigned target = bld->static_texture_state->target;
1251    LLVMValueRef first_level, cube_rho = NULL;
1252
1253    /*
1254    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1255           mip_filter, min_filter, mag_filter);
1256    */
1257
1258    /*
1259     * Choose cube face, recompute texcoords for the chosen face and
1260     * compute rho here too (as it requires transform of derivatives).
1261     */
1262    if (target == PIPE_TEXTURE_CUBE) {
1263       boolean need_derivs;
1264       need_derivs = ((min_filter != mag_filter ||
1265                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1266                       !bld->static_sampler_state->min_max_lod_equal &&
1267                       !explicit_lod);
1268       lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
1269    }
1270    else if (target == PIPE_TEXTURE_1D_ARRAY ||
1271             target == PIPE_TEXTURE_2D_ARRAY) {
1272       coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1273       coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
1274    }
1275
1276    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1277       /*
1278        * Clamp p coords to [0,1] for fixed function depth texture format here.
1279        * Technically this is not entirely correct for unorm depth as the ref value
1280        * should be converted to the depth format (quantization!) and comparison
1281        * then done in texture format. This would actually help performance (since
1282        * only need to do it once and could save the per-sample conversion of texels
1283        * to floats instead), but it would need more messy code (would need to push
1284        * at least some bits down to actual fetch so conversion could be skipped,
1285        * and would have ugly interaction with border color, would need to convert
1286        * border color to that format too or do some other tricks to make it work).
1287        */
1288       const struct util_format_description *format_desc = bld->format_desc;
1289       unsigned chan_type;
1290       /* not entirely sure we couldn't end up with non-valid swizzle here */
1291       chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1292                      format_desc->channel[format_desc->swizzle[0]].type :
1293                      UTIL_FORMAT_TYPE_FLOAT;
1294       if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1295          coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1296                                     bld->coord_bld.zero, bld->coord_bld.one);
1297       }
1298    }
1299
1300    /*
1301     * Compute the level of detail (float).
1302     */
1303    if (min_filter != mag_filter ||
1304        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1305       /* Need to compute lod either to choose mipmap levels or to
1306        * distinguish between minification/magnification with one mipmap level.
1307        */
1308       lp_build_lod_selector(bld, texture_index, sampler_index,
1309                             coords[0], coords[1], coords[2], cube_rho,
1310                             derivs, lod_bias, explicit_lod,
1311                             mip_filter,
1312                             lod_ipart, lod_fpart);
1313    } else {
1314       *lod_ipart = bld->leveli_bld.zero;
1315    }
1316
1317    /*
1318     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1319     */
1320    switch (mip_filter) {
1321    default:
1322       assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1323       /* fall-through */
1324    case PIPE_TEX_MIPFILTER_NONE:
1325       /* always use mip level 0 */
1326       if (HAVE_LLVM == 0x0207 && target == PIPE_TEXTURE_CUBE) {
1327          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1328           * We should be able to set ilevel0 = const(0) but that causes
1329           * bad x86 code to be emitted.
1330           */
1331          assert(*lod_ipart);
1332          lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1333       }
1334       else {
1335          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1336                                                        bld->gallivm, texture_index);
1337          first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1338          *ilevel0 = first_level;
1339       }
1340       break;
1341    case PIPE_TEX_MIPFILTER_NEAREST:
1342       assert(*lod_ipart);
1343       lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
1344       break;
1345    case PIPE_TEX_MIPFILTER_LINEAR:
1346       assert(*lod_ipart);
1347       assert(*lod_fpart);
1348       lp_build_linear_mip_levels(bld, texture_index,
1349                                  *lod_ipart, lod_fpart,
1350                                  ilevel0, ilevel1);
1351       break;
1352    }
1353 }
1354
1355 static void
1356 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
1357                             unsigned sampler_unit)
1358 {
1359    struct gallivm_state *gallivm = bld->gallivm;
1360    LLVMBuilderRef builder = gallivm->builder;
1361    LLVMValueRef border_color_ptr =
1362       bld->dynamic_state->border_color(bld->dynamic_state,
1363                                         gallivm, sampler_unit);
1364    LLVMValueRef border_color;
1365    const struct util_format_description *format_desc = bld->format_desc;
1366    struct lp_type vec4_type = bld->texel_type;
1367    struct lp_build_context vec4_bld;
1368    LLVMValueRef min_clamp = NULL;
1369    LLVMValueRef max_clamp = NULL;
1370
1371    /*
1372     * For normalized format need to clamp border color (technically
1373     * probably should also quantize the data). Really sucks doing this
1374     * here but can't avoid at least for now since this is part of
1375     * sampler state and texture format is part of sampler_view state.
1376     * GL expects also expects clamping for uint/sint formats too so
1377     * do that as well (d3d10 can't end up here with uint/sint since it
1378     * only supports them with ld).
1379     */
1380    vec4_type.length = 4;
1381    lp_build_context_init(&vec4_bld, gallivm, vec4_type);
1382
1383    /*
1384     * Vectorized clamping of border color. Loading is a bit of a hack since
1385     * we just cast the pointer to float array to pointer to vec4
1386     * (int or float).
1387     */
1388    border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
1389                                              lp_build_const_int32(gallivm, 0));
1390    border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
1391                                        LLVMPointerType(vec4_bld.vec_type, 0), "");
1392    border_color = LLVMBuildLoad(builder, border_color_ptr, "");
1393    /* we don't have aligned type in the dynamic state unfortunately */
1394    lp_set_load_alignment(border_color, 4);
1395
1396    /*
1397     * Instead of having some incredibly complex logic which will try to figure out
1398     * clamping necessary for each channel, simply use the first channel, and treat
1399     * mixed signed/unsigned normalized formats specially.
1400     * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
1401     * good reason.)
1402     */
1403    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1404       int chan;
1405       /* d/s needs special handling because both present means just sampling depth */
1406       if (util_format_is_depth_and_stencil(format_desc->format)) {
1407          chan = format_desc->swizzle[0];
1408       }
1409       else {
1410          chan = util_format_get_first_non_void_channel(format_desc->format);
1411       }
1412       if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
1413          unsigned chan_type = format_desc->channel[chan].type;
1414          unsigned chan_norm = format_desc->channel[chan].normalized;
1415          unsigned chan_pure = format_desc->channel[chan].pure_integer;
1416          if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
1417             if (chan_norm) {
1418                min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1419                max_clamp = vec4_bld.one;
1420             }
1421             else if (chan_pure) {
1422                /*
1423                 * Border color was stored as int, hence need min/max clamp
1424                 * only if chan has less than 32 bits..
1425                 */
1426                unsigned chan_size = format_desc->channel[chan].size < 32;
1427                if (chan_size < 32) {
1428                   min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1429                                                      0 - (1 << (chan_size - 1)));
1430                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1431                                                      (1 << (chan_size - 1)) - 1);
1432                }
1433             }
1434             /* TODO: no idea about non-pure, non-normalized! */
1435          }
1436          else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
1437             if (chan_norm) {
1438                min_clamp = vec4_bld.zero;
1439                max_clamp = vec4_bld.one;
1440             }
1441             /*
1442              * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
1443              * we use Z32_FLOAT_S8X24 to imply sampling depth component
1444              * and ignoring stencil, which will blow up here if we try to
1445              * do a uint clamp in a float texel build...
1446              * And even if we had that format, mesa st also thinks using z24s8
1447              * means depth sampling ignoring stencil.
1448              */
1449             else if (chan_pure) {
1450                /*
1451                 * Border color was stored as uint, hence never need min
1452                 * clamp, and only need max clamp if chan has less than 32 bits.
1453                 */
1454                unsigned chan_size = format_desc->channel[chan].size < 32;
1455                if (chan_size < 32) {
1456                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1457                                                      (1 << chan_size) - 1);
1458                }
1459                /* TODO: no idea about non-pure, non-normalized! */
1460             }
1461          }
1462          else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
1463             /* TODO: I have no idea what clamp this would need if any! */
1464          }
1465       }
1466       /* mixed plain formats (or different pure size) */
1467       switch (format_desc->format) {
1468       case PIPE_FORMAT_B10G10R10A2_UINT:
1469       {
1470          unsigned max10 = (1 << 10) - 1;
1471          max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
1472                                         max10, (1 << 2) - 1, NULL);
1473       }
1474          break;
1475       case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
1476          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1477                                         -1.0F, 0.0F, NULL);
1478          max_clamp = vec4_bld.one;
1479          break;
1480       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
1481       case PIPE_FORMAT_R5SG5SB6U_NORM:
1482          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1483                                         0.0F, 0.0F, NULL);
1484          max_clamp = vec4_bld.one;
1485          break;
1486       default:
1487          break;
1488       }
1489    }
1490    else {
1491       /* cannot figure this out from format description */
1492       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1493          /* s3tc formats are always unorm */
1494          min_clamp = vec4_bld.zero;
1495          max_clamp = vec4_bld.one;
1496       }
1497       else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
1498                format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
1499          switch (format_desc->format) {
1500          case PIPE_FORMAT_RGTC1_UNORM:
1501          case PIPE_FORMAT_RGTC2_UNORM:
1502          case PIPE_FORMAT_LATC1_UNORM:
1503          case PIPE_FORMAT_LATC2_UNORM:
1504          case PIPE_FORMAT_ETC1_RGB8:
1505             min_clamp = vec4_bld.zero;
1506             max_clamp = vec4_bld.one;
1507             break;
1508          case PIPE_FORMAT_RGTC1_SNORM:
1509          case PIPE_FORMAT_RGTC2_SNORM:
1510          case PIPE_FORMAT_LATC1_SNORM:
1511          case PIPE_FORMAT_LATC2_SNORM:
1512             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1513             max_clamp = vec4_bld.one;
1514             break;
1515          default:
1516             assert(0);
1517             break;
1518          }
1519       }
1520       /*
1521        * all others from subsampled/other group, though we don't care
1522        * about yuv (and should not have any from zs here)
1523        */
1524       else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
1525          switch (format_desc->format) {
1526          case PIPE_FORMAT_R8G8_B8G8_UNORM:
1527          case PIPE_FORMAT_G8R8_G8B8_UNORM:
1528          case PIPE_FORMAT_G8R8_B8R8_UNORM:
1529          case PIPE_FORMAT_R8G8_R8B8_UNORM:
1530          case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
1531             min_clamp = vec4_bld.zero;
1532             max_clamp = vec4_bld.one;
1533             break;
1534          case PIPE_FORMAT_R8G8Bx_SNORM:
1535             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1536             max_clamp = vec4_bld.one;
1537             break;
1538             /*
1539              * Note smallfloat formats usually don't need clamping
1540              * (they still have infinite range) however this is not
1541              * true for r11g11b10 and r9g9b9e5, which can't represent
1542              * negative numbers (and additionally r9g9b9e5 can't represent
1543              * very large numbers). d3d10 seems happy without clamping in
1544              * this case, but gl spec is pretty clear: "for floating
1545              * point and integer formats, border values are clamped to
1546              * the representable range of the format" so do that here.
1547              */
1548          case PIPE_FORMAT_R11G11B10_FLOAT:
1549             min_clamp = vec4_bld.zero;
1550             break;
1551          case PIPE_FORMAT_R9G9B9E5_FLOAT:
1552             min_clamp = vec4_bld.zero;
1553             max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
1554             break;
1555          default:
1556             assert(0);
1557             break;
1558          }
1559       }
1560    }
1561
1562    if (min_clamp) {
1563       border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
1564    }
1565    if (max_clamp) {
1566       border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
1567    }
1568
1569    bld->border_color_clamped = border_color;
1570 }
1571
1572
1573 /**
1574  * General texture sampling codegen.
1575  * This function handles texture sampling for all texture targets (1D,
1576  * 2D, 3D, cube) and all filtering modes.
1577  */
1578 static void
1579 lp_build_sample_general(struct lp_build_sample_context *bld,
1580                         unsigned sampler_unit,
1581                         LLVMValueRef *coords,
1582                         const LLVMValueRef *offsets,
1583                         LLVMValueRef lod_ipart,
1584                         LLVMValueRef lod_fpart,
1585                         LLVMValueRef ilevel0,
1586                         LLVMValueRef ilevel1,
1587                         LLVMValueRef *colors_out)
1588 {
1589    struct lp_build_context *int_bld = &bld->int_bld;
1590    LLVMBuilderRef builder = bld->gallivm->builder;
1591    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
1592    const unsigned mip_filter = sampler_state->min_mip_filter;
1593    const unsigned min_filter = sampler_state->min_img_filter;
1594    const unsigned mag_filter = sampler_state->mag_img_filter;
1595    LLVMValueRef texels[4];
1596    unsigned chan;
1597
1598    /* if we need border color, (potentially) clamp it now */
1599    if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
1600                                               min_filter,
1601                                               mag_filter) ||
1602        (bld->dims > 1 &&
1603            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
1604                                                   min_filter,
1605                                                   mag_filter)) ||
1606        (bld->dims > 2 &&
1607            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
1608                                                   min_filter,
1609                                                   mag_filter))) {
1610       lp_build_clamp_border_color(bld, sampler_unit);
1611    }
1612
1613
1614    /*
1615     * Get/interpolate texture colors.
1616     */
1617
1618    for (chan = 0; chan < 4; ++chan) {
1619      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
1620      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
1621    }
1622
1623    if (min_filter == mag_filter) {
1624       /* no need to distinguish between minification and magnification */
1625       lp_build_sample_mipmap(bld, sampler_unit,
1626                              min_filter, mip_filter,
1627                              coords, offsets,
1628                              ilevel0, ilevel1, lod_fpart,
1629                              texels);
1630    }
1631    else {
1632       /* Emit conditional to choose min image filter or mag image filter
1633        * depending on the lod being > 0 or <= 0, respectively.
1634        */
1635       struct lp_build_if_state if_ctx;
1636       LLVMValueRef minify;
1637
1638       /*
1639        * XXX this should to all lods into account, if some are min
1640        * some max probably could hack up the coords/weights in the linear
1641        * path with selects to work for nearest.
1642        * If that's just two quads sitting next to each other it seems
1643        * quite ok to do the same filtering method on both though, at
1644        * least unless we have explicit lod (and who uses different
1645        * min/mag filter with that?)
1646        */
1647       if (bld->num_lods > 1)
1648          lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
1649                                              lp_build_const_int32(bld->gallivm, 0), "");
1650
1651       /* minify = lod >= 0.0 */
1652       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1653                              lod_ipart, int_bld->zero, "");
1654
1655       lp_build_if(&if_ctx, bld->gallivm, minify);
1656       {
1657          /* Use the minification filter */
1658          lp_build_sample_mipmap(bld, sampler_unit,
1659                                 min_filter, mip_filter,
1660                                 coords, offsets,
1661                                 ilevel0, ilevel1, lod_fpart,
1662                                 texels);
1663       }
1664       lp_build_else(&if_ctx);
1665       {
1666          /* Use the magnification filter */
1667          lp_build_sample_mipmap(bld, sampler_unit,
1668                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1669                                 coords, offsets,
1670                                 ilevel0, NULL, NULL,
1671                                 texels);
1672       }
1673       lp_build_endif(&if_ctx);
1674    }
1675
1676    for (chan = 0; chan < 4; ++chan) {
1677      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
1678      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
1679    }
1680 }
1681
1682
1683 /**
1684  * Texel fetch function.
1685  * In contrast to general sampling there is no filtering, no coord minification,
1686  * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
1687  * directly to be applied to the selected mip level (after adding texel offsets).
1688  * This function handles texel fetch for all targets where texel fetch is supported
1689  * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
1690  */
1691 static void
1692 lp_build_fetch_texel(struct lp_build_sample_context *bld,
1693                      unsigned texture_unit,
1694                      const LLVMValueRef *coords,
1695                      LLVMValueRef explicit_lod,
1696                      const LLVMValueRef *offsets,
1697                      LLVMValueRef *colors_out)
1698 {
1699    struct lp_build_context *perquadi_bld = &bld->leveli_bld;
1700    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1701    unsigned dims = bld->dims, chan;
1702    unsigned target = bld->static_texture_state->target;
1703    boolean out_of_bound_ret_zero = TRUE;
1704    LLVMValueRef size, ilevel;
1705    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
1706    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
1707    LLVMValueRef width, height, depth, i, j;
1708    LLVMValueRef offset, out_of_bounds, out1;
1709
1710    out_of_bounds = int_coord_bld->zero;
1711
1712    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
1713       if (bld->num_lods != int_coord_bld->type.length) {
1714          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
1715                                             perquadi_bld->type, explicit_lod, 0);
1716       }
1717       else {
1718          ilevel = explicit_lod;
1719       }
1720       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
1721                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
1722    }
1723    else {
1724       assert(bld->num_lods == 1);
1725       if (bld->static_texture_state->target != PIPE_BUFFER) {
1726          ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
1727                                                   bld->gallivm, texture_unit);
1728       }
1729       else {
1730          ilevel = lp_build_const_int32(bld->gallivm, 0);
1731       }
1732    }
1733    lp_build_mipmap_level_sizes(bld, ilevel,
1734                                &size,
1735                                &row_stride_vec, &img_stride_vec);
1736    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
1737                                 size, &width, &height, &depth);
1738
1739    if (target == PIPE_TEXTURE_1D_ARRAY ||
1740        target == PIPE_TEXTURE_2D_ARRAY) {
1741       if (out_of_bound_ret_zero) {
1742          z = lp_build_layer_coord(bld, texture_unit, z, &out1);
1743          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1744       }
1745       else {
1746          z = lp_build_layer_coord(bld, texture_unit, z, NULL);
1747       }
1748    }
1749
1750    /* This is a lot like border sampling */
1751    if (offsets[0]) {
1752       /*
1753        * coords are really unsigned, offsets are signed, but I don't think
1754        * exceeding 31 bits is possible
1755        */
1756       x = lp_build_add(int_coord_bld, x, offsets[0]);
1757    }
1758    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
1759    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1760    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
1761    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1762
1763    if (dims >= 2) {
1764       if (offsets[1]) {
1765          y = lp_build_add(int_coord_bld, y, offsets[1]);
1766       }
1767       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
1768       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1769       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
1770       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1771
1772       if (dims >= 3) {
1773          if (offsets[2]) {
1774             z = lp_build_add(int_coord_bld, z, offsets[2]);
1775          }
1776          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
1777          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1778          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
1779          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1780       }
1781    }
1782
1783    lp_build_sample_offset(int_coord_bld,
1784                           bld->format_desc,
1785                           x, y, z, row_stride_vec, img_stride_vec,
1786                           &offset, &i, &j);
1787
1788    if (bld->static_texture_state->target != PIPE_BUFFER) {
1789       offset = lp_build_add(int_coord_bld, offset,
1790                             lp_build_get_mip_offsets(bld, ilevel));
1791    }
1792
1793    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
1794
1795    lp_build_fetch_rgba_soa(bld->gallivm,
1796                            bld->format_desc,
1797                            bld->texel_type,
1798                            bld->base_ptr, offset,
1799                            i, j,
1800                            colors_out);
1801
1802    if (out_of_bound_ret_zero) {
1803       /*
1804        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
1805        * Could use min/max above instead of out-of-bounds comparisons
1806        * if we don't care about the result returned for out-of-bounds.
1807        */
1808       for (chan = 0; chan < 4; chan++) {
1809          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
1810                                             bld->texel_bld.zero, colors_out[chan]);
1811       }
1812    }
1813 }
1814
1815
1816 /**
1817  * Just set texels to white instead of actually sampling the texture.
1818  * For debugging.
1819  */
1820 void
1821 lp_build_sample_nop(struct gallivm_state *gallivm,
1822                     struct lp_type type,
1823                     const LLVMValueRef *coords,
1824                     LLVMValueRef texel_out[4])
1825 {
1826    LLVMValueRef one = lp_build_one(gallivm, type);
1827    unsigned chan;
1828
1829    for (chan = 0; chan < 4; chan++) {
1830       texel_out[chan] = one;
1831    }  
1832 }
1833
1834
1835 /**
1836  * Build texture sampling code.
1837  * 'texel' will return a vector of four LLVMValueRefs corresponding to
1838  * R, G, B, A.
1839  * \param type  vector float type to use for coords, etc.
1840  * \param is_fetch  if this is a texel fetch instruction.
1841  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
1842  */
1843 void
1844 lp_build_sample_soa(struct gallivm_state *gallivm,
1845                     const struct lp_static_texture_state *static_texture_state,
1846                     const struct lp_static_sampler_state *static_sampler_state,
1847                     struct lp_sampler_dynamic_state *dynamic_state,
1848                     struct lp_type type,
1849                     boolean is_fetch,
1850                     unsigned texture_index,
1851                     unsigned sampler_index,
1852                     const LLVMValueRef *coords,
1853                     const LLVMValueRef *offsets,
1854                     const struct lp_derivatives *derivs, /* optional */
1855                     LLVMValueRef lod_bias, /* optional */
1856                     LLVMValueRef explicit_lod, /* optional */
1857                     enum lp_sampler_lod_property lod_property,
1858                     LLVMValueRef texel_out[4])
1859 {
1860    unsigned target = static_texture_state->target;
1861    unsigned dims = texture_dims(target);
1862    unsigned num_quads = type.length / 4;
1863    unsigned mip_filter, i;
1864    struct lp_build_sample_context bld;
1865    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
1866    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
1867    LLVMBuilderRef builder = gallivm->builder;
1868    LLVMValueRef tex_width, newcoords[5];
1869
1870    if (0) {
1871       enum pipe_format fmt = static_texture_state->format;
1872       debug_printf("Sample from %s\n", util_format_name(fmt));
1873    }
1874
1875    assert(type.floating);
1876
1877    /* Setup our build context */
1878    memset(&bld, 0, sizeof bld);
1879    bld.gallivm = gallivm;
1880    bld.static_sampler_state = &derived_sampler_state;
1881    bld.static_texture_state = static_texture_state;
1882    bld.dynamic_state = dynamic_state;
1883    bld.format_desc = util_format_description(static_texture_state->format);
1884    bld.dims = dims;
1885
1886    bld.vector_width = lp_type_width(type);
1887
1888    bld.float_type = lp_type_float(32);
1889    bld.int_type = lp_type_int(32);
1890    bld.coord_type = type;
1891    bld.int_coord_type = lp_int_type(type);
1892    bld.float_size_in_type = lp_type_float(32);
1893    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
1894    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
1895    bld.texel_type = type;
1896
1897    /* always using the first channel hopefully should be safe,
1898     * if not things WILL break in other places anyway.
1899     */
1900    if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
1901        bld.format_desc->channel[0].pure_integer) {
1902       if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
1903          bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1904       }
1905       else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1906          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
1907       }
1908    }
1909    else if (util_format_has_stencil(bld.format_desc) &&
1910        !util_format_has_depth(bld.format_desc)) {
1911       /* for stencil only formats, sample stencil (uint) */
1912       bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
1913    }
1914
1915    if (!static_texture_state->level_zero_only) {
1916       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
1917    } else {
1918       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
1919    }
1920    mip_filter = derived_sampler_state.min_mip_filter;
1921
1922    if (0) {
1923       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
1924    }
1925
1926    /*
1927     * This is all a bit complicated different paths are chosen for performance
1928     * reasons.
1929     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
1930     * everything (the last two options are equivalent for 4-wide case).
1931     * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
1932     * lod is calculated then the lod value extracted afterwards so making this
1933     * case basically the same as far as lod handling is concerned for the
1934     * further sample/filter code as the 1 lod for everything case.
1935     * Different lod handling mostly shows up when building mipmap sizes
1936     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
1937     * (getting the fractional part of the lod to the right texels).
1938     */
1939
1940    /*
1941     * There are other situations where at least the multiple int lods could be
1942     * avoided like min and max lod being equal.
1943     */
1944    if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
1945        ((is_fetch && target != PIPE_BUFFER) ||
1946         (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
1947       bld.num_lods = type.length;
1948    /* TODO: for true scalar_lod should only use 1 lod value */
1949    else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
1950             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
1951       bld.num_lods = num_quads;
1952    }
1953    else {
1954       bld.num_lods = 1;
1955    }
1956
1957    bld.levelf_type = type;
1958    /* we want native vector size to be able to use our intrinsics */
1959    if (bld.num_lods != type.length) {
1960       bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
1961    }
1962    bld.leveli_type = lp_int_type(bld.levelf_type);
1963    bld.float_size_type = bld.float_size_in_type;
1964    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
1965     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
1966    if (bld.num_lods > 1) {
1967       bld.float_size_type.length = bld.num_lods == type.length ?
1968                                       bld.num_lods * bld.float_size_in_type.length :
1969                                       type.length;
1970    }
1971    bld.int_size_type = lp_int_type(bld.float_size_type);
1972
1973    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
1974    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
1975    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
1976    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
1977    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
1978    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
1979    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
1980    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
1981    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
1982    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
1983    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
1984    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
1985
1986    /* Get the dynamic state */
1987    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
1988    bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
1989    bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
1990    bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
1991    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
1992    /* Note that mip_offsets is an array[level] of offsets to texture images */
1993
1994    /* width, height, depth as single int vector */
1995    if (dims <= 1) {
1996       bld.int_size = tex_width;
1997    }
1998    else {
1999       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2000                                             tex_width, LLVMConstInt(i32t, 0, 0), "");
2001       if (dims >= 2) {
2002          LLVMValueRef tex_height =
2003             dynamic_state->height(dynamic_state, gallivm, texture_index);
2004          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2005                                                tex_height, LLVMConstInt(i32t, 1, 0), "");
2006          if (dims >= 3) {
2007             LLVMValueRef tex_depth =
2008                dynamic_state->depth(dynamic_state, gallivm, texture_index);
2009             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2010                                                   tex_depth, LLVMConstInt(i32t, 2, 0), "");
2011          }
2012       }
2013    }
2014
2015    for (i = 0; i < 5; i++) {
2016       newcoords[i] = coords[i];
2017    }
2018
2019    if (0) {
2020       /* For debug: no-op texture sampling */
2021       lp_build_sample_nop(gallivm,
2022                           bld.texel_type,
2023                           newcoords,
2024                           texel_out);
2025    }
2026
2027    else if (is_fetch) {
2028       lp_build_fetch_texel(&bld, texture_index, newcoords,
2029                            explicit_lod, offsets,
2030                            texel_out);
2031    }
2032
2033    else {
2034       LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
2035       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2036       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2037                         /* not sure this is strictly needed or simply impossible */
2038                         static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE &&
2039                         lp_is_simple_wrap_mode(static_sampler_state->wrap_s);
2040       if (dims > 1) {
2041          use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
2042          if (dims > 2) {
2043             use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_r);
2044          }
2045       }
2046
2047       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2048           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2049          debug_printf("%s: using floating point linear filtering for %s\n",
2050                       __FUNCTION__, bld.format_desc->short_name);
2051          debug_printf("  min_img %d  mag_img %d  mip %d  wraps %d  wrapt %d  wrapr %d\n",
2052                       static_sampler_state->min_img_filter,
2053                       static_sampler_state->mag_img_filter,
2054                       static_sampler_state->min_mip_filter,
2055                       static_sampler_state->wrap_s,
2056                       static_sampler_state->wrap_t,
2057                       static_sampler_state->wrap_r);
2058       }
2059
2060       lp_build_sample_common(&bld, texture_index, sampler_index,
2061                              newcoords,
2062                              derivs, lod_bias, explicit_lod,
2063                              &lod_ipart, &lod_fpart,
2064                              &ilevel0, &ilevel1);
2065
2066       /*
2067        * we only try 8-wide sampling with soa as it appears to
2068        * be a loss with aos with AVX (but it should work).
2069        * (It should be faster if we'd support avx2)
2070        */
2071       if (num_quads == 1 || !use_aos) {
2072
2073          if (num_quads > 1) {
2074             if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
2075                LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
2076                /*
2077                 * These parameters are the same for all quads,
2078                 * could probably simplify.
2079                 */
2080                lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
2081                ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
2082             }
2083          }
2084          if (use_aos) {
2085             /* do sampling/filtering with fixed pt arithmetic */
2086             lp_build_sample_aos(&bld, sampler_index,
2087                                 newcoords[0], newcoords[1],
2088                                 newcoords[2],
2089                                 offsets, lod_ipart, lod_fpart,
2090                                 ilevel0, ilevel1,
2091                                 texel_out);
2092          }
2093
2094          else {
2095             lp_build_sample_general(&bld, sampler_index,
2096                                     newcoords, offsets,
2097                                     lod_ipart, lod_fpart,
2098                                     ilevel0, ilevel1,
2099                                     texel_out);
2100          }
2101       }
2102       else {
2103          unsigned j;
2104          struct lp_build_sample_context bld4;
2105          struct lp_type type4 = type;
2106          unsigned i;
2107          LLVMValueRef texelout4[4];
2108          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2109
2110          type4.length = 4;
2111
2112          /* Setup our build context */
2113          memset(&bld4, 0, sizeof bld4);
2114          bld4.gallivm = bld.gallivm;
2115          bld4.static_texture_state = bld.static_texture_state;
2116          bld4.static_sampler_state = bld.static_sampler_state;
2117          bld4.dynamic_state = bld.dynamic_state;
2118          bld4.format_desc = bld.format_desc;
2119          bld4.dims = bld.dims;
2120          bld4.row_stride_array = bld.row_stride_array;
2121          bld4.img_stride_array = bld.img_stride_array;
2122          bld4.base_ptr = bld.base_ptr;
2123          bld4.mip_offsets = bld.mip_offsets;
2124          bld4.int_size = bld.int_size;
2125
2126          bld4.vector_width = lp_type_width(type4);
2127
2128          bld4.float_type = lp_type_float(32);
2129          bld4.int_type = lp_type_int(32);
2130          bld4.coord_type = type4;
2131          bld4.int_coord_type = lp_int_type(type4);
2132          bld4.float_size_in_type = lp_type_float(32);
2133          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2134          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2135          bld4.texel_type = bld.texel_type;
2136          bld4.texel_type.length = 4;
2137          bld4.levelf_type = type4;
2138          /* we want native vector size to be able to use our intrinsics */
2139          bld4.levelf_type.length = 1;
2140          bld4.leveli_type = lp_int_type(bld4.levelf_type);
2141
2142          if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2143              ((is_fetch && target != PIPE_BUFFER) ||
2144               (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
2145             bld4.num_lods = type4.length;
2146          else
2147             bld4.num_lods = 1;
2148
2149          bld4.levelf_type = type4;
2150          /* we want native vector size to be able to use our intrinsics */
2151          if (bld4.num_lods != type4.length) {
2152             bld4.levelf_type.length = 1;
2153          }
2154          bld4.leveli_type = lp_int_type(bld4.levelf_type);
2155          bld4.float_size_type = bld4.float_size_in_type;
2156          if (bld4.num_lods > 1) {
2157             bld4.float_size_type.length = bld4.num_lods == type4.length ?
2158                                             bld4.num_lods * bld4.float_size_in_type.length :
2159                                             type4.length;
2160          }
2161          bld4.int_size_type = lp_int_type(bld4.float_size_type);
2162
2163          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2164          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2165          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2166          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2167          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2168          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2169          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2170          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2171          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2172          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2173          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2174          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2175
2176          for (i = 0; i < num_quads; i++) {
2177             LLVMValueRef s4, t4, r4;
2178             LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
2179             LLVMValueRef ilevel04, ilevel14 = NULL;
2180             LLVMValueRef offsets4[4] = { NULL };
2181             unsigned num_lods = bld4.num_lods;
2182
2183             s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2184             t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2185             r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2186
2187             if (offsets[0]) {
2188                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2189                if (dims > 1) {
2190                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2191                   if (dims > 2) {
2192                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2193                   }
2194                }
2195             }
2196             lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
2197             ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2198             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2199                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2200                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2201             }
2202
2203             if (use_aos) {
2204                /* do sampling/filtering with fixed pt arithmetic */
2205                lp_build_sample_aos(&bld4, sampler_index,
2206                                    s4, t4, r4, offsets4,
2207                                    lod_ipart4, lod_fpart4,
2208                                    ilevel04, ilevel14,
2209                                    texelout4);
2210             }
2211
2212             else {
2213                /* this path is currently unreachable and hence might break easily... */
2214                LLVMValueRef newcoords4[5];
2215                newcoords4[0] = s4;
2216                newcoords4[1] = t4;
2217                newcoords4[2] = r4;
2218                newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2219                newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2220
2221                lp_build_sample_general(&bld4, sampler_index,
2222                                        newcoords4, offsets4,
2223                                        lod_ipart4, lod_fpart4,
2224                                        ilevel04, ilevel14,
2225                                        texelout4);
2226             }
2227             for (j = 0; j < 4; j++) {
2228                texelouttmp[j][i] = texelout4[j];
2229             }
2230          }
2231
2232          for (j = 0; j < 4; j++) {
2233             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2234          }
2235       }
2236    }
2237
2238    if (target != PIPE_BUFFER) {
2239       apply_sampler_swizzle(&bld, texel_out);
2240    }
2241
2242    /*
2243     * texel type can be a (32bit) int/uint (for pure int formats only),
2244     * however we are expected to always return floats (storage is untyped).
2245     */
2246    if (!bld.texel_type.floating) {
2247       unsigned chan;
2248       for (chan = 0; chan < 4; chan++) {
2249          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2250                                             lp_build_vec_type(gallivm, type), "");
2251       }
2252    }
2253 }
2254
2255 void
2256 lp_build_size_query_soa(struct gallivm_state *gallivm,
2257                         const struct lp_static_texture_state *static_state,
2258                         struct lp_sampler_dynamic_state *dynamic_state,
2259                         struct lp_type int_type,
2260                         unsigned texture_unit,
2261                         unsigned target,
2262                         boolean is_sviewinfo,
2263                         enum lp_sampler_lod_property lod_property,
2264                         LLVMValueRef explicit_lod,
2265                         LLVMValueRef *sizes_out)
2266 {
2267    LLVMValueRef lod, level, size;
2268    LLVMValueRef first_level = NULL;
2269    int dims, i;
2270    boolean has_array;
2271    unsigned num_lods = 1;
2272    struct lp_build_context bld_int_vec4;
2273
2274    /*
2275     * Do some sanity verification about bound texture and shader dcl target.
2276     * Not entirely sure what's possible but assume array/non-array
2277     * always compatible (probably not ok for OpenGL but d3d10 has no
2278     * distinction of arrays at the resource level).
2279     * Everything else looks bogus (though not entirely sure about rect/2d).
2280     * Currently disabled because it causes assertion failures if there's
2281     * nothing bound (or rather a dummy texture, not that this case would
2282     * return the right values).
2283     */
2284    if (0 && static_state->target != target) {
2285       if (static_state->target == PIPE_TEXTURE_1D)
2286          assert(target == PIPE_TEXTURE_1D_ARRAY);
2287       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
2288          assert(target == PIPE_TEXTURE_1D);
2289       else if (static_state->target == PIPE_TEXTURE_2D)
2290          assert(target == PIPE_TEXTURE_2D_ARRAY);
2291       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
2292          assert(target == PIPE_TEXTURE_2D);
2293       else if (static_state->target == PIPE_TEXTURE_CUBE)
2294          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
2295       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2296          assert(target == PIPE_TEXTURE_CUBE);
2297       else
2298          assert(0);
2299    }
2300
2301    dims = texture_dims(target);
2302
2303    switch (target) {
2304    case PIPE_TEXTURE_1D_ARRAY:
2305    case PIPE_TEXTURE_2D_ARRAY:
2306       has_array = TRUE;
2307       break;
2308    default:
2309       has_array = FALSE;
2310       break;
2311    }
2312
2313    assert(!int_type.floating);
2314
2315    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
2316
2317    if (explicit_lod) {
2318       /* FIXME: this needs to honor per-element lod */
2319       lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
2320       first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
2321       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
2322       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
2323    } else {
2324       lod = bld_int_vec4.zero;
2325    }
2326
2327    size = bld_int_vec4.undef;
2328
2329    size = LLVMBuildInsertElement(gallivm->builder, size,
2330                                  dynamic_state->width(dynamic_state, gallivm, texture_unit),
2331                                  lp_build_const_int32(gallivm, 0), "");
2332
2333    if (dims >= 2) {
2334       size = LLVMBuildInsertElement(gallivm->builder, size,
2335                                     dynamic_state->height(dynamic_state, gallivm, texture_unit),
2336                                     lp_build_const_int32(gallivm, 1), "");
2337    }
2338
2339    if (dims >= 3) {
2340       size = LLVMBuildInsertElement(gallivm->builder, size,
2341                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2342                                     lp_build_const_int32(gallivm, 2), "");
2343    }
2344
2345    size = lp_build_minify(&bld_int_vec4, size, lod);
2346
2347    if (has_array)
2348       size = LLVMBuildInsertElement(gallivm->builder, size,
2349                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2350                                     lp_build_const_int32(gallivm, dims), "");
2351
2352    /*
2353     * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
2354     * if level is out of bounds (note this can't cover unbound texture
2355     * here, which also requires returning zero).
2356     */
2357    if (explicit_lod && is_sviewinfo) {
2358       LLVMValueRef last_level, out, out1;
2359       struct lp_build_context leveli_bld;
2360
2361       /* everything is scalar for now */
2362       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
2363       last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2364
2365       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
2366       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
2367       out = lp_build_or(&leveli_bld, out, out1);
2368       if (num_lods == 1) {
2369          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
2370       }
2371       else {
2372          /* TODO */
2373          assert(0);
2374       }
2375       size = lp_build_andnot(&bld_int_vec4, size, out);
2376    }
2377    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
2378       sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
2379                                                 size,
2380                                                 lp_build_const_int32(gallivm, i));
2381    }
2382    if (is_sviewinfo) {
2383       for (; i < 4; i++) {
2384          sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
2385       }
2386    }
2387
2388    /*
2389     * if there's no explicit_lod (buffers, rects) queries requiring nr of
2390     * mips would be illegal.
2391     */
2392    if (is_sviewinfo && explicit_lod) {
2393       struct lp_build_context bld_int_scalar;
2394       LLVMValueRef num_levels;
2395       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
2396
2397       if (static_state->level_zero_only) {
2398          num_levels = bld_int_scalar.one;
2399       }
2400       else {
2401          LLVMValueRef last_level;
2402
2403          last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2404          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
2405          num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
2406       }
2407       sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
2408                                         num_levels);
2409    }
2410 }