OSDN Git Service

gallivm: support per-pixel min/mag filter in SoA path
[android-x86/external-mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27
28 /**
29  * @file
30  * Texture sampling -- SoA.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  * @author Brian Paul <brianp@vmware.com>
34  */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63
64
65 /**
66  * Generate code to fetch a texel from a texture at int coords (x, y, z).
67  * The computation depends on whether the texture is 1D, 2D or 3D.
68  * The result, texel, will be float vectors:
69  *   texel[0] = red values
70  *   texel[1] = green values
71  *   texel[2] = blue values
72  *   texel[3] = alpha values
73  */
74 static void
75 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
76                           unsigned sampler_unit,
77                           LLVMValueRef width,
78                           LLVMValueRef height,
79                           LLVMValueRef depth,
80                           LLVMValueRef x,
81                           LLVMValueRef y,
82                           LLVMValueRef z,
83                           LLVMValueRef y_stride,
84                           LLVMValueRef z_stride,
85                           LLVMValueRef data_ptr,
86                           LLVMValueRef mipoffsets,
87                           LLVMValueRef texel_out[4])
88 {
89    const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
90    const unsigned dims = bld->dims;
91    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
92    LLVMBuilderRef builder = bld->gallivm->builder;
93    LLVMValueRef offset;
94    LLVMValueRef i, j;
95    LLVMValueRef use_border = NULL;
96
97    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
98    if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
99                                               static_state->min_img_filter,
100                                               static_state->mag_img_filter)) {
101       LLVMValueRef b1, b2;
102       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
103       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
104       use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
105    }
106
107    if (dims >= 2 &&
108        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
109                                               static_state->min_img_filter,
110                                               static_state->mag_img_filter)) {
111       LLVMValueRef b1, b2;
112       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
113       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
114       if (use_border) {
115          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
116          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
117       }
118       else {
119          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
120       }
121    }
122
123    if (dims == 3 &&
124        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
125                                               static_state->min_img_filter,
126                                               static_state->mag_img_filter)) {
127       LLVMValueRef b1, b2;
128       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
129       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
130       if (use_border) {
131          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
132          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
133       }
134       else {
135          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
136       }
137    }
138
139    /* convert x,y,z coords to linear offset from start of texture, in bytes */
140    lp_build_sample_offset(&bld->int_coord_bld,
141                           bld->format_desc,
142                           x, y, z, y_stride, z_stride,
143                           &offset, &i, &j);
144    if (mipoffsets) {
145       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
146    }
147
148    if (use_border) {
149       /* If we can sample the border color, it means that texcoords may
150        * lie outside the bounds of the texture image.  We need to do
151        * something to prevent reading out of bounds and causing a segfault.
152        *
153        * Simply AND the texture coords with !use_border.  This will cause
154        * coords which are out of bounds to become zero.  Zero's guaranteed
155        * to be inside the texture image.
156        */
157       offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
158    }
159
160    lp_build_fetch_rgba_soa(bld->gallivm,
161                            bld->format_desc,
162                            bld->texel_type,
163                            data_ptr, offset,
164                            i, j,
165                            texel_out);
166
167    /*
168     * Note: if we find an app which frequently samples the texture border
169     * we might want to implement a true conditional here to avoid sampling
170     * the texture whenever possible (since that's quite a bit of code).
171     * Ex:
172     *   if (use_border) {
173     *      texel = border_color;
174     *   }
175     *   else {
176     *      texel = sample_texture(coord);
177     *   }
178     * As it is now, we always sample the texture, then selectively replace
179     * the texel color results with the border color.
180     */
181
182    if (use_border) {
183       /* select texel color or border color depending on use_border. */
184       const struct util_format_description *format_desc = bld->format_desc;
185       int chan;
186       struct lp_type border_type = bld->texel_type;
187       border_type.length = 4;
188       /*
189        * Only replace channels which are actually present. The others should
190        * get optimized away eventually by sampler_view swizzle anyway but it's
191        * easier too.
192        */
193       for (chan = 0; chan < 4; chan++) {
194          unsigned chan_s;
195          /* reverse-map channel... */
196          for (chan_s = 0; chan_s < 4; chan_s++) {
197             if (chan_s == format_desc->swizzle[chan]) {
198                break;
199             }
200          }
201          if (chan_s <= 3) {
202             /* use the already clamped color */
203             LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
204             LLVMValueRef border_chan;
205
206             border_chan = lp_build_extract_broadcast(bld->gallivm,
207                                                      border_type,
208                                                      bld->texel_type,
209                                                      bld->border_color_clamped,
210                                                      idx);
211             texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
212                                               border_chan, texel_out[chan]);
213          }
214       }
215    }
216 }
217
218
219 /**
220  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
221  */
222 static LLVMValueRef
223 lp_build_coord_mirror(struct lp_build_sample_context *bld,
224                       LLVMValueRef coord)
225 {
226    struct lp_build_context *coord_bld = &bld->coord_bld;
227    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
228    LLVMValueRef fract, flr, isOdd;
229
230    lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
231
232    /* isOdd = flr & 1 */
233    isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
234
235    /* make coord positive or negative depending on isOdd */
236    coord = lp_build_set_sign(coord_bld, fract, isOdd);
237
238    /* convert isOdd to float */
239    isOdd = lp_build_int_to_float(coord_bld, isOdd);
240
241    /* add isOdd to coord */
242    coord = lp_build_add(coord_bld, coord, isOdd);
243
244    return coord;
245 }
246
247
248 /**
249  * Helper to compute the first coord and the weight for
250  * linear wrap repeat npot textures
251  */
252 void
253 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
254                                   LLVMValueRef coord_f,
255                                   LLVMValueRef length_i,
256                                   LLVMValueRef length_f,
257                                   LLVMValueRef *coord0_i,
258                                   LLVMValueRef *weight_f)
259 {
260    struct lp_build_context *coord_bld = &bld->coord_bld;
261    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
262    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
263    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
264                                                 int_coord_bld->one);
265    LLVMValueRef mask;
266    /* wrap with normalized floats is just fract */
267    coord_f = lp_build_fract(coord_bld, coord_f);
268    /* mul by size and subtract 0.5 */
269    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
270    coord_f = lp_build_sub(coord_bld, coord_f, half);
271    /*
272     * we avoided the 0.5/length division before the repeat wrap,
273     * now need to fix up edge cases with selects
274     */
275    /* convert to int, compute lerp weight */
276    lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
277    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
278                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
279    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
280 }
281
282
283 /**
284  * Build LLVM code for texture wrap mode for linear filtering.
285  * \param x0_out  returns first integer texcoord
286  * \param x1_out  returns second integer texcoord
287  * \param weight_out  returns linear interpolation weight
288  */
289 static void
290 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
291                             LLVMValueRef coord,
292                             LLVMValueRef length,
293                             LLVMValueRef length_f,
294                             LLVMValueRef offset,
295                             boolean is_pot,
296                             unsigned wrap_mode,
297                             LLVMValueRef *x0_out,
298                             LLVMValueRef *x1_out,
299                             LLVMValueRef *weight_out)
300 {
301    struct lp_build_context *coord_bld = &bld->coord_bld;
302    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
303    LLVMBuilderRef builder = bld->gallivm->builder;
304    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
305    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
306    LLVMValueRef coord0, coord1, weight;
307
308    switch(wrap_mode) {
309    case PIPE_TEX_WRAP_REPEAT:
310       if (is_pot) {
311          /* mul by size and subtract 0.5 */
312          coord = lp_build_mul(coord_bld, coord, length_f);
313          coord = lp_build_sub(coord_bld, coord, half);
314          if (offset) {
315             offset = lp_build_int_to_float(coord_bld, offset);
316             coord = lp_build_add(coord_bld, coord, offset);
317          }
318          /* convert to int, compute lerp weight */
319          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
320          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
321          /* repeat wrap */
322          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
323          coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
324       }
325       else {
326          LLVMValueRef mask;
327          if (offset) {
328             offset = lp_build_int_to_float(coord_bld, offset);
329             offset = lp_build_div(coord_bld, offset, length_f);
330             coord = lp_build_add(coord_bld, coord, offset);
331          }
332          lp_build_coord_repeat_npot_linear(bld, coord,
333                                            length, length_f,
334                                            &coord0, &weight);
335          mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
336                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
337          coord1 = LLVMBuildAnd(builder,
338                                lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
339                                mask, "");
340       }
341       break;
342
343    case PIPE_TEX_WRAP_CLAMP:
344       if (bld->static_sampler_state->normalized_coords) {
345          /* scale coord to length */
346          coord = lp_build_mul(coord_bld, coord, length_f);
347       }
348       if (offset) {
349          offset = lp_build_int_to_float(coord_bld, offset);
350          coord = lp_build_add(coord_bld, coord, offset);
351       }
352
353       /* clamp to [0, length] */
354       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
355
356       coord = lp_build_sub(coord_bld, coord, half);
357
358       /* convert to int, compute lerp weight */
359       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
360       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
361       break;
362
363    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
364       {
365          struct lp_build_context abs_coord_bld = bld->coord_bld;
366          abs_coord_bld.type.sign = FALSE;
367
368          if (bld->static_sampler_state->normalized_coords) {
369             /* mul by tex size */
370             coord = lp_build_mul(coord_bld, coord, length_f);
371          }
372          if (offset) {
373             offset = lp_build_int_to_float(coord_bld, offset);
374             coord = lp_build_add(coord_bld, coord, offset);
375          }
376
377          /* clamp to length max */
378          coord = lp_build_min(coord_bld, coord, length_f);
379          /* subtract 0.5 */
380          coord = lp_build_sub(coord_bld, coord, half);
381          /* clamp to [0, length - 0.5] */
382          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
383          /* convert to int, compute lerp weight */
384          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
385          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
386          /* coord1 = min(coord1, length-1) */
387          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
388          break;
389       }
390
391    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
392       if (bld->static_sampler_state->normalized_coords) {
393          /* scale coord to length */
394          coord = lp_build_mul(coord_bld, coord, length_f);
395       }
396       if (offset) {
397          offset = lp_build_int_to_float(coord_bld, offset);
398          coord = lp_build_add(coord_bld, coord, offset);
399       }
400       /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
401       /* can skip clamp (though might not work for very large coord values */
402       coord = lp_build_sub(coord_bld, coord, half);
403       /* convert to int, compute lerp weight */
404       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
405       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
406       break;
407
408    case PIPE_TEX_WRAP_MIRROR_REPEAT:
409       /* compute mirror function */
410       coord = lp_build_coord_mirror(bld, coord);
411
412       /* scale coord to length */
413       coord = lp_build_mul(coord_bld, coord, length_f);
414       coord = lp_build_sub(coord_bld, coord, half);
415       if (offset) {
416          offset = lp_build_int_to_float(coord_bld, offset);
417          coord = lp_build_add(coord_bld, coord, offset);
418       }
419
420       /* convert to int, compute lerp weight */
421       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
422       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
423
424       /* coord0 = max(coord0, 0) */
425       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
426       /* coord1 = min(coord1, length-1) */
427       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
428       break;
429
430    case PIPE_TEX_WRAP_MIRROR_CLAMP:
431       if (bld->static_sampler_state->normalized_coords) {
432          /* scale coord to length */
433          coord = lp_build_mul(coord_bld, coord, length_f);
434       }
435       if (offset) {
436          offset = lp_build_int_to_float(coord_bld, offset);
437          coord = lp_build_add(coord_bld, coord, offset);
438       }
439       coord = lp_build_abs(coord_bld, coord);
440
441       /* clamp to [0, length] */
442       coord = lp_build_min(coord_bld, coord, length_f);
443
444       coord = lp_build_sub(coord_bld, coord, half);
445
446       /* convert to int, compute lerp weight */
447       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
448       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
449       break;
450
451    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
452       {
453          struct lp_build_context abs_coord_bld = bld->coord_bld;
454          abs_coord_bld.type.sign = FALSE;
455
456          if (bld->static_sampler_state->normalized_coords) {
457             /* scale coord to length */
458             coord = lp_build_mul(coord_bld, coord, length_f);
459          }
460          if (offset) {
461             offset = lp_build_int_to_float(coord_bld, offset);
462             coord = lp_build_add(coord_bld, coord, offset);
463          }
464          coord = lp_build_abs(coord_bld, coord);
465
466          /* clamp to length max */
467          coord = lp_build_min(coord_bld, coord, length_f);
468          /* subtract 0.5 */
469          coord = lp_build_sub(coord_bld, coord, half);
470          /* clamp to [0, length - 0.5] */
471          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
472
473          /* convert to int, compute lerp weight */
474          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
475          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
476          /* coord1 = min(coord1, length-1) */
477          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
478       }
479       break;
480
481    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
482       {
483          if (bld->static_sampler_state->normalized_coords) {
484             /* scale coord to length */
485             coord = lp_build_mul(coord_bld, coord, length_f);
486          }
487          if (offset) {
488             offset = lp_build_int_to_float(coord_bld, offset);
489             coord = lp_build_add(coord_bld, coord, offset);
490          }
491          coord = lp_build_abs(coord_bld, coord);
492
493          /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
494          /* skip clamp - always positive, and other side
495             only potentially matters for very large coords */
496          coord = lp_build_sub(coord_bld, coord, half);
497
498          /* convert to int, compute lerp weight */
499          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
500          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
501       }
502       break;
503
504    default:
505       assert(0);
506       coord0 = NULL;
507       coord1 = NULL;
508       weight = NULL;
509    }
510
511    *x0_out = coord0;
512    *x1_out = coord1;
513    *weight_out = weight;
514 }
515
516
517 /**
518  * Build LLVM code for texture wrap mode for nearest filtering.
519  * \param coord  the incoming texcoord (nominally in [0,1])
520  * \param length  the texture size along one dimension, as int vector
521  * \param length_f  the texture size along one dimension, as float vector
522  * \param offset  texel offset along one dimension (as int vector)
523  * \param is_pot  if TRUE, length is a power of two
524  * \param wrap_mode  one of PIPE_TEX_WRAP_x
525  */
526 static LLVMValueRef
527 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
528                              LLVMValueRef coord,
529                              LLVMValueRef length,
530                              LLVMValueRef length_f,
531                              LLVMValueRef offset,
532                              boolean is_pot,
533                              unsigned wrap_mode)
534 {
535    struct lp_build_context *coord_bld = &bld->coord_bld;
536    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
537    LLVMBuilderRef builder = bld->gallivm->builder;
538    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
539    LLVMValueRef icoord;
540    
541    switch(wrap_mode) {
542    case PIPE_TEX_WRAP_REPEAT:
543       if (is_pot) {
544          coord = lp_build_mul(coord_bld, coord, length_f);
545          icoord = lp_build_ifloor(coord_bld, coord);
546          if (offset) {
547             icoord = lp_build_add(int_coord_bld, icoord, offset);
548          }
549          icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
550       }
551       else {
552           if (offset) {
553              offset = lp_build_int_to_float(coord_bld, offset);
554              offset = lp_build_div(coord_bld, offset, length_f);
555              coord = lp_build_add(coord_bld, coord, offset);
556           }
557           /* take fraction, unnormalize */
558           coord = lp_build_fract_safe(coord_bld, coord);
559           coord = lp_build_mul(coord_bld, coord, length_f);
560           icoord = lp_build_itrunc(coord_bld, coord);
561       }
562       break;
563
564    case PIPE_TEX_WRAP_CLAMP:
565    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
566       if (bld->static_sampler_state->normalized_coords) {
567          /* scale coord to length */
568          coord = lp_build_mul(coord_bld, coord, length_f);
569       }
570
571       /* floor */
572       /* use itrunc instead since we clamp to 0 anyway */
573       icoord = lp_build_itrunc(coord_bld, coord);
574       if (offset) {
575          icoord = lp_build_add(int_coord_bld, icoord, offset);
576       }
577
578       /* clamp to [0, length - 1]. */
579       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
580                               length_minus_one);
581       break;
582
583    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
584       if (bld->static_sampler_state->normalized_coords) {
585          /* scale coord to length */
586          coord = lp_build_mul(coord_bld, coord, length_f);
587       }
588       /* no clamp necessary, border masking will handle this */
589       icoord = lp_build_ifloor(coord_bld, coord);
590       if (offset) {
591          icoord = lp_build_add(int_coord_bld, icoord, offset);
592       }
593       break;
594
595    case PIPE_TEX_WRAP_MIRROR_REPEAT:
596       if (offset) {
597          offset = lp_build_int_to_float(coord_bld, offset);
598          offset = lp_build_div(coord_bld, offset, length_f);
599          coord = lp_build_add(coord_bld, coord, offset);
600       }
601       /* compute mirror function */
602       coord = lp_build_coord_mirror(bld, coord);
603
604       /* scale coord to length */
605       assert(bld->static_sampler_state->normalized_coords);
606       coord = lp_build_mul(coord_bld, coord, length_f);
607
608       /* itrunc == ifloor here */
609       icoord = lp_build_itrunc(coord_bld, coord);
610
611       /* clamp to [0, length - 1] */
612       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
613       break;
614
615    case PIPE_TEX_WRAP_MIRROR_CLAMP:
616    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
617       if (bld->static_sampler_state->normalized_coords) {
618          /* scale coord to length */
619          coord = lp_build_mul(coord_bld, coord, length_f);
620       }
621       if (offset) {
622          offset = lp_build_int_to_float(coord_bld, offset);
623          coord = lp_build_add(coord_bld, coord, offset);
624       }
625       coord = lp_build_abs(coord_bld, coord);
626
627       /* itrunc == ifloor here */
628       icoord = lp_build_itrunc(coord_bld, coord);
629
630       /* clamp to [0, length - 1] */
631       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
632       break;
633
634    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
635       if (bld->static_sampler_state->normalized_coords) {
636          /* scale coord to length */
637          coord = lp_build_mul(coord_bld, coord, length_f);
638       }
639       if (offset) {
640          offset = lp_build_int_to_float(coord_bld, offset);
641          coord = lp_build_add(coord_bld, coord, offset);
642       }
643       coord = lp_build_abs(coord_bld, coord);
644
645       /* itrunc == ifloor here */
646       icoord = lp_build_itrunc(coord_bld, coord);
647       break;
648
649    default:
650       assert(0);
651       icoord = NULL;
652    }
653
654    return icoord;
655 }
656
657
658 /**
659  * Do shadow test/comparison.
660  * \param p shadow ref value
661  * \param texel  the texel to compare against
662  */
663 static LLVMValueRef
664 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
665                             LLVMValueRef p,
666                             LLVMValueRef texel)
667 {
668    struct lp_build_context *texel_bld = &bld->texel_bld;
669    LLVMValueRef res;
670
671    if (0) {
672       //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
673       lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
674    }
675
676    /* result = (p FUNC texel) ? 1 : 0 */
677    /*
678     * honor d3d10 floating point rules here, which state that comparisons
679     * are ordered except NOT_EQUAL which is unordered.
680     */
681    if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
682       res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
683                                  p, texel);
684    }
685    else {
686       res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
687                          p, texel);
688    }
689    return res;
690 }
691
692
693 /**
694  * Generate code to sample a mipmap level with nearest filtering.
695  * If sampling a cube texture, r = cube face in [0,5].
696  */
697 static void
698 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
699                               unsigned sampler_unit,
700                               LLVMValueRef size,
701                               LLVMValueRef row_stride_vec,
702                               LLVMValueRef img_stride_vec,
703                               LLVMValueRef data_ptr,
704                               LLVMValueRef mipoffsets,
705                               LLVMValueRef *coords,
706                               const LLVMValueRef *offsets,
707                               LLVMValueRef colors_out[4])
708 {
709    const unsigned dims = bld->dims;
710    LLVMValueRef width_vec;
711    LLVMValueRef height_vec;
712    LLVMValueRef depth_vec;
713    LLVMValueRef flt_size;
714    LLVMValueRef flt_width_vec;
715    LLVMValueRef flt_height_vec;
716    LLVMValueRef flt_depth_vec;
717    LLVMValueRef x, y = NULL, z = NULL;
718
719    lp_build_extract_image_sizes(bld,
720                                 &bld->int_size_bld,
721                                 bld->int_coord_type,
722                                 size,
723                                 &width_vec, &height_vec, &depth_vec);
724
725    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
726
727    lp_build_extract_image_sizes(bld,
728                                 &bld->float_size_bld,
729                                 bld->coord_type,
730                                 flt_size,
731                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
732
733    /*
734     * Compute integer texcoords.
735     */
736    x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
737                                     flt_width_vec, offsets[0],
738                                     bld->static_texture_state->pot_width,
739                                     bld->static_sampler_state->wrap_s);
740    lp_build_name(x, "tex.x.wrapped");
741
742    if (dims >= 2) {
743       y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
744                                        flt_height_vec, offsets[1],
745                                        bld->static_texture_state->pot_height,
746                                        bld->static_sampler_state->wrap_t);
747       lp_build_name(y, "tex.y.wrapped");
748
749       if (dims == 3) {
750          z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
751                                           flt_depth_vec, offsets[2],
752                                           bld->static_texture_state->pot_depth,
753                                           bld->static_sampler_state->wrap_r);
754          lp_build_name(z, "tex.z.wrapped");
755       }
756    }
757    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
758        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
759        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
760       z = coords[2];
761       lp_build_name(z, "tex.z.layer");
762    }
763
764    /*
765     * Get texture colors.
766     */
767    lp_build_sample_texel_soa(bld, sampler_unit,
768                              width_vec, height_vec, depth_vec,
769                              x, y, z,
770                              row_stride_vec, img_stride_vec,
771                              data_ptr, mipoffsets, colors_out);
772
773    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
774       LLVMValueRef cmpval;
775       cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
776       /* this is really just a AND 1.0, cmpval but llvm is clever enough */
777       colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
778                                       bld->texel_bld.one, bld->texel_bld.zero);
779       colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
780    }
781
782 }
783
784
785 /**
786  * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
787  */
788 static LLVMValueRef
789 lp_build_masklerp(struct lp_build_context *bld,
790                  LLVMValueRef weight,
791                  LLVMValueRef mask0,
792                  LLVMValueRef mask1)
793 {
794    struct gallivm_state *gallivm = bld->gallivm;
795    LLVMBuilderRef builder = gallivm->builder;
796    LLVMValueRef weight2;
797
798    weight2 = lp_build_sub(bld, bld->one, weight);
799    weight = LLVMBuildBitCast(builder, weight,
800                               lp_build_int_vec_type(gallivm, bld->type), "");
801    weight2 = LLVMBuildBitCast(builder, weight2,
802                               lp_build_int_vec_type(gallivm, bld->type), "");
803    weight = LLVMBuildAnd(builder, weight, mask1, "");
804    weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
805    weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
806    weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
807    return lp_build_add(bld, weight, weight2);
808 }
809
810 /**
811  * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
812  */
813 static LLVMValueRef
814 lp_build_masklerp2d(struct lp_build_context *bld,
815                     LLVMValueRef weight0,
816                     LLVMValueRef weight1,
817                     LLVMValueRef mask00,
818                     LLVMValueRef mask01,
819                     LLVMValueRef mask10,
820                     LLVMValueRef mask11)
821 {
822    LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
823    LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
824    return lp_build_lerp(bld, weight1, val0, val1, 0);
825 }
826
827 /**
828  * Generate code to sample a mipmap level with linear filtering.
829  * If sampling a cube texture, r = cube face in [0,5].
830  * If linear_mask is present, only pixels having their mask set
831  * will receive linear filtering, the rest will use nearest.
832  */
833 static void
834 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
835                              unsigned sampler_unit,
836                              LLVMValueRef size,
837                              LLVMValueRef linear_mask,
838                              LLVMValueRef row_stride_vec,
839                              LLVMValueRef img_stride_vec,
840                              LLVMValueRef data_ptr,
841                              LLVMValueRef mipoffsets,
842                              LLVMValueRef *coords,
843                              const LLVMValueRef *offsets,
844                              LLVMValueRef colors_out[4])
845 {
846    const unsigned dims = bld->dims;
847    LLVMValueRef width_vec;
848    LLVMValueRef height_vec;
849    LLVMValueRef depth_vec;
850    LLVMValueRef flt_size;
851    LLVMValueRef flt_width_vec;
852    LLVMValueRef flt_height_vec;
853    LLVMValueRef flt_depth_vec;
854    LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
855    LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
856    LLVMValueRef neighbors[2][2][4];
857    int chan;
858
859    lp_build_extract_image_sizes(bld,
860                                 &bld->int_size_bld,
861                                 bld->int_coord_type,
862                                 size,
863                                 &width_vec, &height_vec, &depth_vec);
864
865    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
866
867    lp_build_extract_image_sizes(bld,
868                                 &bld->float_size_bld,
869                                 bld->coord_type,
870                                 flt_size,
871                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
872
873    /*
874     * Compute integer texcoords.
875     */
876    lp_build_sample_wrap_linear(bld, coords[0], width_vec,
877                                flt_width_vec, offsets[0],
878                                bld->static_texture_state->pot_width,
879                                bld->static_sampler_state->wrap_s,
880                                &x0, &x1, &s_fpart);
881    lp_build_name(x0, "tex.x0.wrapped");
882    lp_build_name(x1, "tex.x1.wrapped");
883
884    if (dims >= 2) {
885       lp_build_sample_wrap_linear(bld, coords[1], height_vec,
886                                   flt_height_vec, offsets[1],
887                                   bld->static_texture_state->pot_height,
888                                   bld->static_sampler_state->wrap_t,
889                                   &y0, &y1, &t_fpart);
890       lp_build_name(y0, "tex.y0.wrapped");
891       lp_build_name(y1, "tex.y1.wrapped");
892
893       if (dims == 3) {
894          lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
895                                      flt_depth_vec, offsets[2],
896                                      bld->static_texture_state->pot_depth,
897                                      bld->static_sampler_state->wrap_r,
898                                      &z0, &z1, &r_fpart);
899          lp_build_name(z0, "tex.z0.wrapped");
900          lp_build_name(z1, "tex.z1.wrapped");
901       }
902    }
903    if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
904        bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
905        bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
906       z0 = z1 = coords[2];  /* cube face or layer */
907       lp_build_name(z0, "tex.z0.layer");
908       lp_build_name(z1, "tex.z1.layer");
909    }
910
911    if (linear_mask) {
912       /*
913        * Whack filter weights into place. Whatever pixel had more weight is
914        * the one which should have been selected by nearest filtering hence
915        * just use 100% weight for it.
916        */
917       struct lp_build_context *c_bld = &bld->coord_bld;
918       LLVMValueRef w1_mask, w1_weight;
919       LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
920
921       w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
922       /* this select is really just a "and" */
923       w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
924       s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
925       if (dims >= 2) {
926          w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
927          w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
928          t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
929          if (dims == 3) {
930             w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
931             w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
932             r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
933          }
934       }
935    }
936
937    /*
938     * Get texture colors.
939     */
940    /* get x0/x1 texels */
941    lp_build_sample_texel_soa(bld, sampler_unit,
942                              width_vec, height_vec, depth_vec,
943                              x0, y0, z0,
944                              row_stride_vec, img_stride_vec,
945                              data_ptr, mipoffsets, neighbors[0][0]);
946    lp_build_sample_texel_soa(bld, sampler_unit,
947                              width_vec, height_vec, depth_vec,
948                              x1, y0, z0,
949                              row_stride_vec, img_stride_vec,
950                              data_ptr, mipoffsets, neighbors[0][1]);
951
952    if (dims == 1) {
953       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
954          /* Interpolate two samples from 1D image to produce one color */
955          for (chan = 0; chan < 4; chan++) {
956             colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
957                                              neighbors[0][0][chan],
958                                              neighbors[0][1][chan],
959                                              0);
960          }
961       }
962       else {
963          LLVMValueRef cmpval0, cmpval1;
964          cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
965          cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
966          /* simplified lerp, AND mask with weight and add */
967          colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
968                                            cmpval0, cmpval1);
969          colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
970       }
971    }
972    else {
973       /* 2D/3D texture */
974       LLVMValueRef colors0[4];
975
976       /* get x0/x1 texels at y1 */
977       lp_build_sample_texel_soa(bld, sampler_unit,
978                                 width_vec, height_vec, depth_vec,
979                                 x0, y1, z0,
980                                 row_stride_vec, img_stride_vec,
981                                 data_ptr, mipoffsets, neighbors[1][0]);
982       lp_build_sample_texel_soa(bld, sampler_unit,
983                                 width_vec, height_vec, depth_vec,
984                                 x1, y1, z0,
985                                 row_stride_vec, img_stride_vec,
986                                 data_ptr, mipoffsets, neighbors[1][1]);
987
988       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
989          /* Bilinear interpolate the four samples from the 2D image / 3D slice */
990          for (chan = 0; chan < 4; chan++) {
991             colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
992                                              s_fpart, t_fpart,
993                                              neighbors[0][0][chan],
994                                              neighbors[0][1][chan],
995                                              neighbors[1][0][chan],
996                                              neighbors[1][1][chan],
997                                              0);
998          }
999       }
1000       else {
1001          LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1002          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1003          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1004          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1005          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1006          colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1007                                           cmpval00, cmpval01, cmpval10, cmpval11);
1008          colors0[1] = colors0[2] = colors0[3] = colors0[0];
1009       }
1010
1011       if (dims == 3) {
1012          LLVMValueRef neighbors1[2][2][4];
1013          LLVMValueRef colors1[4];
1014
1015          /* get x0/x1/y0/y1 texels at z1 */
1016          lp_build_sample_texel_soa(bld, sampler_unit,
1017                                    width_vec, height_vec, depth_vec,
1018                                    x0, y0, z1,
1019                                    row_stride_vec, img_stride_vec,
1020                                    data_ptr, mipoffsets, neighbors1[0][0]);
1021          lp_build_sample_texel_soa(bld, sampler_unit,
1022                                    width_vec, height_vec, depth_vec,
1023                                    x1, y0, z1,
1024                                    row_stride_vec, img_stride_vec,
1025                                    data_ptr, mipoffsets, neighbors1[0][1]);
1026          lp_build_sample_texel_soa(bld, sampler_unit,
1027                                    width_vec, height_vec, depth_vec,
1028                                    x0, y1, z1,
1029                                    row_stride_vec, img_stride_vec,
1030                                    data_ptr, mipoffsets, neighbors1[1][0]);
1031          lp_build_sample_texel_soa(bld, sampler_unit,
1032                                    width_vec, height_vec, depth_vec,
1033                                    x1, y1, z1,
1034                                    row_stride_vec, img_stride_vec,
1035                                    data_ptr, mipoffsets, neighbors1[1][1]);
1036
1037          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1038             /* Bilinear interpolate the four samples from the second Z slice */
1039             for (chan = 0; chan < 4; chan++) {
1040                colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1041                                                 s_fpart, t_fpart,
1042                                                 neighbors1[0][0][chan],
1043                                                 neighbors1[0][1][chan],
1044                                                 neighbors1[1][0][chan],
1045                                                 neighbors1[1][1][chan],
1046                                                 0);
1047             }
1048             /* Linearly interpolate the two samples from the two 3D slices */
1049             for (chan = 0; chan < 4; chan++) {
1050                colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1051                                                 r_fpart,
1052                                                 colors0[chan], colors1[chan],
1053                                                 0);
1054             }
1055          }
1056          else {
1057             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1058             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1059             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1060             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1061             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1062             colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1063                                              cmpval00, cmpval01, cmpval10, cmpval11);
1064             /* Linearly interpolate the two samples from the two 3D slices */
1065             colors_out[0] = lp_build_lerp(&bld->texel_bld,
1066                                              r_fpart,
1067                                              colors0[0], colors1[0],
1068                                              0);
1069             colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1070          }
1071       }
1072       else {
1073          /* 2D tex */
1074          for (chan = 0; chan < 4; chan++) {
1075             colors_out[chan] = colors0[chan];
1076          }
1077       }
1078    }
1079 }
1080
1081
1082 /**
1083  * Sample the texture/mipmap using given image filter and mip filter.
1084  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1085  * from (vectors or scalars).
1086  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1087  */
1088 static void
1089 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1090                        unsigned sampler_unit,
1091                        unsigned img_filter,
1092                        unsigned mip_filter,
1093                        LLVMValueRef *coords,
1094                        const LLVMValueRef *offsets,
1095                        LLVMValueRef ilevel0,
1096                        LLVMValueRef ilevel1,
1097                        LLVMValueRef lod_fpart,
1098                        LLVMValueRef *colors_out)
1099 {
1100    LLVMBuilderRef builder = bld->gallivm->builder;
1101    LLVMValueRef size0 = NULL;
1102    LLVMValueRef size1 = NULL;
1103    LLVMValueRef row_stride0_vec = NULL;
1104    LLVMValueRef row_stride1_vec = NULL;
1105    LLVMValueRef img_stride0_vec = NULL;
1106    LLVMValueRef img_stride1_vec = NULL;
1107    LLVMValueRef data_ptr0 = NULL;
1108    LLVMValueRef data_ptr1 = NULL;
1109    LLVMValueRef mipoff0 = NULL;
1110    LLVMValueRef mipoff1 = NULL;
1111    LLVMValueRef colors0[4], colors1[4];
1112    unsigned chan;
1113
1114    /* sample the first mipmap level */
1115    lp_build_mipmap_level_sizes(bld, ilevel0,
1116                                &size0,
1117                                &row_stride0_vec, &img_stride0_vec);
1118    if (bld->num_mips == 1) {
1119       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1120    }
1121    else {
1122       /* This path should work for num_lods 1 too but slightly less efficient */
1123       data_ptr0 = bld->base_ptr;
1124       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1125    }
1126    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1127       lp_build_sample_image_nearest(bld, sampler_unit,
1128                                     size0,
1129                                     row_stride0_vec, img_stride0_vec,
1130                                     data_ptr0, mipoff0, coords, offsets,
1131                                     colors0);
1132    }
1133    else {
1134       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1135       lp_build_sample_image_linear(bld, sampler_unit,
1136                                    size0, NULL,
1137                                    row_stride0_vec, img_stride0_vec,
1138                                    data_ptr0, mipoff0, coords, offsets,
1139                                    colors0);
1140    }
1141
1142    /* Store the first level's colors in the output variables */
1143    for (chan = 0; chan < 4; chan++) {
1144        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1145    }
1146
1147    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1148       struct lp_build_if_state if_ctx;
1149       LLVMValueRef need_lerp;
1150
1151       /* need_lerp = lod_fpart > 0 */
1152       if (bld->num_lods == 1) {
1153          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1154                                    lod_fpart, bld->lodf_bld.zero,
1155                                    "need_lerp");
1156       }
1157       else {
1158          /*
1159           * We'll do mip filtering if any of the quads (or individual
1160           * pixel in case of per-pixel lod) need it.
1161           * It might be better to split the vectors here and only fetch/filter
1162           * quads which need it (if there's one lod per quad).
1163           */
1164          need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1165                                       PIPE_FUNC_GREATER,
1166                                       lod_fpart, bld->lodf_bld.zero);
1167          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1168       }
1169
1170       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1171       {
1172          /*
1173           * We unfortunately need to clamp lod_fpart here since we can get
1174           * negative values which would screw up filtering if not all
1175           * lod_fpart values have same sign.
1176           */
1177          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1178                                   bld->lodf_bld.zero);
1179          /* sample the second mipmap level */
1180          lp_build_mipmap_level_sizes(bld, ilevel1,
1181                                      &size1,
1182                                      &row_stride1_vec, &img_stride1_vec);
1183          if (bld->num_mips == 1) {
1184             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1185          }
1186          else {
1187             data_ptr1 = bld->base_ptr;
1188             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1189          }
1190          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1191             lp_build_sample_image_nearest(bld, sampler_unit,
1192                                           size1,
1193                                           row_stride1_vec, img_stride1_vec,
1194                                           data_ptr1, mipoff1, coords, offsets,
1195                                           colors1);
1196          }
1197          else {
1198             lp_build_sample_image_linear(bld, sampler_unit,
1199                                          size1, NULL,
1200                                          row_stride1_vec, img_stride1_vec,
1201                                          data_ptr1, mipoff1, coords, offsets,
1202                                          colors1);
1203          }
1204
1205          /* interpolate samples from the two mipmap levels */
1206
1207          if (bld->num_lods != bld->coord_type.length)
1208             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1209                                                               bld->lodf_bld.type,
1210                                                               bld->texel_bld.type,
1211                                                               lod_fpart);
1212
1213          for (chan = 0; chan < 4; chan++) {
1214             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1215                                           colors0[chan], colors1[chan],
1216                                           0);
1217             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1218          }
1219       }
1220       lp_build_endif(&if_ctx);
1221    }
1222 }
1223
1224
1225 /**
1226  * Sample the texture/mipmap using given mip filter, and using
1227  * both nearest and linear filtering at the same time depending
1228  * on linear_mask.
1229  * lod can be per quad but linear_mask is always per pixel.
1230  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1231  * from (vectors or scalars).
1232  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1233  */
1234 static void
1235 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1236                             unsigned sampler_unit,
1237                             LLVMValueRef linear_mask,
1238                             unsigned mip_filter,
1239                             LLVMValueRef *coords,
1240                             const LLVMValueRef *offsets,
1241                             LLVMValueRef ilevel0,
1242                             LLVMValueRef ilevel1,
1243                             LLVMValueRef lod_fpart,
1244                             LLVMValueRef lod_positive,
1245                             LLVMValueRef *colors_out)
1246 {
1247    LLVMBuilderRef builder = bld->gallivm->builder;
1248    LLVMValueRef size0 = NULL;
1249    LLVMValueRef size1 = NULL;
1250    LLVMValueRef row_stride0_vec = NULL;
1251    LLVMValueRef row_stride1_vec = NULL;
1252    LLVMValueRef img_stride0_vec = NULL;
1253    LLVMValueRef img_stride1_vec = NULL;
1254    LLVMValueRef data_ptr0 = NULL;
1255    LLVMValueRef data_ptr1 = NULL;
1256    LLVMValueRef mipoff0 = NULL;
1257    LLVMValueRef mipoff1 = NULL;
1258    LLVMValueRef colors0[4], colors1[4];
1259    unsigned chan;
1260
1261    /* sample the first mipmap level */
1262    lp_build_mipmap_level_sizes(bld, ilevel0,
1263                                &size0,
1264                                &row_stride0_vec, &img_stride0_vec);
1265    if (bld->num_mips == 1) {
1266       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1267    }
1268    else {
1269       /* This path should work for num_lods 1 too but slightly less efficient */
1270       data_ptr0 = bld->base_ptr;
1271       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1272    }
1273
1274    lp_build_sample_image_linear(bld, sampler_unit,
1275                                 size0, linear_mask,
1276                                 row_stride0_vec, img_stride0_vec,
1277                                 data_ptr0, mipoff0, coords, offsets,
1278                                 colors0);
1279
1280    /* Store the first level's colors in the output variables */
1281    for (chan = 0; chan < 4; chan++) {
1282        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1283    }
1284
1285    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1286       struct lp_build_if_state if_ctx;
1287       LLVMValueRef need_lerp;
1288
1289       /*
1290        * We'll do mip filtering if any of the quads (or individual
1291        * pixel in case of per-pixel lod) need it.
1292        * Note using lod_positive here not lod_fpart since it may be the same
1293        * condition as that used in the outer "if" in the caller hence llvm
1294        * should be able to merge the branches in this case.
1295        */
1296       need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1297
1298       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1299       {
1300          /*
1301           * We unfortunately need to clamp lod_fpart here since we can get
1302           * negative values which would screw up filtering if not all
1303           * lod_fpart values have same sign.
1304           */
1305          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1306                                   bld->lodf_bld.zero);
1307          /* sample the second mipmap level */
1308          lp_build_mipmap_level_sizes(bld, ilevel1,
1309                                      &size1,
1310                                      &row_stride1_vec, &img_stride1_vec);
1311          if (bld->num_mips == 1) {
1312             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1313          }
1314          else {
1315             data_ptr1 = bld->base_ptr;
1316             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1317          }
1318
1319          lp_build_sample_image_linear(bld, sampler_unit,
1320                                       size1, linear_mask,
1321                                       row_stride1_vec, img_stride1_vec,
1322                                       data_ptr1, mipoff1, coords, offsets,
1323                                       colors1);
1324
1325          /* interpolate samples from the two mipmap levels */
1326
1327          if (bld->num_lods != bld->coord_type.length)
1328             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1329                                                               bld->lodf_bld.type,
1330                                                               bld->texel_bld.type,
1331                                                               lod_fpart);
1332
1333          for (chan = 0; chan < 4; chan++) {
1334             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1335                                           colors0[chan], colors1[chan],
1336                                           0);
1337             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1338          }
1339       }
1340       lp_build_endif(&if_ctx);
1341    }
1342 }
1343
1344
1345 /**
1346  * Build (per-coord) layer value.
1347  * Either clamp layer to valid values or fill in optional out_of_bounds
1348  * value and just return value unclamped.
1349  */
1350 static LLVMValueRef
1351 lp_build_layer_coord(struct lp_build_sample_context *bld,
1352                      unsigned texture_unit,
1353                      LLVMValueRef layer,
1354                      LLVMValueRef *out_of_bounds)
1355 {
1356    LLVMValueRef num_layers;
1357    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1358
1359    num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1360                                           bld->gallivm, texture_unit);
1361
1362    if (out_of_bounds) {
1363       LLVMValueRef out1, out;
1364       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1365       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1366       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1367       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1368       return layer;
1369    }
1370    else {
1371       LLVMValueRef maxlayer;
1372       maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1373       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1374       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1375    }
1376 }
1377
1378
1379 /**
1380  * Calculate cube face, lod, mip levels.
1381  */
1382 static void
1383 lp_build_sample_common(struct lp_build_sample_context *bld,
1384                        unsigned texture_index,
1385                        unsigned sampler_index,
1386                        LLVMValueRef *coords,
1387                        const struct lp_derivatives *derivs, /* optional */
1388                        LLVMValueRef lod_bias, /* optional */
1389                        LLVMValueRef explicit_lod, /* optional */
1390                        LLVMValueRef *lod_pos_or_zero,
1391                        LLVMValueRef *lod_fpart,
1392                        LLVMValueRef *ilevel0,
1393                        LLVMValueRef *ilevel1)
1394 {
1395    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1396    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1397    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1398    const unsigned target = bld->static_texture_state->target;
1399    LLVMValueRef first_level, cube_rho = NULL;
1400    LLVMValueRef lod_ipart = NULL;
1401
1402    /*
1403    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1404           mip_filter, min_filter, mag_filter);
1405    */
1406
1407    /*
1408     * Choose cube face, recompute texcoords for the chosen face and
1409     * compute rho here too (as it requires transform of derivatives).
1410     */
1411    if (target == PIPE_TEXTURE_CUBE) {
1412       boolean need_derivs;
1413       need_derivs = ((min_filter != mag_filter ||
1414                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1415                       !bld->static_sampler_state->min_max_lod_equal &&
1416                       !explicit_lod);
1417       lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
1418    }
1419    else if (target == PIPE_TEXTURE_1D_ARRAY ||
1420             target == PIPE_TEXTURE_2D_ARRAY) {
1421       coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1422       coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
1423    }
1424
1425    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1426       /*
1427        * Clamp p coords to [0,1] for fixed function depth texture format here.
1428        * Technically this is not entirely correct for unorm depth as the ref value
1429        * should be converted to the depth format (quantization!) and comparison
1430        * then done in texture format. This would actually help performance (since
1431        * only need to do it once and could save the per-sample conversion of texels
1432        * to floats instead), but it would need more messy code (would need to push
1433        * at least some bits down to actual fetch so conversion could be skipped,
1434        * and would have ugly interaction with border color, would need to convert
1435        * border color to that format too or do some other tricks to make it work).
1436        */
1437       const struct util_format_description *format_desc = bld->format_desc;
1438       unsigned chan_type;
1439       /* not entirely sure we couldn't end up with non-valid swizzle here */
1440       chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1441                      format_desc->channel[format_desc->swizzle[0]].type :
1442                      UTIL_FORMAT_TYPE_FLOAT;
1443       if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1444          coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1445                                     bld->coord_bld.zero, bld->coord_bld.one);
1446       }
1447    }
1448
1449    /*
1450     * Compute the level of detail (float).
1451     */
1452    if (min_filter != mag_filter ||
1453        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1454       /* Need to compute lod either to choose mipmap levels or to
1455        * distinguish between minification/magnification with one mipmap level.
1456        */
1457       lp_build_lod_selector(bld, texture_index, sampler_index,
1458                             coords[0], coords[1], coords[2], cube_rho,
1459                             derivs, lod_bias, explicit_lod,
1460                             mip_filter,
1461                             &lod_ipart, lod_fpart, lod_pos_or_zero);
1462    } else {
1463       lod_ipart = bld->lodi_bld.zero;
1464       *lod_pos_or_zero = bld->lodi_bld.zero;
1465    }
1466
1467    if (bld->num_lods != bld->num_mips) {
1468       /* only makes sense if there's just a single mip level */
1469       assert(bld->num_mips == 1);
1470       lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
1471    }
1472
1473    /*
1474     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1475     */
1476    switch (mip_filter) {
1477    default:
1478       assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1479       /* fall-through */
1480    case PIPE_TEX_MIPFILTER_NONE:
1481       /* always use mip level 0 */
1482       if (HAVE_LLVM == 0x0207 && target == PIPE_TEXTURE_CUBE) {
1483          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1484           * We should be able to set ilevel0 = const(0) but that causes
1485           * bad x86 code to be emitted.
1486           */
1487          assert(lod_ipart);
1488          lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
1489       }
1490       else {
1491          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1492                                                        bld->gallivm, texture_index);
1493          first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1494          *ilevel0 = first_level;
1495       }
1496       break;
1497    case PIPE_TEX_MIPFILTER_NEAREST:
1498       assert(lod_ipart);
1499       lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
1500       break;
1501    case PIPE_TEX_MIPFILTER_LINEAR:
1502       assert(lod_ipart);
1503       assert(*lod_fpart);
1504       lp_build_linear_mip_levels(bld, texture_index,
1505                                  lod_ipart, lod_fpart,
1506                                  ilevel0, ilevel1);
1507       break;
1508    }
1509 }
1510
1511 static void
1512 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
1513                             unsigned sampler_unit)
1514 {
1515    struct gallivm_state *gallivm = bld->gallivm;
1516    LLVMBuilderRef builder = gallivm->builder;
1517    LLVMValueRef border_color_ptr =
1518       bld->dynamic_state->border_color(bld->dynamic_state,
1519                                         gallivm, sampler_unit);
1520    LLVMValueRef border_color;
1521    const struct util_format_description *format_desc = bld->format_desc;
1522    struct lp_type vec4_type = bld->texel_type;
1523    struct lp_build_context vec4_bld;
1524    LLVMValueRef min_clamp = NULL;
1525    LLVMValueRef max_clamp = NULL;
1526
1527    /*
1528     * For normalized format need to clamp border color (technically
1529     * probably should also quantize the data). Really sucks doing this
1530     * here but can't avoid at least for now since this is part of
1531     * sampler state and texture format is part of sampler_view state.
1532     * GL expects also expects clamping for uint/sint formats too so
1533     * do that as well (d3d10 can't end up here with uint/sint since it
1534     * only supports them with ld).
1535     */
1536    vec4_type.length = 4;
1537    lp_build_context_init(&vec4_bld, gallivm, vec4_type);
1538
1539    /*
1540     * Vectorized clamping of border color. Loading is a bit of a hack since
1541     * we just cast the pointer to float array to pointer to vec4
1542     * (int or float).
1543     */
1544    border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
1545                                              lp_build_const_int32(gallivm, 0));
1546    border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
1547                                        LLVMPointerType(vec4_bld.vec_type, 0), "");
1548    border_color = LLVMBuildLoad(builder, border_color_ptr, "");
1549    /* we don't have aligned type in the dynamic state unfortunately */
1550    lp_set_load_alignment(border_color, 4);
1551
1552    /*
1553     * Instead of having some incredibly complex logic which will try to figure out
1554     * clamping necessary for each channel, simply use the first channel, and treat
1555     * mixed signed/unsigned normalized formats specially.
1556     * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
1557     * good reason.)
1558     */
1559    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1560       int chan;
1561       /* d/s needs special handling because both present means just sampling depth */
1562       if (util_format_is_depth_and_stencil(format_desc->format)) {
1563          chan = format_desc->swizzle[0];
1564       }
1565       else {
1566          chan = util_format_get_first_non_void_channel(format_desc->format);
1567       }
1568       if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
1569          unsigned chan_type = format_desc->channel[chan].type;
1570          unsigned chan_norm = format_desc->channel[chan].normalized;
1571          unsigned chan_pure = format_desc->channel[chan].pure_integer;
1572          if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
1573             if (chan_norm) {
1574                min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1575                max_clamp = vec4_bld.one;
1576             }
1577             else if (chan_pure) {
1578                /*
1579                 * Border color was stored as int, hence need min/max clamp
1580                 * only if chan has less than 32 bits..
1581                 */
1582                unsigned chan_size = format_desc->channel[chan].size;
1583                if (chan_size < 32) {
1584                   min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1585                                                      0 - (1 << (chan_size - 1)));
1586                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1587                                                      (1 << (chan_size - 1)) - 1);
1588                }
1589             }
1590             /* TODO: no idea about non-pure, non-normalized! */
1591          }
1592          else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
1593             if (chan_norm) {
1594                min_clamp = vec4_bld.zero;
1595                max_clamp = vec4_bld.one;
1596             }
1597             /*
1598              * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
1599              * we use Z32_FLOAT_S8X24 to imply sampling depth component
1600              * and ignoring stencil, which will blow up here if we try to
1601              * do a uint clamp in a float texel build...
1602              * And even if we had that format, mesa st also thinks using z24s8
1603              * means depth sampling ignoring stencil.
1604              */
1605             else if (chan_pure) {
1606                /*
1607                 * Border color was stored as uint, hence never need min
1608                 * clamp, and only need max clamp if chan has less than 32 bits.
1609                 */
1610                unsigned chan_size = format_desc->channel[chan].size;
1611                if (chan_size < 32) {
1612                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1613                                                      (1 << chan_size) - 1);
1614                }
1615                /* TODO: no idea about non-pure, non-normalized! */
1616             }
1617          }
1618          else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
1619             /* TODO: I have no idea what clamp this would need if any! */
1620          }
1621       }
1622       /* mixed plain formats (or different pure size) */
1623       switch (format_desc->format) {
1624       case PIPE_FORMAT_B10G10R10A2_UINT:
1625       case PIPE_FORMAT_R10G10B10A2_UINT:
1626       {
1627          unsigned max10 = (1 << 10) - 1;
1628          max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
1629                                         max10, (1 << 2) - 1, NULL);
1630       }
1631          break;
1632       case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
1633          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1634                                         -1.0F, 0.0F, NULL);
1635          max_clamp = vec4_bld.one;
1636          break;
1637       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
1638       case PIPE_FORMAT_R5SG5SB6U_NORM:
1639          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1640                                         0.0F, 0.0F, NULL);
1641          max_clamp = vec4_bld.one;
1642          break;
1643       default:
1644          break;
1645       }
1646    }
1647    else {
1648       /* cannot figure this out from format description */
1649       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1650          /* s3tc formats are always unorm */
1651          min_clamp = vec4_bld.zero;
1652          max_clamp = vec4_bld.one;
1653       }
1654       else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
1655                format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
1656          switch (format_desc->format) {
1657          case PIPE_FORMAT_RGTC1_UNORM:
1658          case PIPE_FORMAT_RGTC2_UNORM:
1659          case PIPE_FORMAT_LATC1_UNORM:
1660          case PIPE_FORMAT_LATC2_UNORM:
1661          case PIPE_FORMAT_ETC1_RGB8:
1662             min_clamp = vec4_bld.zero;
1663             max_clamp = vec4_bld.one;
1664             break;
1665          case PIPE_FORMAT_RGTC1_SNORM:
1666          case PIPE_FORMAT_RGTC2_SNORM:
1667          case PIPE_FORMAT_LATC1_SNORM:
1668          case PIPE_FORMAT_LATC2_SNORM:
1669             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1670             max_clamp = vec4_bld.one;
1671             break;
1672          default:
1673             assert(0);
1674             break;
1675          }
1676       }
1677       /*
1678        * all others from subsampled/other group, though we don't care
1679        * about yuv (and should not have any from zs here)
1680        */
1681       else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
1682          switch (format_desc->format) {
1683          case PIPE_FORMAT_R8G8_B8G8_UNORM:
1684          case PIPE_FORMAT_G8R8_G8B8_UNORM:
1685          case PIPE_FORMAT_G8R8_B8R8_UNORM:
1686          case PIPE_FORMAT_R8G8_R8B8_UNORM:
1687          case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
1688             min_clamp = vec4_bld.zero;
1689             max_clamp = vec4_bld.one;
1690             break;
1691          case PIPE_FORMAT_R8G8Bx_SNORM:
1692             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1693             max_clamp = vec4_bld.one;
1694             break;
1695             /*
1696              * Note smallfloat formats usually don't need clamping
1697              * (they still have infinite range) however this is not
1698              * true for r11g11b10 and r9g9b9e5, which can't represent
1699              * negative numbers (and additionally r9g9b9e5 can't represent
1700              * very large numbers). d3d10 seems happy without clamping in
1701              * this case, but gl spec is pretty clear: "for floating
1702              * point and integer formats, border values are clamped to
1703              * the representable range of the format" so do that here.
1704              */
1705          case PIPE_FORMAT_R11G11B10_FLOAT:
1706             min_clamp = vec4_bld.zero;
1707             break;
1708          case PIPE_FORMAT_R9G9B9E5_FLOAT:
1709             min_clamp = vec4_bld.zero;
1710             max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
1711             break;
1712          default:
1713             assert(0);
1714             break;
1715          }
1716       }
1717    }
1718
1719    if (min_clamp) {
1720       border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
1721    }
1722    if (max_clamp) {
1723       border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
1724    }
1725
1726    bld->border_color_clamped = border_color;
1727 }
1728
1729
1730 /**
1731  * General texture sampling codegen.
1732  * This function handles texture sampling for all texture targets (1D,
1733  * 2D, 3D, cube) and all filtering modes.
1734  */
1735 static void
1736 lp_build_sample_general(struct lp_build_sample_context *bld,
1737                         unsigned sampler_unit,
1738                         LLVMValueRef *coords,
1739                         const LLVMValueRef *offsets,
1740                         LLVMValueRef lod_positive,
1741                         LLVMValueRef lod_fpart,
1742                         LLVMValueRef ilevel0,
1743                         LLVMValueRef ilevel1,
1744                         LLVMValueRef *colors_out)
1745 {
1746    LLVMBuilderRef builder = bld->gallivm->builder;
1747    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
1748    const unsigned mip_filter = sampler_state->min_mip_filter;
1749    const unsigned min_filter = sampler_state->min_img_filter;
1750    const unsigned mag_filter = sampler_state->mag_img_filter;
1751    LLVMValueRef texels[4];
1752    unsigned chan;
1753
1754    /* if we need border color, (potentially) clamp it now */
1755    if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
1756                                               min_filter,
1757                                               mag_filter) ||
1758        (bld->dims > 1 &&
1759            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
1760                                                   min_filter,
1761                                                   mag_filter)) ||
1762        (bld->dims > 2 &&
1763            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
1764                                                   min_filter,
1765                                                   mag_filter))) {
1766       lp_build_clamp_border_color(bld, sampler_unit);
1767    }
1768
1769
1770    /*
1771     * Get/interpolate texture colors.
1772     */
1773
1774    for (chan = 0; chan < 4; ++chan) {
1775      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
1776      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
1777    }
1778
1779    if (min_filter == mag_filter) {
1780       /* no need to distinguish between minification and magnification */
1781       lp_build_sample_mipmap(bld, sampler_unit,
1782                              min_filter, mip_filter,
1783                              coords, offsets,
1784                              ilevel0, ilevel1, lod_fpart,
1785                              texels);
1786    }
1787    else {
1788       /*
1789        * Could also get rid of the if-logic and always use mipmap_both, both
1790        * for the single lod and multi-lod case if nothing really uses this.
1791        */
1792       if (bld->num_lods == 1) {
1793          /* Emit conditional to choose min image filter or mag image filter
1794           * depending on the lod being > 0 or <= 0, respectively.
1795           */
1796          struct lp_build_if_state if_ctx;
1797
1798          lod_positive = LLVMBuildTrunc(builder, lod_positive,
1799                                        LLVMInt1TypeInContext(bld->gallivm->context), "");
1800
1801          lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1802          {
1803             /* Use the minification filter */
1804             lp_build_sample_mipmap(bld, sampler_unit,
1805                                    min_filter, mip_filter,
1806                                    coords, offsets,
1807                                    ilevel0, ilevel1, lod_fpart,
1808                                    texels);
1809          }
1810          lp_build_else(&if_ctx);
1811          {
1812             /* Use the magnification filter */
1813             lp_build_sample_mipmap(bld, sampler_unit,
1814                                    mag_filter, PIPE_TEX_MIPFILTER_NONE,
1815                                    coords, offsets,
1816                                    ilevel0, NULL, NULL,
1817                                    texels);
1818          }
1819          lp_build_endif(&if_ctx);
1820       }
1821       else {
1822          LLVMValueRef need_linear, linear_mask;
1823          unsigned mip_filter_for_nearest;
1824          struct lp_build_if_state if_ctx;
1825
1826          if (min_filter == PIPE_TEX_FILTER_LINEAR) {
1827             linear_mask = lod_positive;
1828             mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
1829          }
1830          else {
1831             linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
1832             mip_filter_for_nearest = mip_filter;
1833          }
1834          need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
1835                                                linear_mask);
1836
1837          if (bld->num_lods != bld->coord_type.length) {
1838             linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1839                                                                 bld->lodi_type,
1840                                                                 bld->int_coord_type,
1841                                                                 linear_mask);
1842          }
1843
1844          lp_build_if(&if_ctx, bld->gallivm, need_linear);
1845          {
1846             /*
1847              * Do sampling with both filters simultaneously. This means using
1848              * a linear filter and doing some tricks (with weights) for the pixels
1849              * which need nearest filter.
1850              * Note that it's probably rare some pixels need nearest and some
1851              * linear filter but the fixups required for the nearest pixels
1852              * aren't all that complicated so just always run a combined path
1853              * if at least some pixels require linear.
1854              */
1855             lp_build_sample_mipmap_both(bld, sampler_unit,
1856                                         linear_mask, mip_filter,
1857                                         coords, offsets,
1858                                         ilevel0, ilevel1,
1859                                         lod_fpart, lod_positive,
1860                                         texels);
1861          }
1862          lp_build_else(&if_ctx);
1863          {
1864             /*
1865              * All pixels require just nearest filtering, which is way
1866              * cheaper than linear, hence do a separate path for that.
1867              */
1868             lp_build_sample_mipmap(bld, sampler_unit,
1869                                    PIPE_TEX_FILTER_NEAREST, mip_filter_for_nearest,
1870                                    coords, offsets,
1871                                    ilevel0, ilevel1, lod_fpart,
1872                                    texels);
1873          }
1874          lp_build_endif(&if_ctx);
1875       }
1876    }
1877
1878    for (chan = 0; chan < 4; ++chan) {
1879      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
1880      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
1881    }
1882 }
1883
1884
1885 /**
1886  * Texel fetch function.
1887  * In contrast to general sampling there is no filtering, no coord minification,
1888  * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
1889  * directly to be applied to the selected mip level (after adding texel offsets).
1890  * This function handles texel fetch for all targets where texel fetch is supported
1891  * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
1892  */
1893 static void
1894 lp_build_fetch_texel(struct lp_build_sample_context *bld,
1895                      unsigned texture_unit,
1896                      const LLVMValueRef *coords,
1897                      LLVMValueRef explicit_lod,
1898                      const LLVMValueRef *offsets,
1899                      LLVMValueRef *colors_out)
1900 {
1901    struct lp_build_context *perquadi_bld = &bld->lodi_bld;
1902    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1903    unsigned dims = bld->dims, chan;
1904    unsigned target = bld->static_texture_state->target;
1905    boolean out_of_bound_ret_zero = TRUE;
1906    LLVMValueRef size, ilevel;
1907    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
1908    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
1909    LLVMValueRef width, height, depth, i, j;
1910    LLVMValueRef offset, out_of_bounds, out1;
1911
1912    out_of_bounds = int_coord_bld->zero;
1913
1914    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
1915       if (bld->num_mips != int_coord_bld->type.length) {
1916          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
1917                                             perquadi_bld->type, explicit_lod, 0);
1918       }
1919       else {
1920          ilevel = explicit_lod;
1921       }
1922       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
1923                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
1924    }
1925    else {
1926       assert(bld->num_mips == 1);
1927       if (bld->static_texture_state->target != PIPE_BUFFER) {
1928          ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
1929                                                   bld->gallivm, texture_unit);
1930       }
1931       else {
1932          ilevel = lp_build_const_int32(bld->gallivm, 0);
1933       }
1934    }
1935    lp_build_mipmap_level_sizes(bld, ilevel,
1936                                &size,
1937                                &row_stride_vec, &img_stride_vec);
1938    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
1939                                 size, &width, &height, &depth);
1940
1941    if (target == PIPE_TEXTURE_1D_ARRAY ||
1942        target == PIPE_TEXTURE_2D_ARRAY) {
1943       if (out_of_bound_ret_zero) {
1944          z = lp_build_layer_coord(bld, texture_unit, z, &out1);
1945          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1946       }
1947       else {
1948          z = lp_build_layer_coord(bld, texture_unit, z, NULL);
1949       }
1950    }
1951
1952    /* This is a lot like border sampling */
1953    if (offsets[0]) {
1954       /*
1955        * coords are really unsigned, offsets are signed, but I don't think
1956        * exceeding 31 bits is possible
1957        */
1958       x = lp_build_add(int_coord_bld, x, offsets[0]);
1959    }
1960    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
1961    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1962    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
1963    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1964
1965    if (dims >= 2) {
1966       if (offsets[1]) {
1967          y = lp_build_add(int_coord_bld, y, offsets[1]);
1968       }
1969       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
1970       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1971       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
1972       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1973
1974       if (dims >= 3) {
1975          if (offsets[2]) {
1976             z = lp_build_add(int_coord_bld, z, offsets[2]);
1977          }
1978          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
1979          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1980          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
1981          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
1982       }
1983    }
1984
1985    lp_build_sample_offset(int_coord_bld,
1986                           bld->format_desc,
1987                           x, y, z, row_stride_vec, img_stride_vec,
1988                           &offset, &i, &j);
1989
1990    if (bld->static_texture_state->target != PIPE_BUFFER) {
1991       offset = lp_build_add(int_coord_bld, offset,
1992                             lp_build_get_mip_offsets(bld, ilevel));
1993    }
1994
1995    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
1996
1997    lp_build_fetch_rgba_soa(bld->gallivm,
1998                            bld->format_desc,
1999                            bld->texel_type,
2000                            bld->base_ptr, offset,
2001                            i, j,
2002                            colors_out);
2003
2004    if (out_of_bound_ret_zero) {
2005       /*
2006        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
2007        * Could use min/max above instead of out-of-bounds comparisons
2008        * if we don't care about the result returned for out-of-bounds.
2009        */
2010       for (chan = 0; chan < 4; chan++) {
2011          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
2012                                             bld->texel_bld.zero, colors_out[chan]);
2013       }
2014    }
2015 }
2016
2017
2018 /**
2019  * Just set texels to white instead of actually sampling the texture.
2020  * For debugging.
2021  */
2022 void
2023 lp_build_sample_nop(struct gallivm_state *gallivm,
2024                     struct lp_type type,
2025                     const LLVMValueRef *coords,
2026                     LLVMValueRef texel_out[4])
2027 {
2028    LLVMValueRef one = lp_build_one(gallivm, type);
2029    unsigned chan;
2030
2031    for (chan = 0; chan < 4; chan++) {
2032       texel_out[chan] = one;
2033    }  
2034 }
2035
2036
2037 /**
2038  * Build texture sampling code.
2039  * 'texel' will return a vector of four LLVMValueRefs corresponding to
2040  * R, G, B, A.
2041  * \param type  vector float type to use for coords, etc.
2042  * \param is_fetch  if this is a texel fetch instruction.
2043  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
2044  */
2045 void
2046 lp_build_sample_soa(struct gallivm_state *gallivm,
2047                     const struct lp_static_texture_state *static_texture_state,
2048                     const struct lp_static_sampler_state *static_sampler_state,
2049                     struct lp_sampler_dynamic_state *dynamic_state,
2050                     struct lp_type type,
2051                     boolean is_fetch,
2052                     unsigned texture_index,
2053                     unsigned sampler_index,
2054                     const LLVMValueRef *coords,
2055                     const LLVMValueRef *offsets,
2056                     const struct lp_derivatives *derivs, /* optional */
2057                     LLVMValueRef lod_bias, /* optional */
2058                     LLVMValueRef explicit_lod, /* optional */
2059                     enum lp_sampler_lod_property lod_property,
2060                     LLVMValueRef texel_out[4])
2061 {
2062    unsigned target = static_texture_state->target;
2063    unsigned dims = texture_dims(target);
2064    unsigned num_quads = type.length / 4;
2065    unsigned mip_filter, min_img_filter, mag_img_filter, i;
2066    struct lp_build_sample_context bld;
2067    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2068    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2069    LLVMBuilderRef builder = gallivm->builder;
2070    LLVMValueRef tex_width, newcoords[5];
2071
2072    if (0) {
2073       enum pipe_format fmt = static_texture_state->format;
2074       debug_printf("Sample from %s\n", util_format_name(fmt));
2075    }
2076
2077    assert(type.floating);
2078
2079    /* Setup our build context */
2080    memset(&bld, 0, sizeof bld);
2081    bld.gallivm = gallivm;
2082    bld.static_sampler_state = &derived_sampler_state;
2083    bld.static_texture_state = static_texture_state;
2084    bld.dynamic_state = dynamic_state;
2085    bld.format_desc = util_format_description(static_texture_state->format);
2086    bld.dims = dims;
2087
2088    bld.vector_width = lp_type_width(type);
2089
2090    bld.float_type = lp_type_float(32);
2091    bld.int_type = lp_type_int(32);
2092    bld.coord_type = type;
2093    bld.int_coord_type = lp_int_type(type);
2094    bld.float_size_in_type = lp_type_float(32);
2095    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2096    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2097    bld.texel_type = type;
2098
2099    /* always using the first channel hopefully should be safe,
2100     * if not things WILL break in other places anyway.
2101     */
2102    if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2103        bld.format_desc->channel[0].pure_integer) {
2104       if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2105          bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2106       }
2107       else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2108          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2109       }
2110    }
2111    else if (util_format_has_stencil(bld.format_desc) &&
2112        !util_format_has_depth(bld.format_desc)) {
2113       /* for stencil only formats, sample stencil (uint) */
2114       bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2115    }
2116
2117    if (!static_texture_state->level_zero_only) {
2118       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2119    } else {
2120       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2121    }
2122    mip_filter = derived_sampler_state.min_mip_filter;
2123
2124    if (0) {
2125       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2126    }
2127
2128    min_img_filter = static_sampler_state->min_img_filter;
2129    mag_img_filter = static_sampler_state->mag_img_filter;
2130
2131
2132    /*
2133     * This is all a bit complicated different paths are chosen for performance
2134     * reasons.
2135     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
2136     * everything (the last two options are equivalent for 4-wide case).
2137     * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
2138     * lod is calculated then the lod value extracted afterwards so making this
2139     * case basically the same as far as lod handling is concerned for the
2140     * further sample/filter code as the 1 lod for everything case.
2141     * Different lod handling mostly shows up when building mipmap sizes
2142     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2143     * (getting the fractional part of the lod to the right texels).
2144     */
2145
2146    /*
2147     * There are other situations where at least the multiple int lods could be
2148     * avoided like min and max lod being equal.
2149     */
2150    bld.num_mips = bld.num_lods = 1;
2151    if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2152        (explicit_lod || lod_bias ||
2153         (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
2154       if ((is_fetch && target != PIPE_BUFFER) ||
2155           (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2156          bld.num_mips = type.length;
2157          bld.num_lods = type.length;
2158       }
2159       else if (!is_fetch && min_img_filter != mag_img_filter) {
2160          bld.num_mips = 1;
2161          bld.num_lods = type.length;
2162       }
2163    }
2164    /* TODO: for true scalar_lod should only use 1 lod value */
2165    else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
2166             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2167       bld.num_mips = num_quads;
2168       bld.num_lods = num_quads;
2169    }
2170    else if (!is_fetch && min_img_filter != mag_img_filter) {
2171       bld.num_mips = 1;
2172       bld.num_lods = num_quads;
2173    }
2174
2175
2176    bld.lodf_type = type;
2177    /* we want native vector size to be able to use our intrinsics */
2178    if (bld.num_lods != type.length) {
2179       /* TODO: this currently always has to be per-quad or per-element */
2180       bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2181    }
2182    bld.lodi_type = lp_int_type(bld.lodf_type);
2183    bld.levelf_type = bld.lodf_type;
2184    if (bld.num_mips == 1) {
2185       bld.levelf_type.length = 1;
2186    }
2187    bld.leveli_type = lp_int_type(bld.levelf_type);
2188    bld.float_size_type = bld.float_size_in_type;
2189    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2190     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
2191    if (bld.num_mips > 1) {
2192       bld.float_size_type.length = bld.num_mips == type.length ?
2193                                       bld.num_mips * bld.float_size_in_type.length :
2194                                       type.length;
2195    }
2196    bld.int_size_type = lp_int_type(bld.float_size_type);
2197
2198    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2199    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2200    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2201    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2202    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2203    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2204    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2205    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2206    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2207    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2208    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2209    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2210    lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2211    lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2212
2213    /* Get the dynamic state */
2214    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
2215    bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
2216    bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
2217    bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
2218    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
2219    /* Note that mip_offsets is an array[level] of offsets to texture images */
2220
2221    /* width, height, depth as single int vector */
2222    if (dims <= 1) {
2223       bld.int_size = tex_width;
2224    }
2225    else {
2226       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2227                                             tex_width, LLVMConstInt(i32t, 0, 0), "");
2228       if (dims >= 2) {
2229          LLVMValueRef tex_height =
2230             dynamic_state->height(dynamic_state, gallivm, texture_index);
2231          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2232                                                tex_height, LLVMConstInt(i32t, 1, 0), "");
2233          if (dims >= 3) {
2234             LLVMValueRef tex_depth =
2235                dynamic_state->depth(dynamic_state, gallivm, texture_index);
2236             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2237                                                   tex_depth, LLVMConstInt(i32t, 2, 0), "");
2238          }
2239       }
2240    }
2241
2242    for (i = 0; i < 5; i++) {
2243       newcoords[i] = coords[i];
2244    }
2245
2246    if (0) {
2247       /* For debug: no-op texture sampling */
2248       lp_build_sample_nop(gallivm,
2249                           bld.texel_type,
2250                           newcoords,
2251                           texel_out);
2252    }
2253
2254    else if (is_fetch) {
2255       lp_build_fetch_texel(&bld, texture_index, newcoords,
2256                            explicit_lod, offsets,
2257                            texel_out);
2258    }
2259
2260    else {
2261       LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
2262       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2263       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2264                         /* not sure this is strictly needed or simply impossible */
2265                         static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE &&
2266                         lp_is_simple_wrap_mode(static_sampler_state->wrap_s);
2267       if (dims > 1) {
2268          use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
2269          if (dims > 2) {
2270             use_aos &= lp_is_simple_wrap_mode(static_sampler_state->wrap_r);
2271          }
2272       }
2273
2274       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2275           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2276          debug_printf("%s: using floating point linear filtering for %s\n",
2277                       __FUNCTION__, bld.format_desc->short_name);
2278          debug_printf("  min_img %d  mag_img %d  mip %d  wraps %d  wrapt %d  wrapr %d\n",
2279                       static_sampler_state->min_img_filter,
2280                       static_sampler_state->mag_img_filter,
2281                       static_sampler_state->min_mip_filter,
2282                       static_sampler_state->wrap_s,
2283                       static_sampler_state->wrap_t,
2284                       static_sampler_state->wrap_r);
2285       }
2286
2287       lp_build_sample_common(&bld, texture_index, sampler_index,
2288                              newcoords,
2289                              derivs, lod_bias, explicit_lod,
2290                              &lod_positive, &lod_fpart,
2291                              &ilevel0, &ilevel1);
2292
2293       /*
2294        * we only try 8-wide sampling with soa as it appears to
2295        * be a loss with aos with AVX (but it should work).
2296        * (It should be faster if we'd support avx2)
2297        */
2298       if (num_quads == 1 || !use_aos) {
2299          if (use_aos) {
2300             /* do sampling/filtering with fixed pt arithmetic */
2301             lp_build_sample_aos(&bld, sampler_index,
2302                                 newcoords[0], newcoords[1],
2303                                 newcoords[2],
2304                                 offsets, lod_positive, lod_fpart,
2305                                 ilevel0, ilevel1,
2306                                 texel_out);
2307          }
2308
2309          else {
2310             lp_build_sample_general(&bld, sampler_index,
2311                                     newcoords, offsets,
2312                                     lod_positive, lod_fpart,
2313                                     ilevel0, ilevel1,
2314                                     texel_out);
2315          }
2316       }
2317       else {
2318          unsigned j;
2319          struct lp_build_sample_context bld4;
2320          struct lp_type type4 = type;
2321          unsigned i;
2322          LLVMValueRef texelout4[4];
2323          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2324
2325          type4.length = 4;
2326
2327          /* Setup our build context */
2328          memset(&bld4, 0, sizeof bld4);
2329          bld4.gallivm = bld.gallivm;
2330          bld4.static_texture_state = bld.static_texture_state;
2331          bld4.static_sampler_state = bld.static_sampler_state;
2332          bld4.dynamic_state = bld.dynamic_state;
2333          bld4.format_desc = bld.format_desc;
2334          bld4.dims = bld.dims;
2335          bld4.row_stride_array = bld.row_stride_array;
2336          bld4.img_stride_array = bld.img_stride_array;
2337          bld4.base_ptr = bld.base_ptr;
2338          bld4.mip_offsets = bld.mip_offsets;
2339          bld4.int_size = bld.int_size;
2340
2341          bld4.vector_width = lp_type_width(type4);
2342
2343          bld4.float_type = lp_type_float(32);
2344          bld4.int_type = lp_type_int(32);
2345          bld4.coord_type = type4;
2346          bld4.int_coord_type = lp_int_type(type4);
2347          bld4.float_size_in_type = lp_type_float(32);
2348          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2349          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2350          bld4.texel_type = bld.texel_type;
2351          bld4.texel_type.length = 4;
2352
2353          bld4.num_mips = bld4.num_lods = 1;
2354          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2355              (explicit_lod || lod_bias ||
2356               (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
2357             if ((is_fetch && target != PIPE_BUFFER) ||
2358                 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2359                bld4.num_mips = type4.length;
2360                bld4.num_lods = type4.length;
2361             }
2362             else if (!is_fetch && min_img_filter != mag_img_filter) {
2363                bld4.num_mips = 1;
2364                bld4.num_lods = type4.length;
2365             }
2366          }
2367
2368          /* we want native vector size to be able to use our intrinsics */
2369          bld4.lodf_type = type4;
2370          if (bld4.num_lods != type4.length) {
2371             bld4.lodf_type.length = 1;
2372          }
2373          bld4.lodi_type = lp_int_type(bld4.lodf_type);
2374          bld4.levelf_type = type4;
2375          if (bld4.num_mips != type4.length) {
2376             bld4.levelf_type.length = 1;
2377          }
2378          bld4.leveli_type = lp_int_type(bld4.levelf_type);
2379          bld4.float_size_type = bld4.float_size_in_type;
2380          if (bld4.num_mips > 1) {
2381             bld4.float_size_type.length = bld4.num_mips == type4.length ?
2382                                             bld4.num_mips * bld4.float_size_in_type.length :
2383                                             type4.length;
2384          }
2385          bld4.int_size_type = lp_int_type(bld4.float_size_type);
2386
2387          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2388          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2389          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2390          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2391          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2392          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2393          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2394          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2395          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2396          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2397          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2398          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2399          lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
2400          lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
2401
2402          for (i = 0; i < num_quads; i++) {
2403             LLVMValueRef s4, t4, r4;
2404             LLVMValueRef lod_positive4, lod_fpart4 = NULL;
2405             LLVMValueRef ilevel04, ilevel14 = NULL;
2406             LLVMValueRef offsets4[4] = { NULL };
2407             unsigned num_lods = bld4.num_lods;
2408
2409             s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2410             t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2411             r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2412
2413             if (offsets[0]) {
2414                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2415                if (dims > 1) {
2416                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2417                   if (dims > 2) {
2418                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2419                   }
2420                }
2421             }
2422             lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
2423             ilevel04 = bld.num_mips == 1 ? ilevel0 :
2424                           lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2425             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2426                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2427                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2428             }
2429
2430             if (use_aos) {
2431                /* do sampling/filtering with fixed pt arithmetic */
2432                lp_build_sample_aos(&bld4, sampler_index,
2433                                    s4, t4, r4, offsets4,
2434                                    lod_positive4, lod_fpart4,
2435                                    ilevel04, ilevel14,
2436                                    texelout4);
2437             }
2438
2439             else {
2440                /* this path is currently unreachable and hence might break easily... */
2441                LLVMValueRef newcoords4[5];
2442                newcoords4[0] = s4;
2443                newcoords4[1] = t4;
2444                newcoords4[2] = r4;
2445                newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2446                newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2447
2448                lp_build_sample_general(&bld4, sampler_index,
2449                                        newcoords4, offsets4,
2450                                        lod_positive4, lod_fpart4,
2451                                        ilevel04, ilevel14,
2452                                        texelout4);
2453             }
2454             for (j = 0; j < 4; j++) {
2455                texelouttmp[j][i] = texelout4[j];
2456             }
2457          }
2458
2459          for (j = 0; j < 4; j++) {
2460             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2461          }
2462       }
2463    }
2464
2465    if (target != PIPE_BUFFER) {
2466       apply_sampler_swizzle(&bld, texel_out);
2467    }
2468
2469    /*
2470     * texel type can be a (32bit) int/uint (for pure int formats only),
2471     * however we are expected to always return floats (storage is untyped).
2472     */
2473    if (!bld.texel_type.floating) {
2474       unsigned chan;
2475       for (chan = 0; chan < 4; chan++) {
2476          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2477                                             lp_build_vec_type(gallivm, type), "");
2478       }
2479    }
2480 }
2481
2482 void
2483 lp_build_size_query_soa(struct gallivm_state *gallivm,
2484                         const struct lp_static_texture_state *static_state,
2485                         struct lp_sampler_dynamic_state *dynamic_state,
2486                         struct lp_type int_type,
2487                         unsigned texture_unit,
2488                         unsigned target,
2489                         boolean is_sviewinfo,
2490                         enum lp_sampler_lod_property lod_property,
2491                         LLVMValueRef explicit_lod,
2492                         LLVMValueRef *sizes_out)
2493 {
2494    LLVMValueRef lod, level, size;
2495    LLVMValueRef first_level = NULL;
2496    int dims, i;
2497    boolean has_array;
2498    unsigned num_lods = 1;
2499    struct lp_build_context bld_int_vec4;
2500
2501    /*
2502     * Do some sanity verification about bound texture and shader dcl target.
2503     * Not entirely sure what's possible but assume array/non-array
2504     * always compatible (probably not ok for OpenGL but d3d10 has no
2505     * distinction of arrays at the resource level).
2506     * Everything else looks bogus (though not entirely sure about rect/2d).
2507     * Currently disabled because it causes assertion failures if there's
2508     * nothing bound (or rather a dummy texture, not that this case would
2509     * return the right values).
2510     */
2511    if (0 && static_state->target != target) {
2512       if (static_state->target == PIPE_TEXTURE_1D)
2513          assert(target == PIPE_TEXTURE_1D_ARRAY);
2514       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
2515          assert(target == PIPE_TEXTURE_1D);
2516       else if (static_state->target == PIPE_TEXTURE_2D)
2517          assert(target == PIPE_TEXTURE_2D_ARRAY);
2518       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
2519          assert(target == PIPE_TEXTURE_2D);
2520       else if (static_state->target == PIPE_TEXTURE_CUBE)
2521          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
2522       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2523          assert(target == PIPE_TEXTURE_CUBE);
2524       else
2525          assert(0);
2526    }
2527
2528    dims = texture_dims(target);
2529
2530    switch (target) {
2531    case PIPE_TEXTURE_1D_ARRAY:
2532    case PIPE_TEXTURE_2D_ARRAY:
2533       has_array = TRUE;
2534       break;
2535    default:
2536       has_array = FALSE;
2537       break;
2538    }
2539
2540    assert(!int_type.floating);
2541
2542    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
2543
2544    if (explicit_lod) {
2545       /* FIXME: this needs to honor per-element lod */
2546       lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
2547       first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
2548       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
2549       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
2550    } else {
2551       lod = bld_int_vec4.zero;
2552    }
2553
2554    size = bld_int_vec4.undef;
2555
2556    size = LLVMBuildInsertElement(gallivm->builder, size,
2557                                  dynamic_state->width(dynamic_state, gallivm, texture_unit),
2558                                  lp_build_const_int32(gallivm, 0), "");
2559
2560    if (dims >= 2) {
2561       size = LLVMBuildInsertElement(gallivm->builder, size,
2562                                     dynamic_state->height(dynamic_state, gallivm, texture_unit),
2563                                     lp_build_const_int32(gallivm, 1), "");
2564    }
2565
2566    if (dims >= 3) {
2567       size = LLVMBuildInsertElement(gallivm->builder, size,
2568                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2569                                     lp_build_const_int32(gallivm, 2), "");
2570    }
2571
2572    size = lp_build_minify(&bld_int_vec4, size, lod);
2573
2574    if (has_array)
2575       size = LLVMBuildInsertElement(gallivm->builder, size,
2576                                     dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2577                                     lp_build_const_int32(gallivm, dims), "");
2578
2579    /*
2580     * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
2581     * if level is out of bounds (note this can't cover unbound texture
2582     * here, which also requires returning zero).
2583     */
2584    if (explicit_lod && is_sviewinfo) {
2585       LLVMValueRef last_level, out, out1;
2586       struct lp_build_context leveli_bld;
2587
2588       /* everything is scalar for now */
2589       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
2590       last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2591
2592       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
2593       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
2594       out = lp_build_or(&leveli_bld, out, out1);
2595       if (num_lods == 1) {
2596          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
2597       }
2598       else {
2599          /* TODO */
2600          assert(0);
2601       }
2602       size = lp_build_andnot(&bld_int_vec4, size, out);
2603    }
2604    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
2605       sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
2606                                                 size,
2607                                                 lp_build_const_int32(gallivm, i));
2608    }
2609    if (is_sviewinfo) {
2610       for (; i < 4; i++) {
2611          sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
2612       }
2613    }
2614
2615    /*
2616     * if there's no explicit_lod (buffers, rects) queries requiring nr of
2617     * mips would be illegal.
2618     */
2619    if (is_sviewinfo && explicit_lod) {
2620       struct lp_build_context bld_int_scalar;
2621       LLVMValueRef num_levels;
2622       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
2623
2624       if (static_state->level_zero_only) {
2625          num_levels = bld_int_scalar.one;
2626       }
2627       else {
2628          LLVMValueRef last_level;
2629
2630          last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2631          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
2632          num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
2633       }
2634       sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
2635                                         num_levels);
2636    }
2637 }