1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * Position and shader input interpolation.
33 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "lp_bld_interp.h"
50 * The shader JIT function operates on blocks of quads.
51 * Each block has 2x2 quads and each quad has 2x2 pixels.
53 * We iterate over the quads in order 0, 1, 2, 3:
65 * If we iterate over multiple quads at once, quads 01 and 23 are processed
68 * Within each quad, we have four pixels which are represented in SOA
77 * So the green channel (for example) of the four pixels is stored in
78 * a single vector register: {g0, g1, g2, g3}.
79 * The order stays the same even with multiple quads:
87 * Do one perspective divide per quad.
89 * For perspective interpolation, the final attribute value is given
95 * a = a0 + dadx*x + dady*y
96 * w = w0 + dwdx*x + dwdy*y
97 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
99 * Instead of computing the division per pixel, with this macro we compute the
100 * division on the upper left pixel of each quad, and use a linear
101 * approximation in the remaining pixels, given by:
103 * da'dx = (dadx - dwdx*a)*oow
104 * da'dy = (dady - dwdy*a)*oow
106 * Ironically, this actually makes things slower -- probably because the
107 * divide hardware unit is rarely used, whereas the multiply unit is typically
110 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
/*
 * Pixel x / y offsets indexed by pixel (vector lane) number.
 *
 * Each run of four consecutive entries describes one 2x2 quad: the x
 * pattern {0, 1, 0, 1} and the y pattern {0, 0, 1, 1}, shifted by that
 * quad's origin.  The four runs have origins (0,0), (2,0), (0,2) and (2,2),
 * so the 16 entries together cover offsets 0..3 in both directions.
 */
113 static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};
114 static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};
/**
 * Give an LLVM value a readable name through lp_build_name().
 *
 * Two name formats are produced: "pos.<c><suffix>" and
 * "input<N>.<c><suffix>", where <c> is the channel letter "xyzw"[chan] and
 * <N> is attrib - 1.
 *
 * NOTE(review): the condition selecting between the two formats is not
 * visible here; presumably attrib 0 is the position slot -- confirm.
 *
 * @param val     value to name
 * @param attrib  attribute slot
 * @param chan    channel index used to pick a letter from "xyzw"
 * @param suffix  text appended to the generated name
 */
118 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
121 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
123 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
/**
 * Build the per-lane pixel offset vectors for one group of quads.
 *
 * Lane i receives quad_offset_x[i] / quad_offset_y[i] plus the origin of
 * the starting quad: bit 0 of quad_start_index adds 2 to every x offset and
 * bit 1 adds 2 to every y offset.
 *
 * @param coeff_bld         build context; type.length is the lane count
 * @param quad_start_index  index of the first quad of the group
 * @param pixoffx           out: vector of float x offsets
 * @param pixoffy           out: vector of float y offsets
 */
127 calc_offsets(struct lp_build_context *coeff_bld,
128 unsigned quad_start_index,
129 LLVMValueRef *pixoffx,
130 LLVMValueRef *pixoffy)
133 unsigned num_pix = coeff_bld->type.length;
134 struct gallivm_state *gallivm = coeff_bld->gallivm;
135 LLVMBuilderRef builder = coeff_bld->gallivm->builder;
136 LLVMValueRef nr, pixxf, pixyf;
/* Start from undef vectors and insert one constant per lane. */
138 *pixoffx = coeff_bld->undef;
139 *pixoffy = coeff_bld->undef;
141 for (i = 0; i < num_pix; i++) {
142 nr = lp_build_const_int32(gallivm, i);
/* i < num_pix inside this loop, so "i % num_pix" is simply i. */
143 pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
144 (quad_start_index & 1) * 2);
145 pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
146 (quad_start_index & 2));
147 *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
148 *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
153 /* Much easier, and significantly fewer instructions in the per-stamp
154 * part (less than half) but overall more instructions so a loss if
155 * most quads are active. Might be a win though with larger vectors.
156 * No ability to do per-quad divide (doable but not implemented)
157 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
/**
 * Load the array-of-structures interpolation coefficients of every
 * attribute and cache them in bld->a0aos[], bld->dadxaos[] and
 * bld->dadyaos[].
 *
 * For each attribute, element attrib * TGSI_NUM_CHANNELS of a coefficient
 * array is addressed, the pointer is cast to a pointer to
 * setup_bld->vec_type, and one whole vector (all channels) is loaded.
 * A coefficient that is not loaded keeps its initial value
 * setup_bld->zero.
 *
 * NOTE(review): the switch header and the statements between the case
 * groups are not visible here, so which groups fall through into the a0
 * load should be confirmed.  The body also reads a0_ptr.
 *
 * @param bld       interpolation context; num_attribs and interp[] are read
 * @param dadx_ptr  base of the d/dx coefficient array
 * @param dady_ptr  base of the d/dy coefficient array
 */
160 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
162 LLVMValueRef dadx_ptr,
163 LLVMValueRef dady_ptr)
165 struct lp_build_context *coeff_bld = &bld->coeff_bld;
166 struct lp_build_context *setup_bld = &bld->setup_bld;
167 struct gallivm_state *gallivm = coeff_bld->gallivm;
168 LLVMBuilderRef builder = gallivm->builder;
171 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
173 * always fetch all 4 values for performance/simplicity
174 * Note: we do that here because it seems to generate better
175 * code. It generates a lot of moves initially but less
176 * moves later. As far as I can tell this looks like a
177 * llvm issue, instead of simply reloading the values from
178 * the passed in pointers it if it runs out of registers
179 * it spills/reloads them. Maybe some optimization passes
181 * Might want to investigate this again later.
183 const unsigned interp = bld->interp[attrib];
/* Index of this attribute's first channel inside the coefficient arrays. */
184 LLVMValueRef index = lp_build_const_int32(gallivm,
185 attrib * TGSI_NUM_CHANNELS);
187 LLVMValueRef dadxaos = setup_bld->zero;
188 LLVMValueRef dadyaos = setup_bld->zero;
189 LLVMValueRef a0aos = setup_bld->zero;
192 case LP_INTERP_PERSPECTIVE:
195 case LP_INTERP_LINEAR:
/* x and y derivatives, all channels of the attribute in one vector load. */
196 ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
197 ptr = LLVMBuildBitCast(builder, ptr,
198 LLVMPointerType(setup_bld->vec_type, 0), "");
199 dadxaos = LLVMBuildLoad(builder, ptr, "");
201 ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
202 ptr = LLVMBuildBitCast(builder, ptr,
203 LLVMPointerType(setup_bld->vec_type, 0), "");
204 dadyaos = LLVMBuildLoad(builder, ptr, "");
206 attrib_name(dadxaos, attrib, 0, ".dadxaos");
207 attrib_name(dadyaos, attrib, 0, ".dadyaos");
210 case LP_INTERP_CONSTANT:
211 case LP_INTERP_FACING:
/* Constant term a0 of the plane equation, loaded the same way. */
212 ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
213 ptr = LLVMBuildBitCast(builder, ptr,
214 LLVMPointerType(setup_bld->vec_type, 0), "");
215 a0aos = LLVMBuildLoad(builder, ptr, "");
216 attrib_name(a0aos, attrib, 0, ".a0aos");
219 case LP_INTERP_POSITION:
220 /* Nothing to do as the position coeffs are already setup in slot 0 */
/* Keep the vectors (possibly still zero) for attribs_update_simple(). */
227 bld->a0aos[attrib] = a0aos;
228 bld->dadxaos[attrib] = dadxaos;
229 bld->dadyaos[attrib] = dadyaos;
234 * Interpolate the shader input attribute values.
235 * This is called for each (group of) quad(s).
/**
 * Evaluate attributes in the range [start, end) for the group of quads
 * selected by loop_iter and store the results in
 * bld->attribs[attrib][chan].
 *
 * The per-lane pixel offsets are loaded from bld->xoffset_store and
 * bld->yoffset_store at index loop_iter; the broadcast values of bld->x and
 * bld->y are then added to them.
 *
 * Only channels enabled in bld->mask[attrib] are produced:
 *  - perspective / linear: a = a0 + dadx * pixoffx + dady * pixoffy, with
 *    the coefficients broadcast from the cached bld->a0aos / dadxaos /
 *    dadyaos vectors.  Perspective attributes are additionally multiplied
 *    by oow, the reciprocal of bld->attribs[0][3].
 *  - constant / facing: the a0 channel is broadcast unchanged.
 *  - position: the value is copied from bld->attribs[0][chan].
 * Channel 2 of attribute 0 is finally limited to at most coeff_bld->one.
 *
 * NOTE(review): the start / end parameters, the switch header and several
 * braces are not visible here; the exact nesting should be confirmed.
 *
 * @param bld        interpolation context
 * @param gallivm    provides the IR builder
 * @param loop_iter  index into the precomputed pixel offset arrays
 */
238 attribs_update_simple(struct lp_build_interp_soa_context *bld,
239 struct gallivm_state *gallivm,
240 LLVMValueRef loop_iter,
244 LLVMBuilderRef builder = gallivm->builder;
245 struct lp_build_context *coeff_bld = &bld->coeff_bld;
246 struct lp_build_context *setup_bld = &bld->setup_bld;
247 LLVMValueRef oow = NULL;
249 LLVMValueRef pixoffx;
250 LLVMValueRef pixoffy;
253 /* could do this with code-generated passed in pixel offsets too */
256 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
257 pixoffx = LLVMBuildLoad(builder, ptr, "");
258 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
259 pixoffy = LLVMBuildLoad(builder, ptr, "");
/* Add the block origin kept in bld->x / bld->y to the per-lane offsets. */
261 pixoffx = LLVMBuildFAdd(builder, pixoffx,
262 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
263 pixoffy = LLVMBuildFAdd(builder, pixoffy,
264 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
266 for (attrib = start; attrib < end; attrib++) {
267 const unsigned mask = bld->mask[attrib];
268 const unsigned interp = bld->interp[attrib];
271 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
272 if (mask & (1 << chan)) {
274 LLVMValueRef dadx = coeff_bld->zero;
275 LLVMValueRef dady = coeff_bld->zero;
276 LLVMValueRef a = coeff_bld->zero;
278 index = lp_build_const_int32(gallivm, chan);
280 case LP_INTERP_PERSPECTIVE:
283 case LP_INTERP_LINEAR:
/*
 * Attribute 0, channels 0 and 1 use a unit slope along their own axis with
 * the zero start value set above, so they evaluate to pixoffx / pixoffy.
 */
284 if (attrib == 0 && chan == 0) {
285 dadx = coeff_bld->one;
287 else if (attrib == 0 && chan == 1) {
288 dady = coeff_bld->one;
291 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
292 coeff_bld->type, bld->dadxaos[attrib],
294 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
295 coeff_bld->type, bld->dadyaos[attrib],
297 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
298 coeff_bld->type, bld->a0aos[attrib],
302 * a = a0 + (x * dadx + y * dady)
304 dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
305 dady = LLVMBuildFMul(builder, dady, pixoffy, "");
306 a = LLVMBuildFAdd(builder, a, dadx, "");
307 a = LLVMBuildFAdd(builder, a, dady, "");
/* Perspective: scale by the reciprocal of position w (attribute 0, channel 3). */
309 if (interp == LP_INTERP_PERSPECTIVE) {
311 LLVMValueRef w = bld->attribs[0][3];
313 assert(bld->mask[0] & TGSI_WRITEMASK_W);
314 oow = lp_build_rcp(coeff_bld, w);
316 a = lp_build_mul(coeff_bld, a, oow);
320 case LP_INTERP_CONSTANT:
321 case LP_INTERP_FACING:
322 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
323 coeff_bld->type, bld->a0aos[attrib],
327 case LP_INTERP_POSITION:
329 a = bld->attribs[0][chan];
337 if ((attrib == 0) && (chan == 2)){
338 /* FIXME: Depth values can exceed 1.0, due to the fact that
339 * setup interpolation coefficients refer to (0,0) which causes
340 * precision loss. So we must clamp to 1.0 here to avoid artifacts
342 a = lp_build_min(coeff_bld, a, coeff_bld->one);
344 bld->attribs[attrib][chan] = a;
351 * Initialize the bld->a, dadq fields. This involves fetching
352 * those values from the arrays which are passed into the JIT function.
/**
 * Prepare the incremental interpolation state consumed by
 * attribs_update(): a start-value vector (kept in memory) and a per-lane
 * delta vector for every enabled attribute channel.
 *
 * Steps, as far as the visible code shows:
 *  1. Build pixoffx / pixoffy from quad_offset_x / quad_offset_y, one
 *     constant per lane.
 *  2. Per attribute, load the a0 / dadx / dady coefficient vectors the same
 *     way coeffs_init_simple() does.
 *  3. For attributes that are neither constant nor facing, add the
 *     bld->x and bld->y contributions to a0 (see the formula comment
 *     below).
 *  4. Per enabled channel, compute dadq = dadx * pixoffx + dady * pixoffy,
 *     derive the start vector a, store a into a fresh alloca recorded in
 *     bld->a[attrib][chan] and record dadq in bld->dadq[attrib][chan].
 *
 * NOTE(review): the switch header, several braces and some call arguments
 * are not visible here.  The body also reads a0_ptr.
 *
 * @param bld       interpolation context
 * @param dadx_ptr  base of the d/dx coefficient array
 * @param dady_ptr  base of the d/dy coefficient array
 */
355 coeffs_init(struct lp_build_interp_soa_context *bld,
357 LLVMValueRef dadx_ptr,
358 LLVMValueRef dady_ptr)
360 struct lp_build_context *coeff_bld = &bld->coeff_bld;
361 struct lp_build_context *setup_bld = &bld->setup_bld;
362 struct gallivm_state *gallivm = coeff_bld->gallivm;
363 LLVMBuilderRef builder = gallivm->builder;
364 LLVMValueRef pixoffx, pixoffy;
/* Per-lane pixel offsets relative to the first pixel. */
369 pixoffx = coeff_bld->undef;
370 pixoffy = coeff_bld->undef;
371 for (i = 0; i < coeff_bld->type.length; i++) {
372 LLVMValueRef nr = lp_build_const_int32(gallivm, i);
373 LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
374 LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
375 pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
376 pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
380 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
381 const unsigned mask = bld->mask[attrib];
382 const unsigned interp = bld->interp[attrib];
383 LLVMValueRef index = lp_build_const_int32(gallivm,
384 attrib * TGSI_NUM_CHANNELS);
386 LLVMValueRef dadxaos = setup_bld->zero;
387 LLVMValueRef dadyaos = setup_bld->zero;
388 LLVMValueRef a0aos = setup_bld->zero;
390 /* always fetch all 4 values for performance/simplicity */
392 case LP_INTERP_PERSPECTIVE:
395 case LP_INTERP_LINEAR:
396 ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
397 ptr = LLVMBuildBitCast(builder, ptr,
398 LLVMPointerType(setup_bld->vec_type, 0), "");
399 dadxaos = LLVMBuildLoad(builder, ptr, "");
401 ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
402 ptr = LLVMBuildBitCast(builder, ptr,
403 LLVMPointerType(setup_bld->vec_type, 0), "");
404 dadyaos = LLVMBuildLoad(builder, ptr, "");
406 attrib_name(dadxaos, attrib, 0, ".dadxaos");
407 attrib_name(dadyaos, attrib, 0, ".dadyaos");
410 case LP_INTERP_CONSTANT:
411 case LP_INTERP_FACING:
412 ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
413 ptr = LLVMBuildBitCast(builder, ptr,
414 LLVMPointerType(setup_bld->vec_type, 0), "");
415 a0aos = LLVMBuildLoad(builder, ptr, "");
416 attrib_name(a0aos, attrib, 0, ".a0aos");
419 case LP_INTERP_POSITION:
420 /* Nothing to do as the position coeffs are already setup in slot 0 */
429 * a = a0 + (x * dadx + y * dady)
430 * a0aos is the attrib value at top left corner of stamp
432 if (interp != LP_INTERP_CONSTANT &&
433 interp != LP_INTERP_FACING) {
434 LLVMValueRef axaos, ayaos;
435 axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
437 ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
439 a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
440 a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
/*
 * With the offset tables above, eight lanes (two quads side by side) give
 * dadq = {0, dadx, dady, dadx+dady,
 *         2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}.
 */
444 * dadq = {0, dadx, dady, dadx + dady}
445 * for two quads (side by side) this is:
446 * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
448 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
449 /* this generates a CRAPLOAD of shuffles... */
450 if (mask & (1 << chan)) {
451 LLVMValueRef dadx, dady;
452 LLVMValueRef dadq, dadq2;
454 LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);
/* Attribute 0, channels 0 / 1: start at bld->x / bld->y with a unit slope. */
456 if (attrib == 0 && chan == 0) {
457 a = lp_build_broadcast_scalar(coeff_bld, bld->x);
458 dadx = coeff_bld->one;
459 dady = coeff_bld->zero;
461 else if (attrib == 0 && chan == 1) {
462 a = lp_build_broadcast_scalar(coeff_bld, bld->y);
463 dady = coeff_bld->one;
464 dadx = coeff_bld->zero;
467 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
468 coeff_bld->type, dadxaos, chan_index);
469 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
470 coeff_bld->type, dadyaos, chan_index);
475 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
476 coeff_bld->type, a0aos, chan_index);
/* Per-lane delta from the first pixel: dadx * xoffset + dady * yoffset. */
479 dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
480 dady = LLVMBuildFMul(builder, dady, pixoffy, "");
481 dadq = LLVMBuildFAdd(builder, dadx, dady, "");
484 * Compute the attrib values on the upper-left corner of each
486 * Note that if we process 2 quads at once this doesn't
487 * really exactly to what we want.
488 * We need to access elem 0 and 2 respectively later if we process
/*
 * Adding 2 * dadq doubles the lane offsets: for the first four lanes
 * (0,0) (1,0) (0,1) (1,1) become (0,0) (2,0) (0,2) (2,2), which are the
 * quad origins listed in the offset tables.  Lane q of "a" therefore holds
 * the value at the upper-left pixel of quad q.
 */
492 if (interp != LP_INTERP_CONSTANT &&
493 interp != LP_INTERP_FACING) {
494 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
495 a = LLVMBuildFAdd(builder, a, dadq2, "");
/* Disabled in this file: PERSPECTIVE_DIVIDE_PER_QUAD is defined as 0. */
498 #if PERSPECTIVE_DIVIDE_PER_QUAD
504 * XXX since we're only going to access elements 0,2 out of 8
505 * if we have 8-wide vectors we should do the division only 4-wide.
506 * a is really a 2-elements in a 4-wide vector disguised as 8-wide
509 if (interp == LP_INTERP_PERSPECTIVE) {
510 LLVMValueRef w = bld->a[0][3];
512 assert(bld->mask[0] & TGSI_WRITEMASK_W);
514 bld->oow = lp_build_rcp(coeff_bld, w);
515 lp_build_name(bld->oow, "oow");
517 a = lp_build_mul(coeff_bld, a, bld->oow);
521 attrib_name(a, attrib, chan, ".a");
522 attrib_name(dadq, attrib, chan, ".dadq");
/* "a" is stored to memory; attribs_update() reads single elements from it. */
524 bld->a[attrib][chan] = lp_build_alloca(gallivm,
526 LLVMBuildStore(builder, a, bld->a[attrib][chan]);
527 bld->dadq[attrib][chan] = dadq;
535 * Increment the shader input attribute values.
536 * This is called when we move from one quad to the next.
/**
 * Produce the attribute values for the quad selected by loop_iter, using
 * the state prepared by coeffs_init(), and store them in
 * bld->attribs[attrib][chan].
 *
 * For every channel enabled in bld->mask[attrib]:
 *  - constant / facing: the stored vector bld->a[attrib][chan] is loaded
 *    as is.
 *  - position: the value is copied from bld->attribs[0][chan].
 *  - otherwise: the stored vector is reinterpreted as an array of floats,
 *    element loop_iter is loaded and broadcast to all lanes, and the
 *    per-lane delta bld->dadq[attrib][chan] is added.  With
 *    PERSPECTIVE_DIVIDE_PER_QUAD defined as 0, as in this file, perspective
 *    attributes are then multiplied by oow, the reciprocal of
 *    bld->attribs[0][3].  Channel 2 of attribute 0 is limited to at most
 *    coeff_bld->one.
 *
 * NOTE(review): the start / end parameters and several braces are not
 * visible here; the exact nesting of the final clamp and naming should be
 * confirmed.
 *
 * @param bld        interpolation context
 * @param gallivm    provides the IR builder and LLVM context
 * @param loop_iter  element index selecting the quad's start value
 */
539 attribs_update(struct lp_build_interp_soa_context *bld,
540 struct gallivm_state *gallivm,
541 LLVMValueRef loop_iter,
545 LLVMBuilderRef builder = gallivm->builder;
546 struct lp_build_context *coeff_bld = &bld->coeff_bld;
547 LLVMValueRef oow = NULL;
551 for(attrib = start; attrib < end; ++attrib) {
552 const unsigned mask = bld->mask[attrib];
553 const unsigned interp = bld->interp[attrib];
554 for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
555 if(mask & (1 << chan)) {
557 if (interp == LP_INTERP_CONSTANT ||
558 interp == LP_INTERP_FACING) {
559 a = LLVMBuildLoad(builder, bld->a[attrib][chan], "");
561 else if (interp == LP_INTERP_POSITION) {
563 a = bld->attribs[0][chan];
/* bld->a[attrib][chan] is the pointer to the stored start-value vector. */
568 a = bld->a[attrib][chan];
571 * Broadcast the attribute value for this quad into all elements
575 /* stored as vector load as float */
576 LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
577 gallivm->context), 0);
579 a = LLVMBuildBitCast(builder, a, ptr_type, "");
580 ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
581 a = LLVMBuildLoad(builder, ptr, "");
582 a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
586 * Get the derivatives.
589 dadq = bld->dadq[attrib][chan];
/* Disabled in this file: PERSPECTIVE_DIVIDE_PER_QUAD is defined as 0. */
591 #if PERSPECTIVE_DIVIDE_PER_QUAD
592 if (interp == LP_INTERP_PERSPECTIVE) {
593 LLVMValueRef dwdq = bld->dadq[0][3];
597 oow = LLVMBuildShuffleVector(coeff_bld->builder,
598 bld->oow, coeff_bld->undef,
602 dadq = lp_build_sub(coeff_bld,
604 lp_build_mul(coeff_bld, a, dwdq));
605 dadq = lp_build_mul(coeff_bld, dadq, oow);
610 * Add the derivatives
613 a = lp_build_add(coeff_bld, a, dadq);
/* Active path: divide per pixel using the current position w. */
615 #if !PERSPECTIVE_DIVIDE_PER_QUAD
616 if (interp == LP_INTERP_PERSPECTIVE) {
618 LLVMValueRef w = bld->attribs[0][3];
620 assert(bld->mask[0] & TGSI_WRITEMASK_W);
621 oow = lp_build_rcp(coeff_bld, w);
623 a = lp_build_mul(coeff_bld, a, oow);
627 if (attrib == 0 && chan == 2) {
628 /* FIXME: Depth values can exceed 1.0, due to the fact that
629 * setup interpolation coefficients refer to (0,0) which causes
630 * precision loss. So we must clamp to 1.0 here to avoid artifacts
632 a = lp_build_min(coeff_bld, a, coeff_bld->one);
635 attrib_name(a, attrib, chan, "");
637 bld->attribs[attrib][chan] = a;
645 * Generate the position vectors.
647 * Parameter x0, y0 are the integer values with upper left coordinates.
/**
 * Convert the signed integer coordinates x0 and y0 to floating point values
 * of type coeff_bld->elem_type and keep them in bld->x and bld->y.
 *
 * NOTE(review): the x0 / y0 parameter declarations are not visible here.
 *
 * @param bld  interpolation context; x and y are written
 */
650 pos_init(struct lp_build_interp_soa_context *bld,
654 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
655 struct lp_build_context *coeff_bld = &bld->coeff_bld;
657 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
658 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
663 * Initialize fragment shader input attribute info.
/**
 * Initialize an interpolation context.
 *
 * The function
 *  - zeroes *bld;
 *  - creates a 32-bit float build context with type.length lanes
 *    (coeff_bld) and one with TGSI_NUM_CHANNELS lanes (setup_bld);
 *  - points bld->pos at attribute slot 0 and bld->inputs at slot 1;
 *  - records mask / interp for slot 0 (XYZW, linear) and copies
 *    usage_mask / interp of input i into slot 1 + i;
 *  - fills every bld->attribs[][] entry with undef;
 *  - converts x0 / y0 with pos_init();
 *  - selects the interpolation strategy: with more than 4 lanes,
 *    simple_interp is TRUE, 16 / type.length pixel offset vectors are
 *    precomputed into xoffset_store / yoffset_store, and
 *    coeffs_init_simple() runs; otherwise simple_interp is FALSE and
 *    coeffs_init() runs.
 *
 * NOTE(review): several parameter declarations (for example num_inputs,
 * type, a0_ptr, x0, y0) are not visible here although the body uses them.
 *
 * @param bld       context to initialize
 * @param gallivm   code generation state
 * @param inputs    per-input usage mask and interpolation mode
 * @param builder   IR builder used for the offset array stores
 * @param dadx_ptr  base of the d/dx coefficient array
 * @param dady_ptr  base of the d/dy coefficient array
 */
666 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
667 struct gallivm_state *gallivm,
669 const struct lp_shader_input *inputs,
670 LLVMBuilderRef builder,
673 LLVMValueRef dadx_ptr,
674 LLVMValueRef dady_ptr,
678 struct lp_type coeff_type;
679 struct lp_type setup_type;
683 memset(bld, 0, sizeof *bld);
/* Type of the interpolated values: 32-bit floats, one lane per pixel. */
685 memset(&coeff_type, 0, sizeof coeff_type);
686 coeff_type.floating = TRUE;
687 coeff_type.sign = TRUE;
688 coeff_type.width = 32;
689 coeff_type.length = type.length;
/* Type of the loaded coefficients: 32-bit floats, one lane per channel. */
691 memset(&setup_type, 0, sizeof setup_type);
692 setup_type.floating = TRUE;
693 setup_type.sign = TRUE;
694 setup_type.width = 32;
695 setup_type.length = TGSI_NUM_CHANNELS;
698 /* XXX: we don't support interpolating into any other types */
699 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
701 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
702 lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
704 /* For convenience */
705 bld->pos = bld->attribs[0];
706 bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
/* Slot 0: all four channels enabled, linear interpolation. */
709 bld->mask[0] = TGSI_WRITEMASK_XYZW;
710 bld->interp[0] = LP_INTERP_LINEAR;
/* Shader inputs occupy slots 1 .. num_inputs. */
713 for (attrib = 0; attrib < num_inputs; ++attrib) {
714 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
715 bld->interp[1 + attrib] = inputs[attrib].interp;
717 bld->num_attribs = 1 + num_inputs;
719 /* Ensure all masked out input channels have a valid value */
720 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
721 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
722 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
726 pos_init(bld, x0, y0);
728 if (coeff_type.length > 4) {
729 bld->simple_interp = TRUE;
731 /* XXX this should use a global static table */
/* Number of vectors needed to cover the 16 entries of the offset tables. */
733 unsigned num_loops = 16 / type.length;
734 LLVMValueRef pixoffx, pixoffy, index;
737 bld->xoffset_store = lp_build_array_alloca(gallivm,
738 lp_build_vec_type(gallivm, type),
739 lp_build_const_int32(gallivm, num_loops),
741 bld->yoffset_store = lp_build_array_alloca(gallivm,
742 lp_build_vec_type(gallivm, type),
743 lp_build_const_int32(gallivm, num_loops),
745 for (i = 0; i < num_loops; i++) {
746 index = lp_build_const_int32(gallivm, i);
/* i * type.length / 4 is passed as calc_offsets()'s quad_start_index. */
747 calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
748 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
749 LLVMBuildStore(builder, pixoffx, ptr);
750 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
751 LLVMBuildStore(builder, pixoffy, ptr);
754 coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
757 bld->simple_interp = FALSE;
758 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
765 * Advance the position and inputs to the given quad within the block.
/**
 * Update the shader inputs (attribute slots 1 up to, but not including,
 * bld->num_attribs) for the quad(s) selected by quad_start_index.
 *
 * Dispatches to attribs_update_simple() when bld->simple_interp is set and
 * to attribs_update() otherwise; quad_start_index is forwarded as the
 * loop_iter argument.
 *
 * @param bld               interpolation context
 * @param gallivm           code generation state
 * @param quad_start_index  value selecting the quad(s) to evaluate
 */
769 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
770 struct gallivm_state *gallivm,
771 LLVMValueRef quad_start_index)
773 if (bld->simple_interp) {
774 attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs);
777 attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs);
/**
 * Update attribute slot 0 only (range [0, 1)) for the quad(s) selected by
 * quad_start_index.
 *
 * Dispatches to attribs_update_simple() when bld->simple_interp is set and
 * to attribs_update() otherwise; quad_start_index is forwarded as the
 * loop_iter argument.
 *
 * @param bld               interpolation context
 * @param gallivm           code generation state
 * @param quad_start_index  value selecting the quad(s) to evaluate
 */
782 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
783 struct gallivm_state *gallivm,
784 LLVMValueRef quad_start_index)
786 if (bld->simple_interp) {
787 attribs_update_simple(bld, gallivm, quad_start_index, 0, 1);
790 attribs_update(bld, gallivm, quad_start_index, 0, 1);