1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Triangle rendering within a tile.
32 #include <transpose_matrix4x4.h>
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_format.h"
35 #include "util/u_math.h"
36 #include "spu_colorpack.h"
38 #include "spu_texture.h"
41 #include "spu_per_fragment_op.h"
44 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
45 typedef vector unsigned int mask_t;
55 * Simplified types taken from other parts of Gallium
57 struct vertex_header {
65 #define CEILF(X) ((float) (int) ((X) + 0.99999))
68 #define QUAD_TOP_LEFT 0
69 #define QUAD_TOP_RIGHT 1
70 #define QUAD_BOTTOM_LEFT 2
71 #define QUAD_BOTTOM_RIGHT 3
72 #define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
73 #define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
74 #define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
75 #define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
85 float dx; /**< X(v1) - X(v0), used only during setup */
86 float dy; /**< Y(v1) - Y(v0), used only during setup */
87 float dxdy; /**< dx/dy */
88 float sx, sy; /**< first sample point coord */
89 int lines; /**< number of lines on this edge */
102 * Triangle setup info (derived from draw_stage).
103 * Also used for line drawing (taking some liberties).
107 /* Vertices are just an array of floats making up each attribute in
108 * turn. Currently fixed at 4 floats, but should change in time.
109 * Codegen will help cope with this.
111 const struct vertex_header *vmax;
112 const struct vertex_header *vmid;
113 const struct vertex_header *vmin;
114 const struct vertex_header *vprovoke;
124 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
127 struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
129 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
133 struct quad_header quad;
137 int left[2]; /**< [0] = row0, [1] = row1 */
141 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
147 static struct setup_stage setup;
154 * Basically a cast wrapper.
156 static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
158 return (struct setup_stage *)stage;
164 * Clip setup.quad against the scissor/surface bounds.
167 quad_clip(struct setup_stage *setup)
169 const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
170 const int minx = (int) cliprect->minx;
171 const int maxx = (int) cliprect->maxx;
172 const int miny = (int) cliprect->miny;
173 const int maxy = (int) cliprect->maxy;
175 if (setup.quad.x0 >= maxx ||
176 setup.quad.y0 >= maxy ||
177 setup.quad.x0 + 1 < minx ||
178 setup.quad.y0 + 1 < miny) {
179 /* totally clipped */
180 setup.quad.mask = 0x0;
183 if (setup.quad.x0 < minx)
184 setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
185 if (setup.quad.y0 < miny)
186 setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
187 if (setup.quad.x0 == maxx - 1)
188 setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
189 if (setup.quad.y0 == maxy - 1)
190 setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
196 * Emit a quad (pass to next stage) with clipping.
199 clip_emit_quad(struct setup_stage *setup)
202 if (setup.quad.mask) {
203 struct softpipe_context *sp = setup.softpipe;
204 sp->quad.first->run(sp->quad.first, &setup.quad);
210 * Evaluate attribute coefficients (plane equations) to compute
211 * attribute values for the four fragments in a quad.
212 * Eg: four colors will be computed (in AoS format).
215 eval_coeff(uint slot, float x, float y, vector float result[4])
217 switch (spu.vertex_info.interp_mode[slot]) {
218 case INTERP_CONSTANT:
219 result[QUAD_TOP_LEFT] =
220 result[QUAD_TOP_RIGHT] =
221 result[QUAD_BOTTOM_LEFT] =
222 result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
226 /* fall-through, for now */
229 register vector float dadx = setup.coef[slot].dadx.v;
230 register vector float dady = setup.coef[slot].dady.v;
231 register vector float topLeft
232 = spu_add(setup.coef[slot].a0.v,
233 spu_add(spu_mul(spu_splats(x), dadx),
234 spu_mul(spu_splats(y), dady)));
236 result[QUAD_TOP_LEFT] = topLeft;
237 result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
238 result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
239 result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
245 static INLINE vector float
246 eval_z(float x, float y)
249 const float dzdx = setup.coef[slot].dadx.f[2];
250 const float dzdy = setup.coef[slot].dady.f[2];
251 const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
252 const vector float topLeftv = spu_splats(topLeft);
253 const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
254 return spu_add(topLeftv, derivs);
259 do_depth_test(int x, int y, mask_t quadmask)
264 if (spu.fb.depth_format == PIPE_FORMAT_NONE)
267 zvals.v = eval_z((float) x, (float) y);
269 mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
270 y - setup.cliprect_miny,
273 (qword) spu_splats((unsigned char) 0x0ffu),
274 (qword) spu_splats((unsigned int) 0x01u));
276 if (spu_extract(spu_orx(mask), 0))
277 spu.cur_ztile_status = TILE_STATUS_DIRTY;
284 * Emit a quad (pass to next stage). No clipping is done.
285 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
286 * should be skipped. But adding the test for that slows things down
290 emit_quad( int x, int y, mask_t mask )
293 struct softpipe_context *sp = setup.softpipe;
296 setup.quad.mask = mask;
297 sp->quad.first->run(sp->quad.first, &setup.quad);
300 #define NEW_FRAGMENT_FUNCTION 01
301 #if !NEW_FRAGMENT_FUNCTION
302 if (spu.read_depth) {
303 mask = do_depth_test(x, y, mask);
307 /* If any bits in mask are set... */
308 if (spu_extract(spu_orx(mask), 0)) {
309 const int ix = x - setup.cliprect_minx;
310 const int iy = y - setup.cliprect_miny;
311 vector float colors[4];
313 spu.cur_ctile_status = TILE_STATUS_DIRTY;
314 spu.cur_ztile_status = TILE_STATUS_DIRTY;
316 if (spu.texture[0].start) {
317 /* texture mapping */
319 vector float texcoords[4];
320 eval_coeff(2, (float) x, (float) y, texcoords);
322 if (spu_extract(mask, 0))
323 colors[0] = spu.sample_texture[unit](unit, texcoords[0]);
324 if (spu_extract(mask, 1))
325 colors[1] = spu.sample_texture[unit](unit, texcoords[1]);
326 if (spu_extract(mask, 2))
327 colors[2] = spu.sample_texture[unit](unit, texcoords[2]);
328 if (spu_extract(mask, 3))
329 colors[3] = spu.sample_texture[unit](unit, texcoords[3]);
332 if (spu.texture[1].start) {
333 /* multi-texture mapping */
335 vector float colors1[4];
337 eval_coeff(2, (float) x, (float) y, texcoords);
339 if (spu_extract(mask, 0))
340 colors1[0] = spu.sample_texture[unit](unit, texcoords[0]);
341 if (spu_extract(mask, 1))
342 colors1[1] = spu.sample_texture[unit](unit, texcoords[1]);
343 if (spu_extract(mask, 2))
344 colors1[2] = spu.sample_texture[unit](unit, texcoords[2]);
345 if (spu_extract(mask, 3))
346 colors1[3] = spu.sample_texture[unit](unit, texcoords[3]);
348 /* hack: modulate first texture by second */
349 colors[0] = spu_mul(colors[0], colors1[0]);
350 colors[1] = spu_mul(colors[1], colors1[1]);
351 colors[2] = spu_mul(colors[2], colors1[2]);
352 colors[3] = spu_mul(colors[3], colors1[3]);
358 eval_coeff(1, (float) x, (float) y, colors);
362 #if NEW_FRAGMENT_FUNCTION
364 /* Convert fragment data from AoS to SoA format.
365 * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
368 vector float soa_frag[4];
369 _transpose_matrix4x4(soa_frag, colors);
373 fragZ.v = eval_z((float) x, (float) y);
375 /* Do all per-fragment/quad operations here, including:
376 * alpha test, z test, stencil test, blend and framebuffer writing.
378 spu.fragment_ops.func(ix, iy, &spu.ctile, &spu.ztile,
380 soa_frag[0], soa_frag[1],
381 soa_frag[2], soa_frag[3],
385 /* Convert fragment data from AoS to SoA format.
386 * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
389 _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
391 /* Read the current framebuffer values.
393 const qword pix[4] = {
394 (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
395 (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
396 (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
397 (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
403 /* Convert pixel data from AoS to SoA format.
404 * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
406 vec_float4 aos_pix[4] = {
407 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
408 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
409 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
410 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
413 _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
417 struct spu_blend_results result =
418 (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
419 soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
420 spu.const_blend_color[0], spu.const_blend_color[1],
421 spu.const_blend_color[2], spu.const_blend_color[3]);
424 /* Convert final pixel data from SoA to AoS format.
425 * I.e. (RRRR,GGGG,BBBB,AAAA) -> (RGBA,RGBA,RGBA,RGBA)
427 result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
428 result.r, result.g, result.b, result.a,
431 spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
432 spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
433 spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
434 spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
436 #endif /* NEW_FRAGMENT_FUNCTION */
444 * Given an X or Y coordinate, return the block/quad coordinate that it
447 static INLINE int block( int x )
454 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
455 * the triangle's bounds.
456 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
458 static INLINE mask_t calculate_mask( int x )
460 /* This is a little tricky.
461 * Use & instead of && to avoid branches.
462 * Use negation to convert true/false to ~0/0 values.
465 mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
466 mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
467 mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
468 mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
474 * Render a horizontal span of quads
476 static void flush_spans( void )
478 int minleft, maxright;
481 switch (setup.span.y_flags) {
483 /* both odd and even lines written (both quad rows) */
484 minleft = MIN2(setup.span.left[0], setup.span.left[1]);
485 maxright = MAX2(setup.span.right[0], setup.span.right[1]);
489 /* only even line written (quad top row) */
490 minleft = setup.span.left[0];
491 maxright = setup.span.right[0];
495 /* only odd line written (quad bottom row) */
496 minleft = setup.span.left[1];
497 maxright = setup.span.right[1];
505 /* OK, we're very likely to need the tile data now.
506 * clear or finish waiting if needed.
508 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
509 /* wait for mfc_get() to complete */
510 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
511 wait_on_mask(1 << TAG_READ_TILE_COLOR);
512 spu.cur_ctile_status = TILE_STATUS_CLEAN;
514 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
515 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
516 clear_c_tile(&spu.ctile);
517 spu.cur_ctile_status = TILE_STATUS_DIRTY;
519 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
521 if (spu.read_depth) {
522 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
523 /* wait for mfc_get() to complete */
524 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
525 wait_on_mask(1 << TAG_READ_TILE_Z);
526 spu.cur_ztile_status = TILE_STATUS_CLEAN;
528 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
529 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
530 clear_z_tile(&spu.ztile);
531 spu.cur_ztile_status = TILE_STATUS_DIRTY;
533 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
536 /* XXX this loop could be moved into the above switch cases and
537 * calculate_mask() could be simplified a bit...
539 for (x = block(minleft); x <= block(maxright); x += 2) {
541 emit_quad( x, setup.span.y, calculate_mask( x ) );
546 setup.span.y_flags = 0;
547 setup.span.right[0] = 0;
548 setup.span.right[1] = 0;
552 static void print_vertex(const struct vertex_header *v)
555 fprintf(stderr, "Vertex: (%p)\n", v);
556 for (i = 0; i < setup.quad.nr_attrs; i++) {
557 fprintf(stderr, " %d: %f %f %f %f\n", i,
558 v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
564 static boolean setup_sort_vertices(const struct vertex_header *v0,
565 const struct vertex_header *v1,
566 const struct vertex_header *v2)
570 fprintf(stderr, "Triangle:\n");
578 /* determine bottom to top order of vertices */
580 float y0 = spu_extract(v0->data[0], 1);
581 float y1 = spu_extract(v1->data[0], 1);
582 float y2 = spu_extract(v2->data[0], 1);
625 /* Check if triangle is completely outside the tile bounds */
626 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
628 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
630 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
631 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
632 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
634 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
635 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
636 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
639 setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
640 setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
641 setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
642 setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
643 setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
644 setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
647 * Compute triangle's area. Use 1/area to compute partial
648 * derivatives of attributes later.
650 * The area will be the same as prim->det, but the sign may be
651 * different depending on how the vertices get sorted above.
653 * To determine whether the primitive is front or back facing we
654 * use the prim->det value because its sign is correct.
657 const float area = (setup.emaj.dx * setup.ebot.dy -
658 setup.ebot.dx * setup.emaj.dy);
660 setup.oneoverarea = 1.0f / area;
662 _mesa_printf("%s one-over-area %f area %f det %f\n",
663 __FUNCTION__, setup.oneoverarea, area, prim->det );
668 /* We need to know if this is a front or back-facing triangle for:
669 * - the GLSL gl_FrontFacing fragment attribute (bool)
670 * - two-sided stencil test
672 setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
680 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
681 * The value comes from setup.vprovoke->data[slot].
682 * The result will be put into setup.coef[slot].a0.
683 * \param slot which attribute slot
686 const_coeff(uint slot)
688 setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
689 setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
690 setup.coef[slot].a0.v = setup.vprovoke->data[slot];
695 * Compute a0, dadx and dady for a linearly interpolated coefficient,
699 tri_linear_coeff(uint slot, uint firstComp, uint lastComp)
702 const float *vmin_d = (float *) &setup.vmin->data[slot];
703 const float *vmid_d = (float *) &setup.vmid->data[slot];
704 const float *vmax_d = (float *) &setup.vmax->data[slot];
705 const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f;
706 const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
708 for (i = firstComp; i < lastComp; i++) {
709 float botda = vmid_d[i] - vmin_d[i];
710 float majda = vmax_d[i] - vmin_d[i];
711 float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
712 float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
714 ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
716 setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
717 setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
719 /* calculate a0 as the value which would be sampled for the
720 * fragment at (0,0), taking into account that we want to sample at
721 * pixel centers, in other words (0.5, 0.5).
723 * This is neat but unfortunately not a good way to do things for
724 * triangles with very large values of dadx or dady as it will
725 * result in the subtraction and re-addition from a0 of a very
726 * large number, which means we'll end up losing a lot of the
727 * fractional bits and precision from a0. The way to fix this is
728 * to define a0 as the sample at a pixel center somewhere near vmin
729 * instead - I'll switch to this later.
731 setup.coef[slot].a0.f[i] = (vmin_d[i] -
732 (setup.coef[slot].dadx.f[i] * x +
733 setup.coef[slot].dady.f[i] * y));
737 _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
739 setup.coef[slot].a0[i],
740 setup.coef[slot].dadx.f[i],
741 setup.coef[slot].dady.f[i]);
747 * As above, but interp setup all four vector components.
750 tri_linear_coeff4(uint slot)
752 const vector float vmin_d = setup.vmin->data[slot];
753 const vector float vmid_d = setup.vmid->data[slot];
754 const vector float vmax_d = setup.vmax->data[slot];
755 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
756 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
758 vector float botda = vmid_d - vmin_d;
759 vector float majda = vmax_d - vmin_d;
761 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
762 spu_mul(botda, spu_splats(setup.emaj.dy)));
763 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
764 spu_mul(majda, spu_splats(setup.ebot.dx)));
766 setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea));
767 setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea));
769 vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
770 vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
772 setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
779 * Compute a0, dadx and dady for a perspective-corrected interpolant,
781 * We basically multiply the vertex value by 1/w before computing
782 * the plane coefficients (a0, dadx, dady).
783 * Later, when we compute the value at a particular fragment position we'll
784 * divide the interpolated value by the interpolated W at that fragment.
786 static void tri_persp_coeff( unsigned slot,
789 /* premultiply by 1/w:
791 float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
792 float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
793 float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];
795 float botda = mida - mina;
796 float majda = maxa - mina;
797 float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
798 float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
801 printf("tri persp %d,%d: %f %f %f\n", slot, i,
802 setup.vmin->data[slot][i],
803 setup.vmid->data[slot][i],
804 setup.vmax->data[slot][i]
808 assert(slot < PIPE_MAX_SHADER_INPUTS);
811 setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
812 setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
813 setup.coef[slot].a0.f[i] = (mina -
814 (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
815 setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
821 * Compute the setup.coef[] array dadx, dady, a0 values.
822 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
824 static void setup_tri_coefficients(void)
829 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
830 switch (spu.vertex_info.interp_mode[i]) {
834 /*tri_linear_coeff(i, 2, 3);*/
835 /* XXX interp W if PERSPECTIVE... */
836 tri_linear_coeff4(i);
838 case INTERP_CONSTANT:
842 tri_linear_coeff4(i);
844 case INTERP_PERSPECTIVE:
845 tri_linear_coeff4(i); /* temporary */
852 ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
853 ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
854 spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
855 tri_linear_coeff(0, 2, 3); /* slot 0, z */
856 tri_linear_coeff(1, 0, 4); /* slot 1, color */
861 static void setup_tri_edges(void)
863 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
864 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
866 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
867 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
868 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
870 setup.emaj.sy = CEILF(vmin_y);
871 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
872 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
873 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
875 setup.etop.sy = CEILF(vmid_y);
876 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
877 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
878 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
880 setup.ebot.sy = CEILF(vmin_y);
881 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
882 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
883 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
888 * Render the upper or lower half of a triangle.
889 * Scissoring/cliprect is applied here too.
891 static void subtriangle( struct edge *eleft,
895 const int minx = setup.cliprect_minx;
896 const int maxx = setup.cliprect_maxx;
897 const int miny = setup.cliprect_miny;
898 const int maxy = setup.cliprect_maxy;
899 int y, start_y, finish_y;
900 int sy = (int)eleft->sy;
902 ASSERT((int)eleft->sy == (int) eright->sy);
904 /* clip top/bottom */
906 finish_y = sy + lines;
918 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
921 for (y = start_y; y < finish_y; y++) {
923 /* avoid accumulating adds as floats don't have the precision to
924 * accurately iterate large triangle edges that way. luckily we
925 * can just multiply these days.
927 * this is all drowned out by the attribute interpolation anyway.
929 int left = (int)(eleft->sx + y * eleft->dxdy);
930 int right = (int)(eright->sx + y * eright->dxdy);
932 /* clip left/right */
940 if (block(_y) != setup.span.y) {
942 setup.span.y = block(_y);
945 setup.span.left[_y&1] = left;
946 setup.span.right[_y&1] = right;
947 setup.span.y_flags |= 1<<(_y&1);
952 /* save the values so that emaj can be restarted:
954 eleft->sx += lines * eleft->dxdy;
955 eright->sx += lines * eright->dxdy;
962 * Draw triangle into tile at (tx, ty) (tile coords)
963 * The tile data should have already been fetched.
966 tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
971 /* set clipping bounds to tile bounds */
972 setup.cliprect_minx = tx * TILE_SIZE;
973 setup.cliprect_miny = ty * TILE_SIZE;
974 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
975 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
977 if (!setup_sort_vertices((struct vertex_header *) v0,
978 (struct vertex_header *) v1,
979 (struct vertex_header *) v2)) {
980 return FALSE; /* totally clipped */
983 setup_tri_coefficients();
987 setup.span.y_flags = 0;
988 setup.span.right[0] = 0;
989 setup.span.right[1] = 0;
990 /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
992 /* init_constant_attribs( setup ); */
994 if (setup.oneoverarea < 0.0) {
997 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
998 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
1003 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
1004 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );