From 160bda24ed3b8f74d58cfcf55349c6d9e92fb442 Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Mon, 8 Mar 2010 17:06:38 +1000
Subject: [PATCH] nv50: re-add immediate mode vertex submission paths

---
 src/gallium/drivers/nv50/Makefile       |   3 +-
 src/gallium/drivers/nv50/nv50_context.h |   9 +
 src/gallium/drivers/nv50/nv50_push.c    | 398 ++++++++++++++++++++++++++++++++
 src/gallium/drivers/nv50/nv50_vbo.c     |  38 +++-
 4 files changed, 440 insertions(+), 8 deletions(-)
 create mode 100644 src/gallium/drivers/nv50/nv50_push.c

diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index 612aea28a34..5d622e1c13c 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -16,6 +16,7 @@ C_SOURCES = \
 	nv50_surface.c \
 	nv50_tex.c \
 	nv50_transfer.c \
-	nv50_vbo.c
+	nv50_vbo.c \
+	nv50_push.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 15f33fa4f4f..35abfba93b9 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -163,6 +163,8 @@ struct nv50_context {
 	unsigned sampler_nr[PIPE_SHADER_TYPES];
 	struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
 	unsigned miptree_nr[PIPE_SHADER_TYPES];
+
+	unsigned vbo_fifo;
 };
 
 static INLINE struct nv50_context *
@@ -207,6 +209,13 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
 extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
 extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
 
+/* nv50_push.c */
+extern void
+nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *,
+			     unsigned idxsize, unsigned mode, unsigned start,
+			     unsigned count, unsigned i_start,
+			     unsigned i_count);
+
 /* nv50_clear.c */
 extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 		       const float *rgba, double depth, unsigned stencil);
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
new file mode 100644
index 00000000000..b615f4e0549
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -0,0 +1,398 @@
+/*
+ * Immediate mode ("push") vertex submission for nv50.
+ *
+ * Attributes flagged in nv50->vbo_fifo are read by the CPU from the mapped
+ * vertex buffers and written into the command stream as
+ * NV50TCL_VERTEX_DATA packets.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau/nouveau_util.h"
+#include "nv50_context.h"
+
+/* Translate a PIPE_PRIM_* mode into the matching NV50TCL_VERTEX_BEGIN_*
+ * value.  Unknown modes are logged and fall back to POINTS.
+ */
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+	switch (mode) {
+	case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+	case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+	case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+	case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+	case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+	case PIPE_PRIM_TRIANGLE_STRIP:
+		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+	case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+	case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+	case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+	case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+	case PIPE_PRIM_LINES_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+	case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+	case PIPE_PRIM_TRIANGLES_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+	case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+	default:
+		break;
+	}
+
+	NOUVEAU_ERR("invalid primitive type %d\n", mode);
+	return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
+/* State shared by the emit callbacks while one draw call is pushed. */
+struct push_context {
+	struct nv50_context *nv50;
+
+	unsigned vtx_size; /* dwords of VERTEX_DATA emitted per vertex */
+
+	void *idxbuf;
+	unsigned idxsize;
+
+	float edgeflag; /* last per-vertex value emitted by emit_vertex() */
+	int edgeflag_attr; /* attr[] slot of the edgeflag, used if < 16 */
+
+	struct {
+		uint8_t *map; /* CPU pointer to the mapped attribute data */
+		unsigned stride;
+		unsigned divisor;
+		unsigned step;
+		void (*push)(struct nouveau_channel *, void *);
+	} attr[16];
+	unsigned attr_nr;
+};
+
+/*
+ * Attribute writers: emit_bNN_C reads C values of NN bits each from the
+ * attribute data and pushes them to the channel, packing sub-dword values
+ * together into dwords.
+ */
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+	OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+	OUT_RING(chan, v[1]);
+	OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+	OUT_RING(chan, v[1]);
+	OUT_RING(chan, v[2]);
+	OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+	uint16_t *v = data;
+
+	OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+	uint16_t *v = data;
+
+	OUT_RING(chan, (v[1] << 16) | v[0]);
+	OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+	uint8_t *v = data;
+
+	OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+	uint8_t *v = data;
+
+	OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+/* Push vertex n: update the edgeflag if it changed, then emit one
+ * VERTEX_DATA packet holding every pushed attribute of that vertex.
+ */
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	int i;
+
+	if (ctx->edgeflag_attr < 16) {
+		float *edgeflag = (float *)(ctx->attr[ctx->edgeflag_attr].map +
+					    ctx->attr[ctx->edgeflag_attr].stride * n);
+
+		if (*edgeflag != ctx->edgeflag) {
+			BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+			OUT_RING  (chan, *edgeflag ? 1 : 0);
+			ctx->edgeflag = *edgeflag;
+		}
+	}
+
+	BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
+	for (i = 0; i < ctx->attr_nr; i++)
+		ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
+}
+
+/* u_split_prim edge callback: program NV50TCL_EDGEFLAG_ENABLE. */
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+	struct push_context *ctx = priv;
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+
+	BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING  (chan, enabled ? 1 : 0);
+}
+
+/* u_split_prim emit callbacks: push count vertices starting at start,
+ * either looked up through the 8/16/32-bit index buffer or taken directly.
+ */
+static void
+emit_elt08(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	uint8_t *idxbuf = ctx->idxbuf;
+
+	while (count--)
+		emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt16(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	uint16_t *idxbuf = ctx->idxbuf;
+
+	while (count--)
+		emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt32(void *priv, unsigned start, unsigned count)
+{
+	struct push_context *ctx = priv;
+	uint32_t *idxbuf = ctx->idxbuf;
+
+	while (count--)
+		emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_verts(void *priv, unsigned start, unsigned count)
+{
+	while (count--)
+		emit_vertex(priv, start++);
+}
+
+/*
+ * Draw count vertices (indexed through idxbuf when it is non-NULL) for
+ * i_count instances, pushing every attribute flagged in nv50->vbo_fifo
+ * through the FIFO.
+ *
+ * idxsize is the index size in bytes (1, 2, anything else is treated as
+ * 4) and is ignored without an index buffer.  The function asserts and
+ * returns early if a buffer cannot be mapped, an attribute format is
+ * unsupported or state validation fails.
+ */
+void
+nv50_push_elements_instanced(struct pipe_context *pipe,
+			     struct pipe_buffer *idxbuf, unsigned idxsize,
+			     unsigned mode, unsigned start, unsigned count,
+			     unsigned i_start, unsigned i_count)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	struct push_context ctx;
+	const unsigned p_overhead = 4 + /* begin/end */
+				    4; /* potential edgeflag enable/disable */
+	const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
+				    2; /* potential edgeflag modification */
+	struct u_split_prim s;
+	unsigned vtx_size;
+	boolean nzi = FALSE;
+	int i;
+
+	ctx.nv50 = nv50;
+	ctx.attr_nr = 0;
+	ctx.idxbuf = NULL;
+	ctx.idxsize = 0;
+	ctx.vtx_size = 0;
+	ctx.edgeflag = 0.5f;
+	ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+
+	/* map vertex buffers, determine vertex size */
+	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+		struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+		struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+		struct nouveau_bo *bo = nouveau_bo(vb->buffer);
+		unsigned size, nr_components, n;
+
+		if (!(nv50->vbo_fifo & (1 << i)))
+			continue;
+		n = ctx.attr_nr++;
+
+		if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+			assert(bo->map);
+			return;
+		}
+		ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
+		nouveau_bo_unmap(bo);
+
+		ctx.attr[n].stride = vb->stride;
+		ctx.attr[n].divisor = ve->instance_divisor;
+		if (ctx.attr[n].divisor) {
+			ctx.attr[n].step = i_start % ve->instance_divisor;
+			ctx.attr[n].map += i_start * vb->stride;
+		}
+
+		size = util_format_get_component_bits(ve->src_format,
+						      UTIL_FORMAT_COLORSPACE_RGB, 0);
+		nr_components = util_format_get_nr_components(ve->src_format);
+		switch (size) {
+		case 8:
+			switch (nr_components) {
+			case 1: ctx.attr[n].push = emit_b08_1; break;
+			case 2: ctx.attr[n].push = emit_b16_1; break;
+			case 3: ctx.attr[n].push = emit_b08_3; break;
+			case 4: ctx.attr[n].push = emit_b32_1; break;
+			default: assert(0); return;
+			}
+			ctx.vtx_size++;
+			break;
+		case 16:
+			switch (nr_components) {
+			case 1: ctx.attr[n].push = emit_b16_1; break;
+			case 2: ctx.attr[n].push = emit_b32_1; break;
+			case 3: ctx.attr[n].push = emit_b16_3; break;
+			case 4: ctx.attr[n].push = emit_b32_2; break;
+			default: assert(0); return;
+			}
+			ctx.vtx_size += (nr_components + 1) >> 1;
+			break;
+		case 32:
+			switch (nr_components) {
+			case 1: ctx.attr[n].push = emit_b32_1; break;
+			case 2: ctx.attr[n].push = emit_b32_2; break;
+			case 3: ctx.attr[n].push = emit_b32_3; break;
+			case 4: ctx.attr[n].push = emit_b32_4; break;
+			default: assert(0); return;
+			}
+			ctx.vtx_size += nr_components;
+			break;
+		default:
+			assert(0);
+			return;
+		}
+	}
+	vtx_size = ctx.vtx_size + v_overhead;
+
+	/* map index buffer, if present */
+	if (idxbuf) {
+		struct nouveau_bo *bo = nouveau_bo(idxbuf);
+
+		if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+			assert(bo->map);
+			return;
+		}
+		ctx.idxbuf = bo->map;
+		ctx.idxsize = idxsize;
+		nouveau_bo_unmap(bo);
+	}
+
+	s.priv = &ctx;
+	s.edge = emit_edgeflag;
+	if (idxbuf) {
+		if (idxsize == 1)
+			s.emit = emit_elt08;
+		else
+		if (idxsize == 2)
+			s.emit = emit_elt16;
+		else
+			s.emit = emit_elt32;
+	} else
+		s.emit = emit_verts;
+
+	/* per-instance loop */
+	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+	OUT_RING  (chan, i_start);
+	while (i_count--) {
+		unsigned max_verts;
+		boolean done;
+
+		/* NOTE(review): divisor attributes are stepped before each
+		 * instance is drawn, including the first one - confirm.
+		 */
+		for (i = 0; i < ctx.attr_nr; i++) {
+			if (!ctx.attr[i].divisor ||
+			    ctx.attr[i].divisor != ++ctx.attr[i].step)
+				continue;
+			ctx.attr[i].step = 0;
+			ctx.attr[i].map += ctx.attr[i].stride;
+		}
+
+		u_split_prim_init(&s, mode, start, count);
+		do {
+			if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
+				FIRE_RING(chan);
+				if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
+					assert(0);
+					return;
+				}
+			}
+
+			max_verts  = AVAIL_RING(chan);
+			max_verts -= p_overhead;
+			max_verts /= vtx_size;
+
+			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+			OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
+			done = u_split_prim_next(&s, max_verts);
+			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+			OUT_RING  (chan, 0);
+		} while (!done);
+
+		nzi = TRUE;
+	}
+}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 8f3c1aaf465..9d49ad6db24 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -220,6 +220,13 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 	if (!nv50_state_validate(nv50, 10 + 16*3))
 		return;
 
+	if (nv50->vbo_fifo) {
+		nv50_push_elements_instanced(pipe, NULL, 0, mode, start,
+					     count, startInstance,
+					     instanceCount);
+		return;
+	}
+
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
@@ -422,18 +429,23 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 	struct instance a[16];
 	unsigned prim = nv50_prim(mode);
 
-	if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) ||
-	    indexSize == 1) {
+	instance_init(nv50, a, startInstance);
+	if (!nv50_state_validate(nv50, 13 + 16*3))
+		return;
+
+	if (nv50->vbo_fifo) {
+		nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
+					     mode, start, count, startInstance,
+					     instanceCount);
+		return;
+	} else
+	if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) {
 		nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
 					  mode, start, count, startInstance,
 					  instanceCount);
 		return;
 	}
 
-	instance_init(nv50, a, startInstance);
-	if (!nv50_state_validate(nv50, 13 + 16*3))
-		return;
-
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
@@ -570,7 +582,9 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	if (nv50->vtxbuf_nr == 0)
 		return NULL;
 
-	assert(!NV50_USING_LOATHED_EDGEFLAG(nv50));
+	nv50->vbo_fifo = 0;
+	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
+		nv50->vbo_fifo = 0xffff;
 
 	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
@@ -593,6 +607,16 @@ nv50_vbo_validate(struct nv50_context *nv50)
 			so_method(vtxbuf, tesla,
 				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
 			so_data  (vtxbuf, 0);
+
+			nv50->vbo_fifo &= ~(1 << i);
+			continue;
+		}
+
+		if (nv50->vbo_fifo) {
+			so_data  (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
+			so_method(vtxbuf, tesla,
+				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
+			so_data  (vtxbuf, 0);
 			continue;
 		}
 
-- 
2.11.0