From f320176b71c8f69aa811a6fefa9d23fdf924cd23 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 14 Mar 2010 20:52:18 +0100 Subject: [PATCH] nvfx: so->RING_3D: fb --- src/gallium/drivers/nvfx/nvfx_context.h | 12 ++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 4 +- src/gallium/drivers/nvfx/nvfx_state_fb.c | 178 +++++++++++++++++------------ 3 files changed, 117 insertions(+), 77 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 46b5db03648..371c61f54dc 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -111,6 +111,12 @@ struct nvfx_vtxelt_state { unsigned num_elements; }; +struct nvfx_render_target { + struct nouveau_bo* bo; + unsigned offset; + unsigned pitch; +}; + struct nvfx_context { struct pipe_context pipe; @@ -169,6 +175,8 @@ struct nvfx_context { unsigned hw_vtxelt_nr; uint8_t hw_samplers; uint32_t hw_txf[8]; + struct nvfx_render_target hw_rt[4]; + struct nvfx_render_target hw_zeta; }; static INLINE struct nvfx_context * @@ -218,6 +226,10 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, unsigned ib_size, unsigned mode, unsigned start, unsigned count); +/* nvfx_fb.c */ +void +nvfx_framebuffer_relocate(struct nvfx_context *nvfx); + /* nvfx_fragprog.c */ extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 0709c563505..2bc7927a697 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -98,9 +98,7 @@ nvfx_state_emit(struct nvfx_context *nvfx) void nvfx_state_relocate(struct nvfx_context *nvfx) { - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct nvfx_state *state = &nvfx->state; - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); + nvfx_framebuffer_relocate(nvfx); nvfx_fragtex_relocate(nvfx); nvfx_fragprog_relocate(nvfx); if (nvfx->render_mode == HW) diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index c64ba27eaaa..93bc0a3feef 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -4,17 +4,14 @@ -static boolean +static void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; struct nouveau_channel *chan = nvfx->screen->base.channel; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nv04_surface *rt[4], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0; int depth_only = 0; - struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; @@ -26,14 +23,18 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) assert(fb->nr_cbufs <= 4); for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { + if (colour_format) assert(colour_format == fb->cbufs[i]->format); - } else { + else colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } + + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + nvfx->hw_rt[i].bo = nvfx_surface_buffer(fb->cbufs[i]); + nvfx->hw_rt[i].offset = fb->cbufs[i]->offset; + nvfx->hw_rt[i].pitch = ((struct nv04_surface *)fb->cbufs[i])->pitch; } + for(; i < 4; ++i) + nvfx->hw_rt[i].bo = 0; if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) @@ -41,20 +42,24 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) if (fb->zsbuf) { zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; + nvfx->hw_zeta.bo = nvfx_surface_buffer(fb->zsbuf); + nvfx->hw_zeta.offset = fb->zsbuf->offset; + nvfx->hw_zeta.pitch = ((struct nv04_surface *)fb->zsbuf)->pitch; } + else + nvfx->hw_zeta.bo = 0; if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { /* Render to at least a colour buffer */ - if (!(rt[0]->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { + if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)); + assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)); rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); } else rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; @@ -62,17 +67,17 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) depth_only = 1; /* Render to depth buffer only */ - if (!(zeta->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { + if (!(fb->zsbuf->texture->_usage & NVFX_RESOURCE_FLAG_LINEAR)) { assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); } else rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; } else { - return FALSE; + return; } switch (colour_format) { @@ -108,115 +113,140 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { /* TODO: does this limitation really exist? TODO: can it be worked around somehow? */ - return FALSE; + assert(0); } if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || ((!nvfx->is_nv4x) && depth_only)) { - struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]); uint32_t pitch = rt0->pitch; if(!nvfx->is_nv4x) { - if (zeta) { - pitch |= (zeta->pitch << 16); + if (nvfx->hw_zeta.bo) { + pitch |= (nvfx->hw_zeta.pitch << 16); } else { pitch |= (pitch << 16); } } - so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, nvfx_surface_buffer(&rt0->base), 0, + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1)); + OUT_RELOC(chan, rt0->bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); - so_data (so, pitch); - so_reloc (so, nvfx_surface_buffer(&rt[0]->base), - rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV34TCL_COLOR0_PITCH, 2)); + OUT_RING(chan, pitch); + OUT_RELOC(chan, rt0->bo, + rt0->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, nvfx_surface_buffer(&rt[1]->base), 0, + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1)); + OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, nvfx_surface_buffer(&rt[1]->base), - rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV34TCL_COLOR1_OFFSET, 2)); + OUT_RELOC(chan, nvfx->hw_rt[1].bo, + nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch); + OUT_RING(chan, nvfx->hw_rt[1].pitch); } if(nvfx->is_nv4x) { if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, nvfx_surface_buffer(&rt[2]->base), 0, + OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 1)); + OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, nvfx_surface_buffer(&rt[2]->base), - rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV40TCL_COLOR2_OFFSET, 1)); + OUT_RELOC(chan, nvfx->hw_rt[2].bo, + nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch); + OUT_RING(chan, RING_3D(NV40TCL_COLOR2_PITCH, 1)); + OUT_RING(chan, nvfx->hw_rt[2].pitch); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, nvfx_surface_buffer(&rt[3]->base), 0, + OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR3, 1)); + OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, nvfx_surface_buffer(&rt[3]->base), - rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV40TCL_COLOR3_OFFSET, 1)); + OUT_RELOC(chan, nvfx->hw_rt[3].bo, + nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch); + OUT_RING(chan, RING_3D(NV40TCL_COLOR3_PITCH, 1)); + OUT_RING(chan, nvfx->hw_rt[3].pitch); } } if (zeta_format) { - so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); - so_reloc (so, nvfx_surface_buffer(&zeta->base), 0, + OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1)); + OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); + OUT_RING(chan, RING_3D(NV34TCL_ZETA_OFFSET, 1)); /* TODO: reverse engineer LMA */ - so_reloc (so, nvfx_surface_buffer(&zeta->base), - zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, nvfx->hw_zeta.bo, + nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); if(nvfx->is_nv4x) { - so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch); + OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1)); + OUT_RING(chan, nvfx->hw_zeta.pitch); } } - so_method(so, eng3d, NV34TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, eng3d, 0x1d88, 1); - so_data (so, (1 << 12) | h); + OUT_RING(chan, RING_3D(NV34TCL_RT_ENABLE, 1)); + OUT_RING(chan, rt_enable); + OUT_RING(chan, RING_3D(NV34TCL_RT_HORIZ, 3)); + OUT_RING(chan, (w << 16) | 0); + OUT_RING(chan, (h << 16) | 0); + OUT_RING(chan, rt_format); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_HORIZ, 2)); + OUT_RING(chan, (w << 16) | 0); + OUT_RING(chan, (h << 16) | 0); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2)); + OUT_RING(chan, ((w - 1) << 16) | 0); + OUT_RING(chan, ((h - 1) << 16) | 0); + OUT_RING(chan, RING_3D(0x1d88, 1)); + OUT_RING(chan, (1 << 12) | h); if(!nvfx->is_nv4x) { /* Wonder why this is needed, context should all be set to zero on init */ /* TODO: we can most likely remove this, after putting it in context init */ - so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1); - so_data (so, 0); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1)); + OUT_RING(chan, 0); } +} + +void +nvfx_framebuffer_relocate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + rt_flags |= NOUVEAU_BO_DUMMY; + MARK_RING(chan, 20, 20); + +#define DO_(var, pfx, name) \ + if(var.bo) { \ + OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_DMA_##name, 1), rt_flags, 0, 0); \ + OUT_RELOC(chan, var.bo, 0, \ + rt_flags | NOUVEAU_BO_OR, \ + chan->vram->handle, chan->gart->handle); \ + OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_##name##_OFFSET, 1), rt_flags, 0, 0); \ + OUT_RELOC(chan, var.bo, \ + var.offset, rt_flags | NOUVEAU_BO_LOW, \ + 0, 0); \ + } + +#define DO(pfx, num) DO_(nvfx->hw_rt[num], pfx, COLOR##num) + DO(NV34, 0); + DO(NV34, 1); + DO(NV40, 2); + DO(NV40, 3); - so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); - so_ref(NULL, &so); - return TRUE; + DO_(nvfx->hw_zeta, NV34, ZETA); } struct nvfx_state_entry nvfx_state_framebuffer = { -- 2.11.0