From a9971e85d9a4038645bdc7496d73906fc324b805 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 30 Aug 2015 14:13:10 +0200 Subject: [PATCH] radeonsi: rework uploading border colors MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The border colors are uploaded only once when the state is created. This brings truly immutable sampler descriptors, because they don't have to be updated every time a sampler state is re-bound. It also moves the TA_BC_BASE_ADDR registers to init_config, removing one more state. The catch is there is now a limit: only 4096 border colors can be used by one context. I don't think that will be a problem. Reviewed-by: Alex Deucher Acked-by: Christian König --- src/gallium/drivers/radeonsi/si_descriptors.c | 9 +- src/gallium/drivers/radeonsi/si_pipe.c | 23 ++++- src/gallium/drivers/radeonsi/si_pipe.h | 8 +- src/gallium/drivers/radeonsi/si_state.c | 124 +++++++++----------------- src/gallium/drivers/radeonsi/si_state.h | 3 - 5 files changed, 75 insertions(+), 92 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 3041da621c3..92a7068e715 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -273,13 +273,17 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); } -void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, - unsigned start, unsigned count, void **states) +static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, + unsigned start, unsigned count, void **states) { + struct si_context *sctx = (struct si_context *)ctx; struct si_sampler_states *samplers = &sctx->samplers[shader].states; struct si_sampler_state **sstates = (struct si_sampler_state**)states; int i; + if (!count || shader >= SI_NUM_SHADERS) + return; + if (start == 0) samplers->saved_states[0] = states[0]; if (start == 1) @@ -1022,6 +1026,7 @@ void si_init_all_descriptors(struct si_context *sctx) 4, SI_NUM_VERTEX_BUFFERS); /* Set pipe_context functions. */ + sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.set_constant_buffer = si_set_constant_buffer; sctx->b.b.set_sampler_views = si_set_sampler_views; sctx->b.b.set_stream_output_targets = si_set_streamout_targets; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index da774789525..d68ea5fb31d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -44,7 +44,8 @@ static void si_destroy_context(struct pipe_context *context) pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->tf_ring, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); - r600_resource_reference(&sctx->border_color_table, NULL); + r600_resource_reference(&sctx->border_color_buffer, NULL); + free(sctx->border_color_table); r600_resource_reference(&sctx->scratch_buffer, NULL); sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL); @@ -139,6 +140,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sscreen->b.trace_bo->cs_buf : NULL); sctx->b.rings.gfx.flush = si_context_gfx_flush; + /* Border colors. */ + sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * + sizeof(*sctx->border_color_table)); + if (!sctx->border_color_table) + goto fail; + + sctx->border_color_buffer = (struct r600_resource*) + pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, + SI_MAX_BORDER_COLORS * + sizeof(*sctx->border_color_table)); + if (!sctx->border_color_buffer) + goto fail; + + sctx->border_color_map = + ws->buffer_map(sctx->border_color_buffer->cs_buf, + NULL, PIPE_TRANSFER_WRITE); + if (!sctx->border_color_map) + goto fail; + si_init_all_descriptors(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); @@ -197,6 +217,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, return &sctx->b.b; fail: + fprintf(stderr, "radeonsi: Failed to create a context.\n"); si_destroy_context(&sctx->b.b); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 02d75f00f98..847853e59e9 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -79,6 +79,7 @@ #define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) #define SI_MAX_VIEWPORTS 16 +#define SI_MAX_BORDER_COLORS 4096 struct si_compute; @@ -103,7 +104,6 @@ struct si_sampler_view { struct si_sampler_state { uint32_t val[4]; - uint32_t border_color[4]; }; struct si_cs_shader_state { @@ -219,8 +219,10 @@ struct si_context { struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tf_ring; - struct r600_resource *border_color_table; - unsigned border_color_offset; + union pipe_color_union *border_color_table; /* in CPU memory, any endian */ + struct r600_resource *border_color_buffer; + union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ + unsigned border_color_count; /* Vertex and index buffers. */ bool vertex_buffers_dirty; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 52fa8fec033..e31895d6933 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2701,9 +2701,10 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st static void *si_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { + struct si_context *sctx = (struct si_context *)ctx; struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; - unsigned border_color_type; + unsigned border_color_type, border_color_index = 0; if (rstate == NULL) { return NULL; @@ -2726,9 +2727,38 @@ static void *si_create_sampler_state(struct pipe_context *ctx, state->border_color.f[2] == 1 && state->border_color.f[3] == 1) border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; - else + else { + int i; + border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; + /* Check if the border has been uploaded already. */ + for (i = 0; i < sctx->border_color_count; i++) + if (memcmp(&sctx->border_color_table[i], &state->border_color, + sizeof(state->border_color)) == 0) + break; + + if (i >= SI_MAX_BORDER_COLORS) { + /* Getting 4096 unique border colors is very unlikely. */ + fprintf(stderr, "radeonsi: The border color table is full. " + "Any new border colors will be just black. " + "Please file a bug.\n"); + border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; + } else { + if (i == sctx->border_color_count) { + /* Upload a new border color. */ + memcpy(&sctx->border_color_table[i], &state->border_color, + sizeof(state->border_color)); + util_memcpy_cpu_to_le32(&sctx->border_color_map[i], + &state->border_color, + sizeof(state->border_color)); + sctx->border_color_count++; + } + + border_color_index = i; + } + } + rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | @@ -2742,89 +2772,11 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); - rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); - - if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { - memcpy(rstate->border_color, state->border_color.ui, - sizeof(rstate->border_color)); - } - + rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | + S_008F3C_BORDER_COLOR_TYPE(border_color_type); return rstate; } -/* Upload border colors and update the pointers in resource descriptors. - * There can only be 4096 border colors per context. - * - * XXX: This is broken if the buffer gets reallocated. - */ -static void si_set_border_colors(struct si_context *sctx, unsigned count, - void **states) -{ - struct si_sampler_state **rstates = (struct si_sampler_state **)states; - uint32_t *border_color_table = NULL; - int i, j; - - for (i = 0; i < count; i++) { - if (rstates[i] && - G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == - V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { - if (!sctx->border_color_table || - ((sctx->border_color_offset + count - i) & - C_008F3C_BORDER_COLOR_PTR)) { - r600_resource_reference(&sctx->border_color_table, NULL); - sctx->border_color_offset = 0; - - sctx->border_color_table = - si_resource_create_custom(&sctx->screen->b.b, - PIPE_USAGE_DYNAMIC, - 4096 * 4 * 4); - } - - if (!border_color_table) { - border_color_table = - sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, - sctx->b.rings.gfx.cs, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED); - } - - for (j = 0; j < 4; j++) { - border_color_table[4 * sctx->border_color_offset + j] = - util_le32_to_cpu(rstates[i]->border_color[j]); - } - - rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; - rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); - } - } - - if (border_color_table) { - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - - uint64_t va_offset = sctx->border_color_table->gpu_address; - - si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); - if (sctx->b.chip_class >= CIK) - si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); - si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); - si_pm4_set_state(sctx, ta_bordercolor_base, pm4); - } -} - -static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, - unsigned start, unsigned count, - void **states) -{ - struct si_context *sctx = (struct si_context *)ctx; - - if (!count || shader >= SI_NUM_SHADERS) - return; - - si_set_border_colors(sctx, count, states); - si_set_sampler_descriptors(sctx, shader, start, count, states); -} - static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) { struct si_context *sctx = (struct si_context *)ctx; @@ -3105,7 +3057,6 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.get_sample_position = cayman_get_sample_position; sctx->b.b.create_sampler_state = si_create_sampler_state; - sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.delete_sampler_state = si_delete_sampler_state; sctx->b.b.create_sampler_view = si_create_sampler_view; @@ -3270,6 +3221,7 @@ static void si_init_config(struct si_context *sctx) unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; unsigned raster_config, raster_config_1; + uint64_t border_color_va = sctx->border_color_buffer->gpu_address; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); int i; @@ -3434,5 +3386,11 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); } + si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); + if (sctx->b.chip_class >= CIK) + si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); + si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_DATA); + sctx->init_config = pm4; } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 49f9f65bc14..f5726f0c5de 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -91,7 +91,6 @@ union si_state { struct si_state_rasterizer *rasterizer; struct si_state_dsa *dsa; struct si_pm4_state *poly_offset; - struct si_pm4_state *ta_bordercolor_base; struct si_pm4_state *ls; struct si_pm4_state *hs; struct si_pm4_state *es; @@ -246,8 +245,6 @@ struct si_buffer_resources { } while(0) /* si_descriptors.c */ -void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, - unsigned start, unsigned count, void **states); void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, struct pipe_resource *buffer, unsigned stride, unsigned num_records, -- 2.11.0