From 698821bda32eb9958e105c38087b49b6f307128d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 19 Apr 2016 01:19:54 +0200 Subject: [PATCH] radeonsi: rework polygon stippling to use constant buffer instead of texture MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit add it to the RW_BUFFERS descriptor array now the slot masks don't have to have 64 bits Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_descriptors.c | 23 +++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 2 - src/gallium/drivers/radeonsi/si_pipe.h | 1 - src/gallium/drivers/radeonsi/si_shader.c | 67 ++++++++++++--------------- src/gallium/drivers/radeonsi/si_state.c | 55 ---------------------- src/gallium/drivers/radeonsi/si_state.h | 8 ++-- 6 files changed, 55 insertions(+), 101 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 161cfdcf809..034794467a6 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1129,6 +1129,26 @@ static void si_desc_reset_buffer_offset(struct pipe_context *ctx, S_008F04_BASE_ADDRESS_HI(va >> 32); } +/* INTERNAL CONST BUFFERS */ + +static void si_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct pipe_constant_buffer cb = {}; + unsigned stipple[32]; + int i; + + for (i = 0; i < 32; i++) + stipple[i] = util_bitreverse(state->stipple[i]); + + cb.user_buffer = stipple; + cb.buffer_size = sizeof(stipple); + + si_set_constant_buffer(sctx, &sctx->rw_buffers, + SI_PS_CONST_POLY_STIPPLE, &cb); +} + /* TEXTURE METADATA ENABLE/DISABLE */ /* CMASK can be enabled (for fast clear) and disabled (for texture export) @@ -1403,6 +1423,8 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx, if (sctx->rw_buffers.desc.pointer_dirty) { si_emit_shader_pointer(sctx, &sctx->rw_buffers.desc, + R_00B030_SPI_SHADER_USER_DATA_PS_0, true); + si_emit_shader_pointer(sctx, &sctx->rw_buffers.desc, R_00B130_SPI_SHADER_USER_DATA_VS_0, true); si_emit_shader_pointer(sctx, &sctx->rw_buffers.desc, R_00B230_SPI_SHADER_USER_DATA_GS_0, true); @@ -1480,6 +1502,7 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.set_shader_images = si_set_shader_images; sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer; + sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; sctx->b.b.set_shader_buffers = si_set_shader_buffers; sctx->b.b.set_sampler_views = si_set_sampler_views; sctx->b.b.set_stream_output_targets = si_set_streamout_targets; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 17d59b60d06..2a5cf0aee1e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -61,8 +61,6 @@ static void si_destroy_context(struct pipe_context *context) for (i = 0; i < Elements(sctx->vgt_shader_config); i++) si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]); - if (sctx->pstipple_sampler_state) - sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state); if (sctx->fixed_func_tcs_shader.cso) sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 48095b00993..85bf10f0102 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -190,7 +190,6 @@ struct si_context { void *custom_blend_decompress; void *custom_blend_fastclear; void *custom_blend_dcc_decompress; - void *pstipple_sampler_state; struct si_screen *screen; struct radeon_winsys_cs *ce_ib; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e8393869f9f..f698c635393 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5291,15 +5291,14 @@ static void preload_ring_buffers(struct si_shader_context *ctx) } static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, - LLVMValueRef param_sampler_views, + LLVMValueRef param_rw_buffers, unsigned param_pos_fixed_pt) { struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; - struct lp_build_emit_data result = {}; - struct tgsi_full_instruction inst = {}; - LLVMValueRef desc, sampler_index, address[2], pix; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef slot, desc, offset, row, bit, address[2]; /* Use the fixed-point gl_FragCoord input. * Since the stipple pattern is 32x32 and it repeats, just get 5 bits @@ -5308,29 +5307,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5); address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5); - /* Load the sampler view descriptor. */ - sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER); - desc = get_sampler_desc_custom(ctx, param_sampler_views, - sampler_index, DESC_IMAGE); - - /* Load the texel. */ - inst.Instruction.Opcode = TGSI_OPCODE_TXF; - inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */ - result.inst = &inst; - set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF, - inst.Texture.Texture, - desc, NULL, address, ARRAY_SIZE(address), 0xf); - build_tex_intrinsic(&tex_action, bld_base, &result); - - /* Kill the thread accordingly. */ - pix = LLVMBuildExtractElement(gallivm->builder, result.output[0], - lp_build_const_int32(gallivm, 3), ""); - pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix); - pix = LLVMBuildFNeg(gallivm->builder, pix, ""); + /* Load the buffer descriptor. */ + slot = lp_build_const_int32(gallivm, SI_PS_CONST_POLY_STIPPLE); + desc = build_indexed_load_const(ctx, param_rw_buffers, slot); - lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - LLVMVoidTypeInContext(gallivm->context), - &pix, 1, 0); + /* The stipple pattern is 32x32, each row has 32 bits. */ + offset = LLVMBuildMul(builder, address[1], + LLVMConstInt(ctx->i32, 4, 0), ""); + row = buffer_load_const(builder, desc, offset, ctx->i32); + bit = LLVMBuildLShr(builder, row, address[0], ""); + bit = LLVMBuildTrunc(builder, bit, ctx->i1, ""); + + /* The intrinsic kills the thread if arg < 0. */ + bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0), + LLVMConstReal(ctx->f32, -1), ""); + lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0); } void si_shader_binary_read_config(struct radeon_shader_binary *binary, @@ -6039,9 +6030,9 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT && shader->key.ps.prolog.poly_stipple) { - LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn, - SI_PARAM_SAMPLERS); - si_llvm_emit_polygon_stipple(&ctx, views, + LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); + si_llvm_emit_polygon_stipple(&ctx, list, SI_PARAM_POS_FIXED_PT); } @@ -6619,17 +6610,17 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen, /* POS_FIXED_PT is always last. */ unsigned pos = key->ps_prolog.num_input_sgprs + key->ps_prolog.num_input_vgprs - 1; - LLVMValueRef ptr[2], views; + LLVMValueRef ptr[2], list; - /* Get the pointer to sampler views. */ - ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS); - ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1); - views = lp_build_gather_values(gallivm, ptr, 2); - views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, ""); - views = LLVMBuildIntToPtr(gallivm->builder, views, - const_array(ctx.v8i32, SI_NUM_SAMPLERS), ""); + /* Get the pointer to rw buffers. */ + ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS); + ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI); + list = lp_build_gather_values(gallivm, ptr, 2); + list = LLVMBuildBitCast(gallivm->builder, list, ctx.i64, ""); + list = LLVMBuildIntToPtr(gallivm->builder, list, + const_array(ctx.v16i8, SI_NUM_RW_BUFFERS), ""); - si_llvm_emit_polygon_stipple(&ctx, views, pos); + si_llvm_emit_polygon_stipple(&ctx, list, pos); } /* Interpolate colors. */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8603f3ecb6b..e726196d07a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3374,60 +3374,6 @@ static void si_set_index_buffer(struct pipe_context *ctx, /* * Misc */ -static void si_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct pipe_resource *tex; - struct pipe_sampler_view *view; - bool is_zero = true; - bool is_one = true; - int i; - - /* The hardware obeys 0 and 1 swizzles in the descriptor even if - * the resource is NULL/invalid. Take advantage of this fact and skip - * texture allocation if the stipple pattern is constant. - * - * This is an optimization for the common case when stippling isn't - * used but set_polygon_stipple is still called by st/mesa. - */ - for (i = 0; i < Elements(state->stipple); i++) { - is_zero = is_zero && state->stipple[i] == 0; - is_one = is_one && state->stipple[i] == 0xffffffff; - } - - if (is_zero || is_one) { - struct pipe_sampler_view templ = {{0}}; - - templ.swizzle_r = PIPE_SWIZZLE_ZERO; - templ.swizzle_g = PIPE_SWIZZLE_ZERO; - templ.swizzle_b = PIPE_SWIZZLE_ZERO; - /* The pattern should be inverted in the texture. */ - templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO; - - view = ctx->create_sampler_view(ctx, NULL, &templ); - } else { - /* Create a new texture. */ - tex = util_pstipple_create_stipple_texture(ctx, state->stipple); - if (!tex) - return; - - view = util_pstipple_create_sampler_view(ctx, tex); - pipe_resource_reference(&tex, NULL); - } - - ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, - SI_POLY_STIPPLE_SAMPLER, 1, &view); - pipe_sampler_view_reference(&view, NULL); - - /* Bind the sampler state if needed. */ - if (!sctx->pstipple_sampler_state) { - sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx); - ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT, - SI_POLY_STIPPLE_SAMPLER, 1, - &sctx->pstipple_sampler_state); - } -} static void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4], @@ -3590,7 +3536,6 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.texture_barrier = si_texture_barrier; sctx->b.b.memory_barrier = si_memory_barrier; - sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; sctx->b.b.set_min_samples = si_set_min_samples; sctx->b.b.set_tess_state = si_set_tess_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 1571bad9845..9a2a7fd2137 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -146,12 +146,8 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; -/* User sampler views: 0..31 - * Polygon stipple tex: 32 - */ #define SI_NUM_USER_SAMPLERS 32 /* AKA OpenGL textures units per shader */ -#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS -#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) +#define SI_NUM_SAMPLERS SI_NUM_USER_SAMPLERS /* User constant buffers: 0..15 * Driver state constants: 16 @@ -182,6 +178,8 @@ enum { SI_VS_STREAMOUT_BUF2, SI_VS_STREAMOUT_BUF3, + SI_PS_CONST_POLY_STIPPLE, + SI_NUM_RW_BUFFERS, }; -- 2.11.0