From 6686d8a130bb195d0e926d52f0e1e0f9243569be Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 20 Jun 2019 18:24:19 -0400 Subject: [PATCH] gallium/u_blitter: implement copying from ZS to color and vice versa MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is for drivers that can't map depth and stencil and need to blit them to a color texture for CPU access. This is also useful for drivers using separate depth and stencil. Tested-by: Dieter Nützel --- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 6 + src/gallium/auxiliary/util/u_blitter.c | 147 +++++++++++++++++------ src/gallium/auxiliary/util/u_blitter.h | 29 +++++ src/gallium/auxiliary/util/u_simple_shaders.c | 161 ++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_simple_shaders.h | 6 + 5 files changed, 314 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index 54a1ee15b68..2ade618db00 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -165,6 +165,12 @@ OP11(SAMP2HND) OP12(IMUL_HI) OP12(UMUL_HI) +OP13(UBFE) +OP11(F2D) +OP11(D2F) +OP11(U2D) +OP11(D2U) +OP12(DMUL) #undef OP00 #undef OP01 diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 3504f5c31de..b07267724e5 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -102,6 +102,11 @@ struct blitter_context_priv /* FS which outputs an average of all samples. */ void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; + /* FS which unpacks color to ZS or packs ZS to color, matching + * the ZS format. See util_blitter_get_color_format_for_zs(). + */ + void *fs_pack_color_zs[TGSI_TEXTURE_COUNT][10]; + /* Blend state. 
*/ void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */ void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1]; @@ -523,6 +528,13 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]); } + for (i = 0; i < ARRAY_SIZE(ctx->fs_pack_color_zs); i++) { + for (j = 0; j < ARRAY_SIZE(ctx->fs_pack_color_zs[0]); j++) { + if (ctx->fs_pack_color_zs[i][j]) + ctx->delete_fs_state(pipe, ctx->fs_pack_color_zs[i][j]); + } + } + if (ctx->fs_empty) ctx->delete_fs_state(pipe, ctx->fs_empty); if (ctx->fs_write_one_cbuf) @@ -995,6 +1007,44 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, } static inline +void *blitter_get_fs_pack_color_zs(struct blitter_context_priv *ctx, + enum pipe_texture_target target, + unsigned nr_samples, + enum pipe_format zs_format, + bool dst_is_color) +{ + struct pipe_context *pipe = ctx->base.pipe; + enum tgsi_texture_type tgsi_tex = + util_pipe_tex_to_tgsi_tex(target, nr_samples); + int format_index = zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ? 0 : + zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ? 1 : + zs_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 2 : + zs_format == PIPE_FORMAT_Z24X8_UNORM ? 3 : + zs_format == PIPE_FORMAT_X8Z24_UNORM ? 4 : -1; + + if (format_index == -1) { + assert(0); + return NULL; + } + + /* The first 5 shaders unpack color to ZS, the last 5 shaders pack ZS + * to color (selected by dst_is_color). + */ + if (dst_is_color) + format_index += 5; + + void **shader = &ctx->fs_pack_color_zs[tgsi_tex][format_index]; + + /* Create the fragment shader on-demand. 
*/ + if (!*shader) { + assert(!ctx->cached_all_shaders); + *shader = util_make_fs_pack_color_zs(pipe, tgsi_tex, zs_format, + dst_is_color); + } + return *shader; +} + +static inline void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, enum pipe_texture_target target, unsigned nr_samples, @@ -1825,34 +1875,33 @@ void util_blitter_blit_generic(struct blitter_context *blitter, enum pipe_texture_target src_target = src->target; unsigned src_samples = src->texture->nr_samples; unsigned dst_samples = dst->texture->nr_samples; - bool has_depth, has_stencil, has_color; - bool blit_stencil, blit_depth, blit_color; void *sampler_state; const struct util_format_description *src_desc = util_format_description(src->format); const struct util_format_description *dst_desc = util_format_description(dst->format); - has_color = src_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && - dst_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS; - has_depth = util_format_has_depth(src_desc) && - util_format_has_depth(dst_desc); - has_stencil = util_format_has_stencil(src_desc) && - util_format_has_stencil(dst_desc); + bool src_has_color = src_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS; + bool src_has_depth = util_format_has_depth(src_desc); + bool src_has_stencil = util_format_has_stencil(src_desc); - blit_color = has_color && (mask & PIPE_MASK_RGBA); - blit_depth = has_depth && (mask & PIPE_MASK_Z); - blit_stencil = has_stencil && (mask & PIPE_MASK_S) && - ctx->has_stencil_export; + bool dst_has_color = mask & PIPE_MASK_RGBA && + dst_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS; + bool dst_has_depth = mask & PIPE_MASK_Z && + util_format_has_depth(dst_desc); + bool dst_has_stencil = ctx->has_stencil_export && + mask & PIPE_MASK_S && + util_format_has_stencil(dst_desc); - if (!blit_stencil && !blit_depth && !blit_color) { + /* Return if there is nothing to do. 
*/ + if (!dst_has_color && !dst_has_depth && !dst_has_stencil) { return; } bool is_scaled = dstbox->width != abs(srcbox->width) || dstbox->height != abs(srcbox->height); - if (blit_stencil || !is_scaled) + if (src_has_stencil || !is_scaled) filter = PIPE_TEX_FILTER_NEAREST; bool use_txf = false; @@ -1900,38 +1949,65 @@ void util_blitter_blit_generic(struct blitter_context *blitter, blitter_check_saved_fb_state(ctx); blitter_disable_render_cond(ctx); - if (blit_depth || blit_stencil) { + /* Blend, DSA, fragment shader. */ + if (dst_has_depth && dst_has_stencil) { pipe->bind_blend_state(pipe, ctx->blend[0][0]); - - if (blit_depth && blit_stencil) { - pipe->bind_depth_stencil_alpha_state(pipe, - ctx->dsa_write_depth_stencil); + pipe->bind_depth_stencil_alpha_state(pipe, + ctx->dsa_write_depth_stencil); + if (src_has_color) { + assert(use_txf); ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_depthstencil(ctx, src_target, - src_samples, use_txf)); - } else if (blit_depth) { - pipe->bind_depth_stencil_alpha_state(pipe, - ctx->dsa_write_depth_keep_stencil); + blitter_get_fs_pack_color_zs(ctx, src_target, + src_samples, dst->format, false)); + } else { ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_depth(ctx, src_target, - src_samples, use_txf)); - } else { /* is_stencil */ - pipe->bind_depth_stencil_alpha_state(pipe, - ctx->dsa_keep_depth_write_stencil); + blitter_get_fs_texfetch_depthstencil(ctx, src_target, + src_samples, use_txf)); + } + } else if (dst_has_depth) { + pipe->bind_blend_state(pipe, ctx->blend[0][0]); + pipe->bind_depth_stencil_alpha_state(pipe, + ctx->dsa_write_depth_keep_stencil); + if (src_has_color && + (src->format == PIPE_FORMAT_R32_UINT || + src->format == PIPE_FORMAT_R32G32_UINT)) { + assert(use_txf); + ctx->bind_fs_state(pipe, + blitter_get_fs_pack_color_zs(ctx, src_target, + src_samples, dst->format, false)); + } else { ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_stencil(ctx, src_target, - src_samples, use_txf)); + 
blitter_get_fs_texfetch_depth(ctx, src_target, + src_samples, use_txf)); } + } else if (dst_has_stencil) { + pipe->bind_blend_state(pipe, ctx->blend[0][0]); + pipe->bind_depth_stencil_alpha_state(pipe, + ctx->dsa_keep_depth_write_stencil); + assert(src_has_stencil); /* unpacking from color is unsupported */ + ctx->bind_fs_state(pipe, + blitter_get_fs_texfetch_stencil(ctx, src_target, + src_samples, use_txf)); } else { unsigned colormask = mask & PIPE_MASK_RGBA; pipe->bind_blend_state(pipe, ctx->blend[colormask][alpha_blend]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); - ctx->bind_fs_state(pipe, + + if (src_has_depth && + (dst->format == PIPE_FORMAT_R32_UINT || + dst->format == PIPE_FORMAT_R32G32_UINT)) { + assert(use_txf); + ctx->bind_fs_state(pipe, + blitter_get_fs_pack_color_zs(ctx, src_target, + src_samples, src->format, true)); + } else { + ctx->bind_fs_state(pipe, blitter_get_fs_texfetch_col(ctx, src->format, dst->format, src_target, src_samples, dst_samples, filter, use_txf)); + } } /* Set the linear filter only for scaled color non-MSAA blits. */ @@ -1950,7 +2026,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter, } /* Set samplers. */ - if (blit_depth && blit_stencil) { + if (src_has_depth && src_has_stencil && + (dst_has_color || (dst_has_depth && dst_has_stencil))) { /* Setup two samplers, one for depth and the other one for stencil. */ struct pipe_sampler_view templ; struct pipe_sampler_view *views[2]; @@ -1967,7 +2044,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 2, samplers); pipe_sampler_view_reference(&views[1], NULL); - } else if (blit_stencil) { + } else if (src_has_stencil && dst_has_stencil) { /* Set a stencil-only sampler view for it not to sample depth instead. 
*/ struct pipe_sampler_view templ; struct pipe_sampler_view *view; @@ -1996,7 +2073,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, blitter_set_common_draw_rect_state(ctx, scissor != NULL, dst_samples > 1); do_blits(ctx, dst, dstbox, src, src_width0, src_height0, - srcbox, blit_depth || blit_stencil, use_txf); + srcbox, dst_has_depth || dst_has_stencil, use_txf); util_blitter_restore_vertex_states(blitter); util_blitter_restore_fragment_states(blitter); diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index c1f1ae47443..9e3fa55e648 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -597,6 +597,37 @@ void util_blitter_restore_fb_state(struct blitter_context *blitter); void util_blitter_restore_textures(struct blitter_context *blitter); void util_blitter_restore_constant_buffer_state(struct blitter_context *blitter); +/* These are supported combinations of blits from ZS to color and vice versa. + * The blitter will do the packing/unpacking of depth and stencil + * in the fragment shader. + * Unsupported ZS formats assert; without asserts PIPE_FORMAT_NONE is returned. 
+ */ +static inline enum pipe_format +util_blitter_get_color_format_for_zs(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return PIPE_FORMAT_R16_UNORM; + + case PIPE_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_R32_FLOAT; + + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return PIPE_FORMAT_R32_UINT; + + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return PIPE_FORMAT_R32G32_UINT; + + case PIPE_FORMAT_Z32_UNORM: + default: + assert(0); + } + return PIPE_FORMAT_NONE; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index c111eaf1db5..2fdd60b0bb3 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -895,3 +895,164 @@ util_make_geometry_passthrough_shader(struct pipe_context *pipe, return ureg_create_shader_and_destroy(ureg, pipe); } +/** + * Blit from color to ZS or from ZS to color in a manner that is equivalent + * to memcpy. + * + * Color is either R32_UINT (for Z24S8 / S8Z24) or R32G32_UINT (Z32_S8X24). + * + * Depth and stencil samplers are used to load depth and stencil, + * and they are packed and the result is written to a color output. + * OR + * A color sampler is used to load a color value, which is unpacked and + * written to depth and stencil shader outputs. 
+ */ +void * +util_make_fs_pack_color_zs(struct pipe_context *pipe, + enum tgsi_texture_type tex_target, + enum pipe_format zs_format, + bool dst_is_color) +{ + struct ureg_program *ureg; + struct ureg_src depth_sampler, stencil_sampler, color_sampler, coord; + struct ureg_dst out, depth, depth_x, stencil, out_depth, out_stencil, color; + + assert(zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || /* color is R32_UINT */ + zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || /* color is R32_UINT */ + zs_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || /* color is R32G32_UINT */ + zs_format == PIPE_FORMAT_Z24X8_UNORM || /* color is R32_UINT */ + zs_format == PIPE_FORMAT_X8Z24_UNORM); /* color is R32_UINT */ + + bool has_stencil = zs_format != PIPE_FORMAT_Z24X8_UNORM && + zs_format != PIPE_FORMAT_X8Z24_UNORM; + bool is_z24 = zs_format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT; + bool z24_is_high = zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM; + + ureg = ureg_create(PIPE_SHADER_FRAGMENT); + if (!ureg) + return NULL; + + coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, + TGSI_INTERPOLATE_LINEAR); + + if (dst_is_color) { + /* Load depth. */ + depth_sampler = ureg_DECL_sampler(ureg, 0); + ureg_DECL_sampler_view(ureg, 0, tex_target, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT); + + depth = ureg_DECL_temporary(ureg); + depth_x = ureg_writemask(depth, TGSI_WRITEMASK_X); + ureg_load_tex(ureg, depth_x, coord, depth_sampler, tex_target, true, true); + + /* Pack to Z24. 
*/ + if (is_z24) { + double imm = 0xffffff; + struct ureg_src imm_f64 = ureg_DECL_immediate_f64(ureg, &imm, 2); + struct ureg_dst tmp_xy = ureg_writemask(ureg_DECL_temporary(ureg), + TGSI_WRITEMASK_XY); + + ureg_F2D(ureg, tmp_xy, ureg_src(depth)); + ureg_DMUL(ureg, tmp_xy, ureg_src(tmp_xy), imm_f64); + ureg_D2U(ureg, depth_x, ureg_src(tmp_xy)); + + if (z24_is_high) + ureg_SHL(ureg, depth_x, ureg_src(depth), ureg_imm1u(ureg, 8)); + else + ureg_AND(ureg, depth_x, ureg_src(depth), ureg_imm1u(ureg, 0xffffff)); + } + + if (has_stencil) { + /* Load stencil: sampler 1 with its own UINT sampler view 1. */ + stencil_sampler = ureg_DECL_sampler(ureg, 1); + ureg_DECL_sampler_view(ureg, 1, tex_target, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT); + + stencil = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); + ureg_load_tex(ureg, stencil, coord, stencil_sampler, tex_target, + true, true); + + /* Pack stencil into depth. */ + if (is_z24) { + if (!z24_is_high) + ureg_SHL(ureg, stencil, ureg_src(stencil), ureg_imm1u(ureg, 24)); + + ureg_OR(ureg, depth_x, ureg_src(depth), ureg_src(stencil)); + } + } + + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + + if (is_z24) { + ureg_MOV(ureg, ureg_writemask(out, TGSI_WRITEMASK_X), ureg_src(depth)); + } else { + /* Z32_S8X24 */ + ureg_MOV(ureg, ureg_writemask(depth, TGSI_WRITEMASK_Y), + ureg_scalar(ureg_src(stencil), TGSI_SWIZZLE_X)); + ureg_MOV(ureg, ureg_writemask(out, TGSI_WRITEMASK_XY), ureg_src(depth)); + } + } else { + color_sampler = ureg_DECL_sampler(ureg, 0); + ureg_DECL_sampler_view(ureg, 0, tex_target, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT, + TGSI_RETURN_TYPE_UINT); + + color = ureg_DECL_temporary(ureg); + ureg_load_tex(ureg, color, coord, color_sampler, tex_target, true, true); + + depth = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); + stencil = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); + + if 
(is_z24) { + double imm = 1.0 / 
0xffffff; + struct ureg_src imm_f64 = ureg_DECL_immediate_f64(ureg, &imm, 2); + struct ureg_dst tmp_xy = ureg_writemask(ureg_DECL_temporary(ureg), + TGSI_WRITEMASK_XY); + + ureg_UBFE(ureg, depth, ureg_src(color), + ureg_imm1u(ureg, z24_is_high ? 8 : 0), + ureg_imm1u(ureg, 24)); + ureg_U2D(ureg, tmp_xy, ureg_src(depth)); + ureg_DMUL(ureg, tmp_xy, ureg_src(tmp_xy), imm_f64); + ureg_D2F(ureg, depth, ureg_src(tmp_xy)); + } else { + /* depth = color.x; (Z32_S8X24) */ + ureg_MOV(ureg, depth, ureg_src(color)); + } + + out_depth = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); + ureg_MOV(ureg, ureg_writemask(out_depth, TGSI_WRITEMASK_Z), + ureg_scalar(ureg_src(depth), TGSI_SWIZZLE_X)); + + if (has_stencil) { + if (is_z24) { + ureg_UBFE(ureg, stencil, ureg_src(color), + ureg_imm1u(ureg, z24_is_high ? 0 : 24), + ureg_imm1u(ureg, 8)); + } else { + /* stencil = color.y[0:7]; (Z32_S8X24) */ + ureg_UBFE(ureg, stencil, + ureg_scalar(ureg_src(color), TGSI_SWIZZLE_Y), + ureg_imm1u(ureg, 0), + ureg_imm1u(ureg, 8)); + } + + out_stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0); + ureg_MOV(ureg, ureg_writemask(out_stencil, TGSI_WRITEMASK_Y), + ureg_scalar(ureg_src(stencil), TGSI_SWIZZLE_X)); + } + } + + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 4d4f5e97f5b..501906d6fd3 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -153,6 +153,12 @@ util_make_geometry_passthrough_shader(struct pipe_context *pipe, const ubyte *semantic_names, const ubyte *semantic_indexes); +void * +util_make_fs_pack_color_zs(struct pipe_context *pipe, + enum tgsi_texture_type tex_target, + enum pipe_format zs_format, + bool dst_is_color); + #ifdef __cplusplus } #endif -- 2.11.0