From 4b7de2a360ebb2d1cf7b3bbcc849026e5af47e41 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 2 Oct 2017 17:21:23 -0700 Subject: [PATCH] broadcom/vc5: Add support for f32 render targets. The TLB write code is getting ugly and needs a refactoring (that will hopefully handle TLBU uniform coalescing as well). --- Review note: the TLB config type selection now tests f32_color_rb & (1 << rt), the same per-render-target bit that the write path further down tests, so that a non-f32 target is not given an F32 config just because another target in the mask is f32. src/broadcom/compiler/nir_to_vir.c | 45 +++++++++++++++++++++++++---------- src/broadcom/compiler/v3d_compiler.h | 2 ++ src/gallium/drivers/vc5/vc5_program.c | 4 ++++ 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 642173e3ad3..675cad2c32f 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1109,28 +1109,47 @@ emit_frag_end(struct v3d_compile *c) struct qreg b = color[2]; struct qreg a = color[3]; - conf |= TLB_TYPE_F16_COLOR; - conf |= TLB_F16_SWAP_HI_LO; - if (num_components >= 3) - conf |= TLB_VEC_SIZE_4_F16; - else - conf |= TLB_VEC_SIZE_2_F16; + if (c->fs_key->f32_color_rb & (1 << rt)) { + conf |= TLB_TYPE_F32_COLOR; + conf |= ((num_components - 1) << + TLB_VEC_SIZE_MINUS_1_SHIFT); + } else { + conf |= TLB_TYPE_F16_COLOR; + conf |= TLB_F16_SWAP_HI_LO; + if (num_components >= 3) + conf |= TLB_VEC_SIZE_4_F16; + else + conf |= TLB_VEC_SIZE_2_F16; + } if (c->fs_key->swap_color_rb & (1 << rt)) { r = color[2]; b = color[0]; } - inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), r, g); - if (conf != ~0) { - inst->dst.file = QFILE_TLBU; + if (c->fs_key->f32_color_rb & (1 << rt)) { + inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]); + vir_set_cond(inst, discard_cond); inst->src[vir_get_implicit_uniform_src(inst)] = vir_uniform_ui(c, conf); - } - vir_set_cond(inst, discard_cond); - inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), b, a); - vir_set_cond(inst, discard_cond); + for (int i = 1; i < num_components; i++) { + inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0), + color[i]); + vir_set_cond(inst, discard_cond); + } + } else { 
inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), r, g); + if (conf != ~0) { + inst->dst.file = QFILE_TLBU; + inst->src[vir_get_implicit_uniform_src(inst)] = + vir_uniform_ui(c, conf); + } + vir_set_cond(inst, discard_cond); + + inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), b, a); + vir_set_cond(inst, discard_cond); + } break; } } diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index bdd505ee9ba..08db4fb0e2b 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -320,6 +320,8 @@ struct v3d_fs_key { bool clamp_color; uint8_t nr_cbufs; uint8_t swap_color_rb; + /* Mask of which render targets need to be written as 32-bit floats */ + uint8_t f32_color_rb; uint8_t alpha_test_func; uint8_t logicop_func; uint32_t point_sprite_mask; diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c index d5634c7a7d3..80865a25a4d 100644 --- a/src/gallium/drivers/vc5/vc5_program.c +++ b/src/gallium/drivers/vc5/vc5_program.c @@ -369,6 +369,10 @@ vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode) if (desc->swizzle[0] == PIPE_SWIZZLE_Z) key->swap_color_rb |= 1 << i; + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + desc->channel[0].size == 32) { + key->f32_color_rb |= 1 << i; + } } if (key->is_points) { -- 2.11.0