From 7fe586f6fb69cd829d687dd58562ef5922667905 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 5 Apr 2018 10:27:22 +0200 Subject: [PATCH] radv: only enable PERFECT_ZPASS_COUNTS for precision occlusion queries This is unnecessary when the precision bit flag is not set, and it might hurt performance. The Vulkan specification explains that not setting VK_QUERY_CONTROL_PRECISE_BIT might be more efficient on some implementations. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 4 +++- src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_query.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 526b618f2a7..c0f60ac0d27 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1217,9 +1217,11 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) } else { const struct radv_subpass *subpass = cmd_buffer->state.subpass; uint32_t sample_rate = subpass ? 
util_logbase2(subpass->max_sample_count) : 0; + bool perfect = cmd_buffer->state.perfect_occlusion_queries_enabled; if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { - db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | + db_count_control = + S_028004_PERFECT_ZPASS_COUNTS(perfect) | S_028004_SAMPLE_RATE(sample_rate) | S_028004_ZPASS_ENABLE(1) | S_028004_SLICE_EVEN_ENABLE(1) | diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index ca3beba2d41..9e655af844e 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -946,6 +946,7 @@ struct radv_cmd_state { uint32_t last_primitive_reset_index; enum radv_cmd_flush_bits flush_bits; unsigned active_occlusion_queries; + bool perfect_occlusion_queries_enabled; float offset_scale; uint32_t trace_id; uint32_t last_ia_multi_vgt_param; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index cc943d5de07..859a4a1d687 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1079,7 +1079,8 @@ void radv_CmdResetQueryPool( static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, - VkQueryType query_type) + VkQueryType query_type, + VkQueryControlFlags flags) { struct radeon_winsys_cs *cs = cmd_buffer->cs; switch (query_type) { @@ -1087,8 +1088,27 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, radeon_check_space(cmd_buffer->device->ws, cs, 7); ++cmd_buffer->state.active_occlusion_queries; - if (cmd_buffer->state.active_occlusion_queries == 1) + if (cmd_buffer->state.active_occlusion_queries == 1) { + if (flags & VK_QUERY_CONTROL_PRECISE_BIT) { + /* This is the first occlusion query, enable + * the hint if the precision bit is set. 
+ */ + cmd_buffer->state.perfect_occlusion_queries_enabled = true; + } + radv_set_db_count_control(cmd_buffer); + } else { + if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) && + !cmd_buffer->state.perfect_occlusion_queries_enabled) { + /* This is not the first query, but this one + * needs to enable precision, DB_COUNT_CONTROL + * has to be updated accordingly. + */ + cmd_buffer->state.perfect_occlusion_queries_enabled = true; + + radv_set_db_count_control(cmd_buffer); + } + } radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); @@ -1119,8 +1139,14 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, radeon_check_space(cmd_buffer->device->ws, cs, 14); cmd_buffer->state.active_occlusion_queries--; - if (cmd_buffer->state.active_occlusion_queries == 0) + if (cmd_buffer->state.active_occlusion_queries == 0) { + /* Reset the perfect occlusion queries hint now that no + * queries are active. + */ + cmd_buffer->state.perfect_occlusion_queries_enabled = false; + radv_set_db_count_control(cmd_buffer); + } radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); @@ -1177,7 +1203,7 @@ void radv_CmdBeginQuery( va += pool->stride * query; - emit_begin_query(cmd_buffer, va, pool->type); + emit_begin_query(cmd_buffer, va, pool->type, flags); /* * For multiview we have to emit a query for each bit in the mask, @@ -1193,7 +1219,7 @@ void radv_CmdBeginQuery( for (unsigned i = 0; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) { va += pool->stride; avail_va += 4; - emit_begin_query(cmd_buffer, va, pool->type); + emit_begin_query(cmd_buffer, va, pool->type, flags); emit_end_query(cmd_buffer, va, avail_va, pool->type); } } -- 2.11.0