From 627f94b72e0e9443ad116f072599a7342269f297 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Alejandro=20Pi=C3=B1eiro?= Date: Mon, 28 Sep 2015 17:00:19 +0200 Subject: [PATCH] i965/vec4: adding vec4_cmod_propagation optimization vec4 port of fs_cmod_propagation. Shader-db results (no vec4 grepping): total instructions in shared programs: 6240413 -> 6235841 (-0.07%) instructions in affected programs: 401933 -> 397361 (-1.14%) total loops in shared programs: 1979 -> 1979 (0.00%) helped: 2265 HURT: 0 v2: remove extra space and combine two if blocks, as suggested by Matt Turner v3: add condition check to bail out if current inst and inst being scanned have different writemasks, as pointed out by Matt Turner v3: updated shader-db numbers v4: remove block from foreach_inst_in_block_*_starting_from after commit 801f151917fedb13c5c6e96281a18d833dd6901f Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4.h | 1 + .../drivers/dri/i965/brw_vec4_cmod_propagation.cpp | 157 +++++++++++++++++++++ 4 files changed, 160 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index c2438bda356..434583defe3 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -57,6 +57,7 @@ i965_compiler_FILES = \ brw_util.c \ brw_util.h \ brw_vec4_builder.h \ + brw_vec4_cmod_propagation.cpp \ brw_vec4_copy_propagation.cpp \ brw_vec4.cpp \ brw_vec4_cse.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 3e7078d0b32..c8923ef016a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1862,6 +1862,7 @@ vec4_visitor::run() OPT(dead_code_eliminate); OPT(dead_control_flow_eliminate, this); OPT(opt_copy_propagation); +
OPT(opt_cmod_propagation); OPT(opt_cse); OPT(opt_algebraic); OPT(opt_register_coalesce); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index d861b2e85df..89b6f912272 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -149,6 +149,7 @@ public: int var_range_start(unsigned v, unsigned n) const; int var_range_end(unsigned v, unsigned n) const; bool virtual_grf_interferes(int a, int b); + bool opt_cmod_propagation(); bool opt_copy_propagation(bool do_constant_prop = true); bool opt_cse_local(bblock_t *block); bool opt_cse(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp new file mode 100644 index 00000000000..329f24269ce --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp @@ -0,0 +1,157 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file brw_vec4_cmod_propagation.cpp
+ *
+ * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
+ * brw_fs_cmod_propagation for further details on the rationale behind this
+ * optimization.
+ *
+ * As implemented in this file, the pass looks at unpredicated AND, CMP and
+ * MOV instructions that have a null destination.  Such an instruction is
+ * removed when the earlier instruction producing its src[0] is a CMP or
+ * already writes the flag, or after its conditional modifier has been copied
+ * onto that earlier instruction.
+ */
+
+#include "brw_vec4.h"
+#include "brw_cfg.h"
+
+namespace brw {
+/**
+ * Runs conditional-modifier propagation on a single basic block.
+ *
+ * Every instruction of the block is visited in reverse order.  For each
+ * candidate (see the filters at the top of the loop) the instructions
+ * preceding it are scanned - backwards, going by the name of the iteration
+ * macro - for the one whose destination range contains the candidate's
+ * src[0].  Depending on what that instruction is, the candidate is either
+ * removed outright, removed after its conditional modifier has been copied
+ * onto that instruction, or left alone.
+ *
+ * NOTE(review): the outer loop uses the _safe iteration macro, presumably so
+ * that inst->remove() is legal while iterating - confirm against the macro
+ * definition.
+ *
+ * \param block  basic block to optimize; instructions may be removed from it
+ *               and the conditional_mod of other instructions may be
+ *               rewritten.
+ * \return true if at least one instruction was removed.
+ */
+static bool
+opt_cmod_propagation_local(bblock_t *block)
+{
+   bool progress = false;
+   int ip = block->end_ip + 1; /* NOTE(review): ip is only decremented below
+                                * and never read in this function; it looks
+                                * like a leftover that could be dropped.
+                                */
+
+   foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
+      ip--;
+      /* Candidates are unpredicated AND/CMP/MOV instructions with a null
+       * destination whose src[0] is in the GRF file and does not have the
+       * abs modifier set.  Everything else is skipped.
+       */
+      if ((inst->opcode != BRW_OPCODE_AND &&
+           inst->opcode != BRW_OPCODE_CMP &&
+           inst->opcode != BRW_OPCODE_MOV) ||
+          inst->predicate != BRW_PREDICATE_NONE ||
+          !inst->dst.is_null() ||
+          inst->src[0].file != GRF ||
+          inst->src[0].abs)
+         continue;
+      /* An AND is only accepted in the form "and.nz null, src0, 1" with
+       * src0 not negated.
+       */
+      if (inst->opcode == BRW_OPCODE_AND &&
+          !(inst->src[1].is_one() &&
+            inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+            !inst->src[0].negate))
+         continue;
+      /* A CMP is only accepted when its second source is zero. */
+      if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
+         continue;
+      /* A MOV is only accepted with the .nz conditional modifier. */
+      if (inst->opcode == BRW_OPCODE_MOV &&
+          inst->conditional_mod != BRW_CONDITIONAL_NZ)
+         continue;
+      /* Look for the instruction whose destination range (dst,
+       * regs_written) contains inst's src[0].  read_flag records whether any
+       * instruction visited by the scan before reaching it reads the flag.
+       *
+       * Once that instruction is found the scan always stops.  Nothing is
+       * changed if it is predicated (unless it is a SEL), if its reg_offset
+       * differs from that of src[0], if its writemask is neither X nor XYZW,
+       * if it writes XYZW but src[0] is not read with the XYZW swizzle, or if
+       * inst's writemask contains channels it does not write.
+       */
+      bool read_flag = false;
+      foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
+         if (inst->src[0].in_range(scan_inst->dst,
+                                   scan_inst->regs_written)) {
+            if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
+                scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+                (scan_inst->dst.writemask != WRITEMASK_X &&
+                 scan_inst->dst.writemask != WRITEMASK_XYZW) ||
+                (scan_inst->dst.writemask == WRITEMASK_XYZW &&
+                 inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
+                (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) {
+               break;
+            }
+
+            /* CMP's result is the same regardless of dest type.  So when
+             * inst is a .nz test with a D or UD destination type and the
+             * instruction producing its source is a CMP, inst is removed.
+             */
+            if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+                scan_inst->opcode == BRW_OPCODE_CMP &&
+                (inst->dst.type == BRW_REGISTER_TYPE_D ||
+                 inst->dst.type == BRW_REGISTER_TYPE_UD)) {
+               inst->remove(block);
+               progress = true;
+               break;
+            }
+
+            /* If the AND wasn't handled by the previous case, it isn't safe
+             * to remove it.
+             */
+            if (inst->opcode == BRW_OPCODE_AND)
+               break;
+
+            /* Comparisons operate differently for ints and floats, so give
+             * up when the two destination types differ and either of them is
+             * F.
+             */
+            if (scan_inst->dst.type != inst->dst.type &&
+                (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
+                 inst->dst.type == BRW_REGISTER_TYPE_F))
+               break;
+
+            /* If the instruction generating inst's source also wrote the
+             * flag, and inst is doing a simple .nz comparison, then inst
+             * is redundant - the appropriate value is already in the flag
+             * register.  Delete inst.
+             */
+            if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+                !inst->src[0].negate &&
+                scan_inst->writes_flag()) {
+               inst->remove(block);
+               progress = true;
+               break;
+            }
+
+            /* Otherwise, try propagating the conditional.  When src[0] is
+             * negated the modifier is first passed through brw_swap_cmod().
+             * The modifier is only installed if scan_inst reports
+             * can_do_cmod() and either already carries that same modifier,
+             * or carries none and no instruction visited so far by the scan
+             * read the flag.
+             */
+            enum brw_conditional_mod cond =
+               inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
+                                   : inst->conditional_mod;
+
+            if (scan_inst->can_do_cmod() &&
+                ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
+                 scan_inst->conditional_mod == cond)) {
+               scan_inst->conditional_mod = cond;
+               inst->remove(block);
+               progress = true;
+            }
+            break;
+         }
+         /* src[0] is not in scan_inst's destination range.  Stop the scan if
+          * scan_inst writes the flag; otherwise note whether it reads the
+          * flag and keep going.
+          */
+         if (scan_inst->writes_flag())
+            break;
+
+         read_flag = read_flag || scan_inst->reads_flag();
+      }
+   }
+
+   return progress;
+}
+/**
+ * Applies opt_cmod_propagation_local() to every block of the CFG, iterating
+ * with foreach_block_reverse, and calls invalidate_live_intervals() if any
+ * block reported progress.
+ *
+ * \return true if any block reported progress.
+ */
+bool
+vec4_visitor::opt_cmod_propagation()
+{
+   bool progress = false;
+   /* The call is the left operand of || so it is evaluated for every block,
+    * even after progress has already become true.
+    */
+   foreach_block_reverse(block, cfg) {
+      progress = opt_cmod_propagation_local(block) || progress;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
+} /* namespace brw */
--
2.11.0