From 5df88c2096281f416b2738debac1c4c329e29673 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 2 Nov 2014 20:36:53 -0800 Subject: [PATCH] i965/vec4: Rewrite dead code elimination to use live in/out. Improves 359 shaders by >=10% 114 shaders by >=20% 91 shaders by >=30% 82 shaders by >=40% 22 shaders by >=50% 4 shaders by >=60% 2 shaders by >=80% total instructions in shared programs: 5845346 -> 5822422 (-0.39%) instructions in affected programs: 364979 -> 342055 (-6.28%) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 155 ------------------- .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 169 +++++++++++++++++++++ 3 files changed, 170 insertions(+), 155 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 26bf458e183..f03422e0498 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -103,6 +103,7 @@ i965_FILES = \ brw_vec4.cpp \ brw_vec4_copy_propagation.cpp \ brw_vec4_cse.cpp \ + brw_vec4_dead_code_eliminate.cpp \ brw_vec4_generator.cpp \ brw_vec4_gs_visitor.cpp \ brw_vec4_live_variables.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 534b4b0668f..4d893e15dca 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -429,161 +429,6 @@ vec4_visitor::opt_reduce_swizzle() return progress; } -static bool -try_eliminate_instruction(vec4_instruction *inst, int new_writemask, - const struct brw_context *brw) -{ - if (inst->has_side_effects()) - return false; - - if (new_writemask == 0) { - /* Don't dead code eliminate instructions that write to the - * accumulator as a side-effect. Instead just set the destination - * to the null register to free it. 
- */ - if (inst->writes_accumulator || inst->writes_flag()) { - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->opcode = BRW_OPCODE_NOP; - } - - return true; - } else if (inst->dst.writemask != new_writemask) { - switch (inst->opcode) { - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_GEN4_SCRATCH_READ: - case VS_OPCODE_PULL_CONSTANT_LOAD: - case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: - break; - default: - /* Do not set a writemask on Gen6 for math instructions, those are - * executed using align1 mode that does not support a destination mask. - */ - if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) { - inst->dst.writemask = new_writemask; - return true; - } - } - } - - return false; -} - -/** - * Must be called after calculate_live_intervals() to remove unused - * writes to registers -- register allocation will fail otherwise - * because something deffed but not used won't be considered to - * interfere with other regs. - */ -bool -vec4_visitor::dead_code_eliminate() -{ - bool progress = false; - int pc = -1; - - calculate_live_intervals(); - - foreach_block_and_inst(block, vec4_instruction, inst, cfg) { - pc++; - - bool inst_writes_flag = false; - if (inst->dst.file != GRF) { - if (inst->dst.is_null() && inst->writes_flag()) { - inst_writes_flag = true; - } else { - continue; - } - } - - if (inst->dst.file == GRF) { - int write_mask = inst->dst.writemask; - - for (int c = 0; c < 4; c++) { - if (write_mask & (1 << c)) { - assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc); - if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) { - write_mask &= ~(1 << c); - } - } - } - - progress = try_eliminate_instruction(inst, write_mask, brw) || - progress; - } - - if (inst->predicate || inst->prev == NULL) - continue; - - int dead_channels; - if (inst_writes_flag) { -/* Arbitrarily chosen, other than not being an xyzw writemask. */ -#define FLAG_WRITEMASK (1 << 5) - dead_channels = inst->reads_flag() ? 
0 : FLAG_WRITEMASK; - } else { - dead_channels = inst->dst.writemask; - - for (int i = 0; i < 3; i++) { - if (inst->src[i].file != GRF || - inst->src[i].reg != inst->dst.reg) - continue; - - for (int j = 0; j < 4; j++) { - int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j); - dead_channels &= ~(1 << swiz); - } - } - } - - foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, - inst, block) { - if (dead_channels == 0) - break; - - if (inst_writes_flag) { - if (scan_inst->dst.is_null() && scan_inst->writes_flag()) { - scan_inst->opcode = BRW_OPCODE_NOP; - progress = true; - continue; - } else if (scan_inst->reads_flag()) { - break; - } - } - - if (inst->dst.file == scan_inst->dst.file && - inst->dst.reg == scan_inst->dst.reg && - inst->dst.reg_offset == scan_inst->dst.reg_offset) { - int new_writemask = scan_inst->dst.writemask & ~dead_channels; - - progress = try_eliminate_instruction(scan_inst, new_writemask, brw) || - progress; - } - - for (int i = 0; i < 3; i++) { - if (scan_inst->src[i].file != inst->dst.file || - scan_inst->src[i].reg != inst->dst.reg) - continue; - - for (int j = 0; j < 4; j++) { - int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j); - dead_channels &= ~(1 << swiz); - } - } - } - } - - if (progress) { - foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) { - if (inst->opcode == BRW_OPCODE_NOP) { - inst->remove(block); - } - } - - invalidate_live_intervals(); - } - - return progress; -} - void vec4_visitor::split_uniform_registers() { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp new file mode 100644 index 00000000000..b8370ba63ec --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -0,0 +1,169 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + 
* to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "brw_vec4_live_variables.h"
+#include "brw_cfg.h"
+
+/** @file brw_vec4_dead_code_eliminate.cpp
+ *
+ * Dataflow-aware dead code elimination.
+ *
+ * Walks the instruction list from the bottom, removing instructions that
+ * have results that both aren't used in later blocks and haven't been read
+ * yet in the tail end of this block.
+ */
+
+using namespace brw;
+
+/**
+ * Returns true if \p inst tolerates a partial destination writemask.
+ */
+static bool
+can_do_writemask(const struct brw_context *brw,
+                 const vec4_instruction *inst)
+{
+   switch (inst->opcode) {
+   case SHADER_OPCODE_GEN4_SCRATCH_READ:
+   case VS_OPCODE_PULL_CONSTANT_LOAD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return false;
+   default:
+      /* The MATH instruction on Gen6 only executes in align1 mode, which does
+       * not support writemasking.
+       */
+      if (brw->gen == 6 && inst->is_math())
+         return false;
+
+      if (inst->is_tex())
+         return false;
+
+      return true;
+   }
+}
+
+/**
+ * Walks every block bottom-up from its live-out sets, trimming dead channels
+ * from GRF writemasks and turning fully dead instructions into NOPs, which
+ * are removed once the walk is done.
+ *
+ * \return true if any instruction was modified or removed.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   int num_vars = live_intervals->num_vars;
+   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
+   BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);
+
+   foreach_block(block, cfg) {
+      memcpy(live, live_intervals->block_data[block->num].liveout,
+             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
+      memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
+             sizeof(BITSET_WORD));
+
+      foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
+         if (inst->dst.file == GRF && !inst->has_side_effects()) {
+            unsigned live_mask = 0;
+            for (int c = 0; c < 4; c++) {
+               if (BITSET_TEST(live, inst->dst.reg * 4 + c))
+                  live_mask |= 1 << c;
+            }
+
+            /* An instruction that can't be writemasked is all or nothing. */
+            if (live_mask != 0 && !can_do_writemask(brw, inst))
+               live_mask = 0xf;
+
+            const unsigned new_writemask = inst->dst.writemask & live_mask;
+            if (new_writemask != inst->dst.writemask) {
+               progress = true;
+               if (new_writemask != 0) {
+                  inst->dst.writemask = new_writemask;
+               } else if (inst->writes_accumulator || inst->writes_flag()) {
+                  /* Accumulator/flag write may be live; drop only the GRF. */
+                  inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+               } else {
+                  /* Dead: keep its flag write and sources out of liveness. */
+                  inst->opcode = BRW_OPCODE_NOP;
+                  continue;
+               }
+            }
+         }
+
+         if (inst->dst.is_null() && inst->writes_flag()) {
+            if (!BITSET_TEST(flag_live, 0)) {
+               inst->opcode = BRW_OPCODE_NOP;
+               progress = true;
+               continue;
+            }
+         }
+
+         /* A predicated write may not happen, so it can't end a live range. */
+         if (inst->dst.file == GRF && !inst->predicate) {
+            for (int c = 0; c < 4; c++) {
+               if (inst->dst.writemask & (1 << c)) {
+                  int var = inst->dst.reg * 4 + c;
+                  BITSET_CLEAR(live, var);
+               }
+            }
+         }
+
+         if (inst->writes_flag()) {
+            BITSET_CLEAR(flag_live, 0);
+         }
+
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               for (int c = 0; c < 4; c++) {
+                  int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
+                  int var = inst->src[i].reg * 4 + swiz;
+
+                  BITSET_SET(live, var);
+               }
+            }
+         }
+
+         if (inst->reads_flag())
+            BITSET_SET(flag_live, 0);
+      }
+   }
+
+   ralloc_free(live);
+   ralloc_free(flag_live);
+
+   if (progress) {
+      foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
+         if (inst->opcode == BRW_OPCODE_NOP) {
+            inst->remove(block);
+         }
+      }
+
+      invalidate_live_intervals();
+   }
+
+   return progress;
+}
-- 
2.11.0