From 9e6b39e1d521aa723749a47d958d58900bf25138 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Mon, 26 Nov 2018 12:05:00 +1100 Subject: [PATCH] nir: detect more induction variables This allows loop analysis to detect inductions variables that are incremented in both branches of an if rather than in a main loop block. For example: loop { block block_1: /* preds: block_0 block_7 */ vec1 32 ssa_8 = phi block_0: ssa_4, block_7: ssa_20 vec1 32 ssa_9 = phi block_0: ssa_0, block_7: ssa_4 vec1 32 ssa_10 = phi block_0: ssa_1, block_7: ssa_4 vec1 32 ssa_11 = phi block_0: ssa_2, block_7: ssa_21 vec1 32 ssa_12 = phi block_0: ssa_3, block_7: ssa_22 vec4 32 ssa_13 = vec4 ssa_12, ssa_11, ssa_10, ssa_9 vec1 32 ssa_14 = ige ssa_8, ssa_5 /* succs: block_2 block_3 */ if ssa_14 { block block_2: /* preds: block_1 */ break /* succs: block_8 */ } else { block block_3: /* preds: block_1 */ /* succs: block_4 */ } block block_4: /* preds: block_3 */ vec1 32 ssa_15 = ilt ssa_6, ssa_8 /* succs: block_5 block_6 */ if ssa_15 { block block_5: /* preds: block_4 */ vec1 32 ssa_16 = iadd ssa_8, ssa_7 vec1 32 ssa_17 = load_const (0x3f800000 /* 1.000000*/) /* succs: block_7 */ } else { block block_6: /* preds: block_4 */ vec1 32 ssa_18 = iadd ssa_8, ssa_7 vec1 32 ssa_19 = load_const (0x3f800000 /* 1.000000*/) /* succs: block_7 */ } block block_7: /* preds: block_5 block_6 */ vec1 32 ssa_20 = phi block_5: ssa_16, block_6: ssa_18 vec1 32 ssa_21 = phi block_5: ssa_17, block_6: ssa_4 vec1 32 ssa_22 = phi block_5: ssa_4, block_6: ssa_19 /* succs: block_1 */ } Unfortunatly GCM could move the addition out of the if for us (making this patch unrequired) but we still cannot enable the GCM pass without regressions. This unrolls a loop in Rise of The Tomb Raider. vkpipeline-db results (VEGA): Totals from affected shaders: SGPRS: 88 -> 96 (9.09 %) VGPRS: 56 -> 52 (-7.14 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 2168 -> 4560 (110.33 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 4 -> 4 (0.00 %) Wait states: 0 -> 0 (0.00 %) Reviewed-by: Thomas Helland Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32211 --- src/compiler/nir/nir_loop_analyze.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 4a7397d2149..3de45401975 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -245,6 +245,42 @@ compute_induction_information(loop_info_state *state) if (src_var->in_if_branch || src_var->in_nested_loop) break; + /* Detect inductions variables that are incremented in both branches + * of an unnested if rather than in a loop block. + */ + if (is_var_phi(src_var)) { + nir_phi_instr *src_phi = + nir_instr_as_phi(src_var->def->parent_instr); + + nir_op alu_op; + nir_ssa_def *alu_srcs[2] = {0}; + nir_foreach_phi_src(src2, src_phi) { + nir_loop_variable *src_var2 = + get_loop_var(src2->src.ssa, state); + + if (!src_var2->in_if_branch || !is_var_alu(src_var2)) + break; + + nir_alu_instr *alu = + nir_instr_as_alu(src_var2->def->parent_instr); + if (nir_op_infos[alu->op].num_inputs != 2) + break; + + if (alu->src[0].src.ssa == alu_srcs[0] && + alu->src[1].src.ssa == alu_srcs[1] && + alu->op == alu_op) { + /* Both branches perform the same calculation so we can use + * one of them to find the induction variable. + */ + src_var = src_var2; + } else { + alu_srcs[0] = alu->src[0].src.ssa; + alu_srcs[1] = alu->src[1].src.ssa; + alu_op = alu->op; + } + } + } + if (!src_var->in_loop) { biv->def_outside_loop = src_var; } else if (is_var_alu(src_var)) { -- 2.11.0