From eb2776504ae32feaf41a5bad9f09f154045e96a3 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 21 Aug 2015 10:57:24 -0700
Subject: [PATCH 1/1] vc4: Actually allow math results to allocate into r4.

I switched us to tracking whether the results *could* go to r4, but then
didn't make a separate register class for the class bits that included r4.
Switch the "any" class to actually be "any", and name the "any but r4"
class more appropriately.

total instructions in shared programs: 96798 -> 94680 (-2.19%)
instructions in affected programs: 62736 -> 60618 (-3.38%)
---
 src/gallium/drivers/vc4/vc4_context.h           | 1 +
 src/gallium/drivers/vc4/vc4_register_allocate.c | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 654c46f3c0d..3a63af8f2b0 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -270,6 +270,7 @@ struct vc4_context {
 
         struct ra_regs *regs;
         unsigned int reg_class_any;
+        unsigned int reg_class_a_or_b_or_acc;
         unsigned int reg_class_r4_or_a;
         unsigned int reg_class_a;
 
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 2ea88500227..3ced50f3a44 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -116,6 +116,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
         vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
 
         vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs);
         vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
         vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
         for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
@@ -130,10 +131,12 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
                  */
                 if (vc4_regs[i].mux == QPU_MUX_R4) {
                         ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+                        ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
                         continue;
                 }
 
                 ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
+                ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i);
         }
 
         for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
@@ -304,9 +307,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
 
                 switch (class_bits[i]) {
                 case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
-                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
                         ra_set_node_class(g, node, vc4->reg_class_any);
                         break;
+                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+                        ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
+                        break;
                 case CLASS_BIT_A | CLASS_BIT_R4:
                         ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
                         break;
-- 
2.11.0