X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=src%2Futil%2Fregister_allocate.c;h=500540bfdd9f3885642c38aa84b86e4f2c02eef1;hb=7cde4dbcd750dabc74185da058844d43928fa206;hp=2ad8c3ce11a1611cb3e561cf77a3bdb6da5c58ef;hpb=f80af89d48f2c9045c7e0438a1b35d09be3e84d5;p=android-x86%2Fexternal-mesa.git diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c index 2ad8c3ce11a..500540bfdd9 100644 --- a/src/util/register_allocate.c +++ b/src/util/register_allocate.c @@ -75,7 +75,6 @@ #include "ralloc.h" #include "main/imports.h" #include "main/macros.h" -#include "main/mtypes.h" #include "util/bitset.h" #include "register_allocate.h" @@ -174,6 +173,10 @@ struct ra_graph { * stack. */ unsigned int stack_optimistic_start; + + unsigned int (*select_reg_callback)(struct ra_graph *g, BITSET_WORD *regs, + void *data); + void *select_reg_callback_data; }; /** @@ -183,7 +186,7 @@ struct ra_graph { * using ralloc_free(). */ struct ra_regs * -ra_alloc_reg_set(void *mem_ctx, unsigned int count) +ra_alloc_reg_set(void *mem_ctx, unsigned int count, bool need_conflict_lists) { unsigned int i; struct ra_regs *regs; @@ -197,9 +200,15 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count) BITSET_WORDS(count)); BITSET_SET(regs->regs[i].conflicts, i); - regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4); - regs->regs[i].conflict_list_size = 4; - regs->regs[i].conflict_list[0] = i; + if (need_conflict_lists) { + regs->regs[i].conflict_list = ralloc_array(regs->regs, + unsigned int, 4); + regs->regs[i].conflict_list_size = 4; + regs->regs[i].conflict_list[0] = i; + } else { + regs->regs[i].conflict_list = NULL; + regs->regs[i].conflict_list_size = 0; + } regs->regs[i].num_conflicts = 1; } @@ -227,12 +236,14 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) { struct ra_reg *reg1 = ®s->regs[r1]; - if (reg1->conflict_list_size == reg1->num_conflicts) { - reg1->conflict_list_size *= 2; - reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, - unsigned int, reg1->conflict_list_size); + if (reg1->conflict_list) { + if (reg1->conflict_list_size == reg1->num_conflicts) { + reg1->conflict_list_size *= 2; + reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, + unsigned int, reg1->conflict_list_size); + } + reg1->conflict_list[reg1->num_conflicts++] = r2; } - reg1->conflict_list[reg1->num_conflicts++] = r2; BITSET_SET(reg1->conflicts, r2); } @@ -255,7 +266,7 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) */ void ra_add_transitive_reg_conflict(struct ra_regs *regs, - unsigned int base_reg, unsigned int reg) + unsigned int base_reg, unsigned int reg) { unsigned int i; @@ -266,13 +277,37 @@ ra_add_transitive_reg_conflict(struct ra_regs *regs, } } +/** + * Makes every conflict on the given register transitive. In other words, + * every register that conflicts with r will now conflict with every other + * register conflicting with r. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. + */ +void +ra_make_reg_conflicts_transitive(struct ra_regs *regs, unsigned int r) +{ + struct ra_reg *reg = ®s->regs[r]; + BITSET_WORD tmp; + int c; + + BITSET_FOREACH_SET(c, tmp, reg->conflicts, regs->count) { + struct ra_reg *other = ®s->regs[c]; + unsigned i; + for (i = 0; i < BITSET_WORDS(regs->count); i++) + other->conflicts[i] |= reg->conflicts[i]; + } +} + unsigned int ra_alloc_reg_class(struct ra_regs *regs) { struct ra_class *class; regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *, - regs->class_count + 1); + regs->class_count + 1); class = rzalloc(regs, struct ra_class); regs->classes[regs->class_count] = class; @@ -319,35 +354,39 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) for (b = 0; b < regs->class_count; b++) { for (c = 0; c < regs->class_count; c++) { regs->classes[b]->q[c] = q_values[b][c]; - } + } + } + } else { + /* Compute, for each class B and C, how many regs of B an + * allocation to C could conflict with. + */ + for (b = 0; b < regs->class_count; b++) { + for (c = 0; c < regs->class_count; c++) { + unsigned int rc; + int max_conflicts = 0; + + for (rc = 0; rc < regs->count; rc++) { + int conflicts = 0; + unsigned int i; + + if (!reg_belongs_to_class(rc, regs->classes[c])) + continue; + + for (i = 0; i < regs->regs[rc].num_conflicts; i++) { + unsigned int rb = regs->regs[rc].conflict_list[i]; + if (reg_belongs_to_class(rb, regs->classes[b])) + conflicts++; + } + max_conflicts = MAX2(max_conflicts, conflicts); + } + regs->classes[b]->q[c] = max_conflicts; + } } - return; } - /* Compute, for each class B and C, how many regs of B an - * allocation to C could conflict with. - */ - for (b = 0; b < regs->class_count; b++) { - for (c = 0; c < regs->class_count; c++) { - unsigned int rc; - int max_conflicts = 0; - - for (rc = 0; rc < regs->count; rc++) { - int conflicts = 0; - unsigned int i; - - if (!reg_belongs_to_class(rc, regs->classes[c])) - continue; - - for (i = 0; i < regs->regs[rc].num_conflicts; i++) { - unsigned int rb = regs->regs[rc].conflict_list[i]; - if (reg_belongs_to_class(rb, regs->classes[b])) - conflicts++; - } - max_conflicts = MAX2(max_conflicts, conflicts); - } - regs->classes[b]->q[c] = max_conflicts; - } + for (b = 0; b < regs->count; b++) { + ralloc_free(regs->regs[b].conflict_list); + regs->regs[b].conflict_list = NULL; } } @@ -356,11 +395,11 @@ ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2) { BITSET_SET(g->nodes[n1].adjacency, n2); - if (n1 != n2) { - int n1_class = g->nodes[n1].class; - int n2_class = g->nodes[n2].class; - g->nodes[n1].q_total += g->regs->classes[n1_class]->q[n2_class]; - } + assert(n1 != n2); + + int n1_class = g->nodes[n1].class; + int n2_class = g->nodes[n2].class; + g->nodes[n1].q_total += g->regs->classes[n1_class]->q[n2_class]; if (g->nodes[n1].adjacency_count >= g->nodes[n1].adjacency_list_size) { @@ -397,25 +436,34 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) g->nodes[i].adjacency_count = 0; g->nodes[i].q_total = 0; - ra_add_node_adjacency(g, i, i); g->nodes[i].reg = NO_REG; } return g; } +void ra_set_select_reg_callback(struct ra_graph *g, + unsigned int (*callback)(struct ra_graph *g, + BITSET_WORD *regs, + void *data), + void *data) +{ + g->select_reg_callback = callback; + g->select_reg_callback_data = data; +} + void ra_set_node_class(struct ra_graph *g, - unsigned int n, unsigned int class) + unsigned int n, unsigned int class) { g->nodes[n].class = class; } void ra_add_node_interference(struct ra_graph *g, - unsigned int n1, unsigned int n2) + unsigned int n1, unsigned int n2) { - if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) { + if (n1 != n2 && !BITSET_TEST(g->nodes[n1].adjacency, n2)) { ra_add_node_adjacency(g, n1, n2); ra_add_node_adjacency(g, n2, n1); } @@ -439,9 +487,9 @@ decrement_q(struct ra_graph *g, unsigned int n) unsigned int n2 = g->nodes[n].adjacency_list[i]; unsigned int n2_class = g->nodes[n2].class; - if (n != n2 && !g->nodes[n2].in_stack) { + if (!g->nodes[n2].in_stack) { assert(g->nodes[n2].q_total >= g->regs->classes[n2_class]->q[n_class]); - g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; + g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; } } } @@ -503,6 +551,58 @@ ra_simplify(struct ra_graph *g) g->stack_optimistic_start = stack_optimistic_start; } +static bool +ra_any_neighbors_conflict(struct ra_graph *g, unsigned int n, unsigned int r) +{ + unsigned int i; + + for (i = 0; i < g->nodes[n].adjacency_count; i++) { + unsigned int n2 = g->nodes[n].adjacency_list[i]; + + if (!g->nodes[n2].in_stack && + BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) { + return true; + } + } + + return false; +} + +/* Computes a bitfield of what regs are available for a given register + * selection. + * + * This lets drivers implement a more complicated policy than our simple first + * or round robin policies (which don't require knowing the whole bitset) + */ +static bool +ra_compute_available_regs(struct ra_graph *g, unsigned int n, BITSET_WORD *regs) +{ + struct ra_class *c = g->regs->classes[g->nodes[n].class]; + + /* Populate with the set of regs that are in the node's class. */ + memcpy(regs, c->regs, BITSET_WORDS(g->regs->count) * sizeof(BITSET_WORD)); + + /* Remove any regs that conflict with nodes that we're adjacent to and have + * already colored. + */ + for (int i = 0; i < g->nodes[n].adjacency_count; i++) { + unsigned int n2 = g->nodes[n].adjacency_list[i]; + unsigned int r = g->nodes[n2].reg; + + if (!g->nodes[n2].in_stack) { + for (int j = 0; j < BITSET_WORDS(g->regs->count); j++) + regs[j] &= ~g->regs->regs[r].conflicts[j]; + } + } + + for (int i = 0; i < BITSET_WORDS(g->regs->count); i++) { + if (regs[i]) + return true; + } + + return false; +} + /** * Pops nodes from the stack back into the graph, coloring them with * registers as they go. @@ -514,42 +614,45 @@ static bool ra_select(struct ra_graph *g) { int start_search_reg = 0; + BITSET_WORD *select_regs = NULL; + + if (g->select_reg_callback) + select_regs = malloc(BITSET_WORDS(g->regs->count) * sizeof(BITSET_WORD)); while (g->stack_count != 0) { - unsigned int i; unsigned int ri; unsigned int r = -1; int n = g->stack[g->stack_count - 1]; struct ra_class *c = g->regs->classes[g->nodes[n].class]; - /* Find the lowest-numbered reg which is not used by a member - * of the graph adjacent to us. - */ - for (ri = 0; ri < g->regs->count; ri++) { - r = (start_search_reg + ri) % g->regs->count; - if (!reg_belongs_to_class(r, c)) - continue; - - /* Check if any of our neighbors conflict with this register choice. */ - for (i = 0; i < g->nodes[n].adjacency_count; i++) { - unsigned int n2 = g->nodes[n].adjacency_list[i]; - - if (!g->nodes[n2].in_stack && - BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) { - break; - } - } - if (i == g->nodes[n].adjacency_count) - break; - } - /* set this to false even if we return here so that * ra_get_best_spill_node() considers this node later. */ g->nodes[n].in_stack = false; - if (ri == g->regs->count) - return false; + if (g->select_reg_callback) { + if (!ra_compute_available_regs(g, n, select_regs)) { + free(select_regs); + return false; + } + + r = g->select_reg_callback(g, select_regs, g->select_reg_callback_data); + } else { + /* Find the lowest-numbered reg which is not used by a member + * of the graph adjacent to us. + */ + for (ri = 0; ri < g->regs->count; ri++) { + r = (start_search_reg + ri) % g->regs->count; + if (!reg_belongs_to_class(r, c)) + continue; + + if (!ra_any_neighbors_conflict(g, n, r)) + break; + } + + if (ri >= g->regs->count) + return false; + } g->nodes[n].reg = r; g->stack_count--; @@ -568,6 +671,8 @@ ra_select(struct ra_graph *g) start_search_reg = r + 1; } + free(select_regs); + return true; } @@ -618,11 +723,9 @@ ra_get_spill_benefit(struct ra_graph *g, unsigned int n) */ for (j = 0; j < g->nodes[n].adjacency_count; j++) { unsigned int n2 = g->nodes[n].adjacency_list[j]; - if (n != n2) { - unsigned int n2_class = g->nodes[n2].class; - benefit += ((float)g->regs->classes[n_class]->q[n2_class] / - g->regs->classes[n_class]->p); - } + unsigned int n2_class = g->nodes[n2].class; + benefit += ((float)g->regs->classes[n_class]->q[n2_class] / + g->regs->classes[n_class]->p); } return benefit; @@ -648,7 +751,7 @@ ra_get_best_spill_node(struct ra_graph *g) float cost = g->nodes[n].spill_cost; float benefit; - if (cost <= 0.0) + if (cost <= 0.0f) continue; if (g->nodes[n].in_stack)