From 8bb000f460ad8af7a916af1b12206b22616fce32 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sun, 4 Feb 2018 21:37:22 +0000
Subject: [PATCH] broadcom/vc5: Try to merge more than 2 QPU instructions together.

Obviously it would be good to have an ADD and a MUL and a signal together,
but we can even potentially have multiple signals merged, as well.

total instructions in shared programs: 100423 -> 97874 (-2.54%)
instructions in affected programs: 78812 -> 76263 (-3.23%)
---
 src/broadcom/compiler/qpu_schedule.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index fdddd51bf4d..3ced2a49499 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -1268,6 +1268,13 @@ schedule_instructions(struct v3d_compile *c,
                         fprintf(stderr, "\n");
                 }
 
+                /* We can't mark_instruction_scheduled() the chosen inst until
+                 * we're done identifying instructions to merge, so put the
+                 * merged instructions on a list for a moment.
+                 */
+                struct list_head merged_list;
+                list_inithead(&merged_list);
+
                 /* Schedule this instruction onto the QPU list. Also try to
                  * find an instruction to pair with it.
                  */
@@ -1277,13 +1284,14 @@ schedule_instructions(struct v3d_compile *c,
                         mark_instruction_scheduled(schedule_list, time,
                                                    chosen, true);
 
-                        merge = choose_instruction_to_schedule(devinfo,
+                        while ((merge =
+                                choose_instruction_to_schedule(devinfo,
                                                                scoreboard,
                                                                schedule_list,
-                                                               chosen);
-                        if (merge) {
+                                                               chosen))) {
                                 time = MAX2(merge->unblocked_time, time);
                                 list_del(&merge->link);
+                                list_addtail(&merge->link, &merged_list);
                                 (void)qpu_merge_inst(devinfo, inst,
                                                      inst, &merge->inst->qpu);
                                 if (merge->inst->uniform != -1) {
@@ -1329,8 +1337,8 @@ schedule_instructions(struct v3d_compile *c,
                  * DAG edge as we do so.
                  */
                 mark_instruction_scheduled(schedule_list, time, chosen, false);
-
-                if (merge) {
+                list_for_each_entry(struct schedule_node, merge, &merged_list,
+                                    link) {
                         mark_instruction_scheduled(schedule_list, time, merge,
                                                    false);
 
-- 
2.11.0