2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file opt_tree_grafting.cpp
27 * Takes assignments to variables that are dereferenced only once and
28 * pastes the RHS expression into where the variable is dereferenced.
30 * In the process of various operations like function inlining and
31 * tertiary op handling, we'll end up with our expression trees having
32 * been chopped up into a series of assignments of short expressions
33 * to temps. Other passes like ir_algebraic.cpp would prefer to see
34 * the deepest expression trees they can to try to optimize them.
36 * This is a lot like copy propagaton. In comparison, copy
37 * propagation only acts on plain copies, not arbitrary expressions on
38 * the RHS. Generally, we wouldn't want to go pasting some
39 * complicated expression everywhere it got used, though, so we don't
40 * handle expressions in that pass.
42 * The hard part is making sure we don't move an expression across
43 * some other assignments that would change the value of the
44 * expression. So we split this into two passes: First, find the
45 * variables in our scope which are written to once and read once, and
46 * then go through basic blocks seeing if we find an opportunity to
47 * move those expressions safely.
51 #include "ir_visitor.h"
52 #include "ir_variable_refcount.h"
53 #include "ir_basic_block.h"
54 #include "ir_optimization.h"
55 #include "glsl_types.h"
59 static bool debug = false;
61 class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
63 ir_tree_grafting_visitor(ir_assignment *graft_assign,
64 ir_variable *graft_var)
66 this->progress = false;
67 this->graft_assign = graft_assign;
68 this->graft_var = graft_var;
71 virtual ir_visitor_status visit_leave(class ir_assignment *);
72 virtual ir_visitor_status visit_enter(class ir_call *);
73 virtual ir_visitor_status visit_enter(class ir_expression *);
74 virtual ir_visitor_status visit_enter(class ir_function *);
75 virtual ir_visitor_status visit_enter(class ir_function_signature *);
76 virtual ir_visitor_status visit_enter(class ir_if *);
77 virtual ir_visitor_status visit_enter(class ir_loop *);
78 virtual ir_visitor_status visit_enter(class ir_swizzle *);
79 virtual ir_visitor_status visit_enter(class ir_texture *);
81 ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
83 bool do_graft(ir_rvalue **rvalue);
86 ir_variable *graft_var;
87 ir_assignment *graft_assign;
90 struct find_deref_info {
96 dereferences_variable_callback(ir_instruction *ir, void *data)
98 struct find_deref_info *info = (struct find_deref_info *)data;
99 ir_dereference_variable *deref = ir->as_dereference_variable();
101 if (deref && deref->var == info->var)
106 dereferences_variable(ir_instruction *ir, ir_variable *var)
108 struct find_deref_info info;
113 visit_tree(ir, dereferences_variable_callback, &info);
119 ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
124 ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
126 if (!deref || deref->var != this->graft_var)
130 printf("GRAFTING:\n");
131 this->graft_assign->print();
138 this->graft_assign->remove();
139 *rvalue = this->graft_assign->rhs;
141 this->progress = true;
146 ir_tree_grafting_visitor::visit_enter(ir_loop *ir)
149 /* Do not traverse into the body of the loop since that is a
150 * different basic block.
156 * Check if we can continue grafting after writing to a variable. If the
157 * expression we're trying to graft references the variable, we must stop.
159 * \param ir An instruction that writes to a variable.
160 * \param var The variable being updated.
163 ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var)
165 if (dereferences_variable(this->graft_assign->rhs, var)) {
167 printf("graft killed by: ");
174 return visit_continue;
178 ir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
180 if (do_graft(&ir->rhs) ||
181 do_graft(&ir->condition))
184 /* If this assignment updates a variable used in the assignment
185 * we're trying to graft, then we're done.
187 return check_graft(ir, ir->lhs->variable_referenced());
191 ir_tree_grafting_visitor::visit_enter(ir_function *ir)
194 return visit_continue_with_parent;
198 ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
201 return visit_continue_with_parent;
205 ir_tree_grafting_visitor::visit_enter(ir_call *ir)
207 exec_list_iterator sig_iter = ir->callee->parameters.iterator();
208 /* Reminder: iterating ir_call iterates its parameters. */
209 foreach_iter(exec_list_iterator, iter, *ir) {
210 ir_variable *sig_param = (ir_variable *)sig_iter.get();
211 ir_rvalue *ir = (ir_rvalue *)iter.get();
212 ir_rvalue *new_ir = ir;
214 if (sig_param->mode != ir_var_function_in
215 && sig_param->mode != ir_var_const_in) {
216 if (check_graft(ir, sig_param) == visit_stop)
221 if (do_graft(&new_ir)) {
222 ir->replace_with(new_ir);
228 if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop)
231 return visit_continue;
235 ir_tree_grafting_visitor::visit_enter(ir_expression *ir)
237 for (unsigned int i = 0; i < ir->get_num_operands(); i++) {
238 if (do_graft(&ir->operands[i]))
242 return visit_continue;
246 ir_tree_grafting_visitor::visit_enter(ir_if *ir)
248 if (do_graft(&ir->condition))
251 /* Do not traverse into the body of the if-statement since that is a
252 * different basic block.
254 return visit_continue_with_parent;
258 ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
260 if (do_graft(&ir->val))
263 return visit_continue;
267 ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
269 if (do_graft(&ir->coordinate) ||
270 do_graft(&ir->projector) ||
271 do_graft(&ir->offset) ||
272 do_graft(&ir->shadow_comparitor))
280 if (do_graft(&ir->lod_info.bias))
286 if (do_graft(&ir->lod_info.lod))
290 if (do_graft(&ir->lod_info.sample_index))
294 if (do_graft(&ir->lod_info.grad.dPdx) ||
295 do_graft(&ir->lod_info.grad.dPdy))
300 return visit_continue;
303 struct tree_grafting_info {
304 ir_variable_refcount_visitor *refs;
309 try_tree_grafting(ir_assignment *start,
310 ir_variable *lhs_var,
311 ir_instruction *bb_last)
313 ir_tree_grafting_visitor v(start, lhs_var);
316 printf("trying to graft: ");
321 for (ir_instruction *ir = (ir_instruction *)start->next;
323 ir = (ir_instruction *)ir->next) {
331 ir_visitor_status s = ir->accept(&v);
340 tree_grafting_basic_block(ir_instruction *bb_first,
341 ir_instruction *bb_last,
344 struct tree_grafting_info *info = (struct tree_grafting_info *)data;
345 ir_instruction *ir, *next;
347 for (ir = bb_first, next = (ir_instruction *)ir->next;
349 ir = next, next = (ir_instruction *)ir->next) {
350 ir_assignment *assign = ir->as_assignment();
355 ir_variable *lhs_var = assign->whole_variable_written();
359 if (lhs_var->mode == ir_var_function_out ||
360 lhs_var->mode == ir_var_function_inout ||
361 lhs_var->mode == ir_var_shader_out)
364 ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
366 if (!entry->declaration ||
367 entry->assigned_count != 1 ||
368 entry->referenced_count != 2)
371 assert(assign == entry->assign);
373 /* Found a possibly graftable assignment. Now, walk through the
374 * rest of the BB seeing if the deref is here, and if nothing interfered with
375 * pasting its expression's values in between.
377 info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
381 } /* unnamed namespace */
384 * Does a copy propagation pass on the code present in the instruction stream.
387 do_tree_grafting(exec_list *instructions)
389 ir_variable_refcount_visitor refs;
390 struct tree_grafting_info info;
392 info.progress = false;
395 visit_list_elements(info.refs, instructions);
397 call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
399 return info.progress;