#!/usr/bin/env python2.7

"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool-binary=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
"""

import argparse
import os  # Used to advertise this file's name ("autogenerated_note").
import re
import string
import subprocess
import sys
# First line written to every regenerated test; the script name is appended.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# Scrubbers: normalize whitespace and hide target-specific noise in tool output.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# re.M is required: kill comments sit in the middle of a function body, and
# without it '^' would only ever match at the very start of the text.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Matchers for the test file itself and for the tool output.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# Both function matchers capture a multi-line 'body' group, so they need
# re.S (for '.' to cross newlines) as well as re.M (for per-line '^' / '$').
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
def invoke_tool(args, cmd_args, ir):
    """Invoke the tool that is being tested and return what it prints.

    Args:
      args: Parsed command-line arguments; only ``args.tool_binary`` is read.
      cmd_args: The tool options, as a single string.
      ir: Path of the test file; it is opened and fed to the tool on stdin.

    Returns:
      The tool's standard output with CRLF line endings converted to LF.

    Raises:
      subprocess.CalledProcessError: If the tool exits with a non-zero status.
    """
    # The command is assembled as one string and handed to the shell, so
    # cmd_args may contain anything the shell understands.
    with open(ir) as ir_file:
        stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                         shell=True, stdin=ir_file)
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Replace volatile x86 assembly details with FileCheck regexes.

    Args:
      asm: Assembly text of one function.

    Returns:
      The text with shuffle operands, stack offsets, RIP-relative operands and
      LCP symbols replaced by generic patterns, and kill comments removed.
    """
    # Detect shuffle asm comments and hide the operands in favor of the comments.
    asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
    # Generically match the stack offset of a memory operand.
    asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
    # Generically match a RIP-relative memory operand.
    asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
    # Generically match a LCP symbol.
    asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
    # Strip kill operands inserted into the asm.
    asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
    return asm
def scrub_body(body, tool_basename):
    """Normalize the whitespace of a function body taken from tool output.

    Args:
      body: Text of one function body.
      tool_basename: "llc" or "opt"; asm-specific scrubbing is applied only
        for "llc".

    Returns:
      The scrubbed body text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used because
    # the equivalent string-module function is deprecated.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
    """Build up a dictionary of all the function bodies.

    Every function found in the tool output is scrubbed and stored in
    ``func_dict[prefix][function_name]`` for each prefix in ``prefixes``.

    Args:
      raw_tool_output: Complete output of one tool invocation.
      prefixes: FileCheck prefixes that apply to this invocation.
      func_dict: Mapping of prefix -> {function name -> body}; updated in
        place. Each prefix must already have an entry.
      verbose: When true, echo every processed function to stderr.
      tool_basename: "llc" selects the asm function matcher, anything else
        the IR matcher.
    """
    if tool_basename == "llc":
        func_regex = LLC_FUNCTION_RE
    else:
        func_regex = OPT_FUNCTION_RE

    for m in func_regex.finditer(raw_tool_output):
        func = m.group('func')
        scrubbed_body = scrub_body(m.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for l in scrubbed_body.splitlines():
                sys.stderr.write(' ' + l + '\n')
        for prefix in prefixes:
            already_seen = func in func_dict[prefix]
            if already_seen and func_dict[prefix][func] != scrubbed_body:
                if prefix == prefixes[-1]:
                    # The last prefix still gets the new body stored below.
                    sys.stderr.write('WARNING: Found conflicting asm under the '
                                     'same prefix: %r!\n' % (prefix,))
                else:
                    # A shared prefix saw two different bodies for the same
                    # function: mark it unusable and leave it that way.
                    func_dict[prefix][func] = None
                    continue

            func_dict[prefix][func] = scrubbed_body
def get_value_name(var):
    """Create a FileCheck variable name based on an IR name.

    Args:
      var: IR value name without the leading '%'.

    Returns:
      An upper-case name in which every '.' is replaced by '_'; names made
      only of digits get a 'TMP' prefix.
    """
    # Unnamed IR values are plain numbers; a FileCheck variable name cannot
    # start with a digit, so give those a textual prefix.
    if var.isdigit():
        var = 'TMP' + var
    # '.' is not allowed in a FileCheck variable name either.
    var = var.replace('.', '_')
    return var.upper()
def get_value_definition(var):
    """Return the FileCheck pattern that defines a variable for IR value `var`."""
    filecheck_name = get_value_name(var)
    return '[[' + filecheck_name + ':%.*]]'
def get_value_use(var):
    """Return the FileCheck pattern that refers back to the variable for `var`."""
    filecheck_name = get_value_name(var)
    return '[[' + filecheck_name + ']]'
def genericize_check_lines(lines):
    """Replace IR value defs and uses with FileCheck variables.

    Args:
      lines: Lines of one IR function body; the list is rewritten in place.

    Returns:
      The same list, with comments removed and every IR value replaced by a
      FileCheck variable definition (first occurrence) or use (later ones).
    """
    vars_seen = set()

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the FileCheck lines for one function to `output_lines`.

    Args:
      output_lines: List of output lines; extended in place.
      prefix_list: List of (check prefixes, tool arguments) pairs, one per
        RUN line.
      func_dict: Mapping of prefix -> {function name -> scrubbed body}.
      func_name: Name of the function to emit checks for.
      tool_basename: "llc" for asm checks, "opt" for IR checks.

    Returns:
      `output_lines`, the same list that was passed in.
    """
    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # .get(): a function defined in the test may be absent from the
            # tool output; treat that like an unusable body instead of
            # raising KeyError.
            func_text = func_dict[checkprefix].get(func_name)
            if not func_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = func_text.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if tool_basename == "opt":
                func_body = genericize_check_lines(func_body)

            # For llc tests, there may be asm directives between the label and
            # the first checked line (most likely that first checked line is
            # "# BB#0"), so the first check must not be a -NEXT check.
            is_blank_line = tool_basename != "opt"

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                if tool_basename == "opt":
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after a
                # gap gets a plain check, every other line a -NEXT check.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines
def should_add_line_to_output(input_line, prefix_set):
    """Decide whether a line of the original test is copied to the output.

    Args:
      input_line: One line from inside a function in the test file.
      prefix_set: Set of FileCheck prefixes this script regenerates.

    Returns:
      False for an empty comment line (';') and for check lines that use one
      of the prefixes in `prefix_set`; True for everything else.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True
def main():
    """Regenerate the FileCheck lines of every test file named on the command line.

    For each test file: collect its RUN lines, run the tool once per usable
    RUN line, then rewrite the file in place with fresh check lines in front
    of each function body.
    """
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--tool-binary', default='llc',
                        help='The tool used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

    tool_basename = os.path.basename(args.tool_binary)
    if tool_basename not in ('llc', 'opt'):
        sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            sys.stderr.write('Scanning for RUN lines in test file: %s\n' % (test,))
        with open(test) as f:
            input_lines = [l.rstrip() for l in f]

        # Collect the RUN lines, joining those continued with a backslash.
        raw_lines = [m.group(1)
                     for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
        run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
        for l in raw_lines[1:]:
            if run_lines[-1].endswith("\\"):
                run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
            else:
                run_lines.append(l)

        if args.verbose:
            sys.stderr.write('Found %d RUN lines:\n' % (len(run_lines),))
            for l in run_lines:
                sys.stderr.write(' RUN: ' + l + '\n')

        prefix_list = []
        for l in run_lines:
            commands = [cmd.strip() for cmd in l.split('|', 1)]
            if len(commands) != 2:
                # No pipe at all: there is no FileCheck invocation to update.
                sys.stderr.write('WARNING: Skipping unparseable RUN line: ' + l + '\n')
                continue
            tool_cmd, filecheck_cmd = commands

            if not tool_cmd.startswith(tool_basename + ' '):
                sys.stderr.write('WARNING: Skipping non-%s RUN line: %s\n' % (tool_basename, l))
                continue

            if not filecheck_cmd.startswith('FileCheck '):
                sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
                continue

            # The test file is supplied on stdin by invoke_tool, so drop the
            # lit substitutions that refer to it.
            tool_cmd_args = tool_cmd[len(tool_basename):].strip()
            tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

            check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
                              for item in m.group(1).split(',')]
            if not check_prefixes:
                check_prefixes = ['CHECK']

            # FIXME: We should use multiple check prefixes to common check lines. For
            # now, we just ignore all but the last.
            prefix_list.append((check_prefixes, tool_cmd_args))

        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict.update({prefix: dict()})
        for prefixes, tool_args in prefix_list:
            if args.verbose:
                sys.stderr.write('Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
                sys.stderr.write('Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

            raw_tool_output = invoke_tool(args, tool_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)

        is_in_function = False
        is_in_function_start = False
        prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
        if args.verbose:
            sys.stderr.write('Rewriting FileCheck prefixes: %s\n' % (prefix_set,))
        output_lines = []
        output_lines.append(autogenerated_note)

        for input_line in input_lines:
            if is_in_function_start:
                if input_line == '':
                    continue
                if input_line.lstrip().startswith(';'):
                    # Keep comments that are not check lines of ours; the
                    # generated checks go after them.
                    m = CHECK_RE.match(input_line)
                    if not m or m.group(1) not in prefix_set:
                        output_lines.append(input_line)
                        continue

                # Print out the various check lines here.
                output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
                is_in_function_start = False

            if is_in_function:
                if should_add_line_to_output(input_line, prefix_set) == True:
                    # This input line of the function body will go as-is into the output.
                    # Except make leading whitespace uniform: 2 spaces.
                    input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
                    output_lines.append(input_line)
                else:
                    continue
                if input_line.strip() == '}':
                    is_in_function = False
                continue

            # Discard any previous script advertising.
            if input_line.startswith(ADVERT):
                continue

            # If it's outside a function, it just gets copied to the output.
            output_lines.append(input_line)

            m = IR_FUNCTION_RE.match(input_line)
            if not m:
                continue
            name = m.group(1)
            if args.function is not None and name != args.function:
                # When filtering on a specific function, skip all others.
                continue
            is_in_function = is_in_function_start = True

        if args.verbose:
            sys.stderr.write('Writing %d lines to %s...\n' % (len(output_lines), test))

        with open(test, 'wb') as f:
            f.writelines([l + '\n' for l in output_lines])


if __name__ == '__main__':
    main()