utils/update_test_checks.py

   1 #!/usr/bin/env python2.7
   2
   3 """A script to generate FileCheck statements for regression tests.
   4
   5 This script is a utility to update LLVM opt or llc test cases with new
   6 FileCheck patterns. It can either update all of the tests in the file or
   7 a single test function.
   8
   9 Example usage:
  10 $ update_test_checks.py --tool=../bin/opt test/foo.ll
  11
  12 Workflow:
  13 1. Make a compiler patch that requires updating some number of FileCheck lines
  14    in regression test files.
  15 2. Save the patch and revert it from your local work area.
  16 3. Update the RUN-lines in the affected regression tests to look canonical.
  17    Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
  18 4. Refresh the FileCheck lines for either the entire file or select functions by
  19    running this script.
  20 5. Commit the fresh baseline of checks.
  21 6. Apply your patch from step 1 and rebuild your local binaries.
  22 7. Re-run this script on affected regression tests.
  23 8. Check the diffs to ensure the script has done something reasonable.
  24 9. Submit a patch including the regression test diffs for review.
  25
  26 A common pattern is to have the script insert complete checking of every
  27 instruction. Then, edit it down to only check the relevant instructions.
  28 The script is designed to make adding checks to a test case fast, it is *not*
  29 designed to be authoritative about what constitutes a good test!
  30 """
  31
  32 import argparse
  33 import itertools
  34 import os         # Used to advertise this file's name ("autogenerated_note").
  35 import string
  36 import subprocess
  37 import sys
  38 import tempfile
  39 import re
  40
  41 ADVERT = '; NOTE: Assertions have been autogenerated by '
  42
  43 # RegEx: this is where the magic happens.
  44
  45 SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
  46 SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
  47 SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
  48 SCRUB_X86_SHUFFLES_RE = (
  49     re.compile(
  50         r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
  51         flags=re.M))
  52 SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
  53 SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
  54 SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
  55 SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
  56 SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
  57
  58 RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
  59 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
  60 LLC_FUNCTION_RE = re.compile(
  61     r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
  62     r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
  63     r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
  64     flags=(re.M | re.S))
  65 OPT_FUNCTION_RE = re.compile(
  66     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
  67     r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
  68     flags=(re.M | re.S))
  69 CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
  70 CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
  71 # Match things that look at identifiers, but only if they are followed by
  72 # spaces, commas, paren, or end of the string
  73 IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
  74
  75
  76 # Invoke the tool that is being tested.
  77 def invoke_tool(args, cmd_args, ir):
  78   with open(ir) as ir_file:
  79     stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
  80                                      shell=True, stdin=ir_file)
  81   # Fix line endings to unix CR style.
  82   stdout = stdout.replace('\r\n', '\n')
  83   return stdout
  84
  85
  86 # FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
  87 def scrub_asm(asm):
  88   # Detect shuffle asm comments and hide the operands in favor of the comments.
  89   asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
  90   # Generically match the stack offset of a memory operand.
  91   asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
  92   # Generically match a RIP-relative memory operand.
  93   asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
  94   # Generically match a LCP symbol.
  95   asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
  96   # Strip kill operands inserted into the asm.
  97   asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
  98   return asm
  99
 100
 101 def scrub_body(body, tool_basename):
 102   # Scrub runs of whitespace out of the assembly, but leave the leading
 103   # whitespace in place.
 104   body = SCRUB_WHITESPACE_RE.sub(r' ', body)
 105   # Expand the tabs used for indentation.
 106   body = string.expandtabs(body, 2)
 107   # Strip trailing whitespace.
 108   body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
 109   if tool_basename == "llc":
 110     body = scrub_asm(body)
 111   return body
 112
 113
 114 # Build up a dictionary of all the function bodies.
 115 def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
 116   if tool_basename == "llc":
 117     func_regex = LLC_FUNCTION_RE
 118   else:
 119     func_regex = OPT_FUNCTION_RE
 120   for m in func_regex.finditer(raw_tool_output):
 121     if not m:
 122       continue
 123     func = m.group('func')
 124     scrubbed_body = scrub_body(m.group('body'), tool_basename)
 125     if func.startswith('stress'):
 126       # We only use the last line of the function body for stress tests.
 127       scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
 128     if verbose:
 129       print >>sys.stderr, 'Processing function: ' + func
 130       for l in scrubbed_body.splitlines():
 131         print >>sys.stderr, '  ' + l
 132     for prefix in prefixes:
 133       if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
 134         if prefix == prefixes[-1]:
 135           print >>sys.stderr, ('WARNING: Found conflicting asm under the '
 136                                'same prefix: %r!' % (prefix,))
 137         else:
 138           func_dict[prefix][func] = None
 139           continue
 140
 141       func_dict[prefix][func] = scrubbed_body
 142
 143
 144 # Create a FileCheck variable name based on an IR name.
 145 def get_value_name(var):
 146   if var.isdigit():
 147     var = 'TMP' + var
 148   var = var.replace('.', '_')
 149   return var.upper()
 150
 151
 152 # Create a FileCheck variable from regex.
 153 def get_value_definition(var):
 154   return '[[' + get_value_name(var) + ':%.*]]'
 155
 156
 157 # Use a FileCheck variable.
 158 def get_value_use(var):
 159   return '[[' + get_value_name(var) + ']]'
 160
 161 # Replace IR value defs and uses with FileCheck variables.
 162 def genericize_check_lines(lines):
 163   # This gets called for each match that occurs in
 164   # a line. We transform variables we haven't seen
 165   # into defs, and variables we have seen into uses.
 166   def transform_line_vars(match):
 167     var = match.group(2)
 168     if var in vars_seen:
 169       rv = get_value_use(var)
 170     else:
 171       vars_seen.add(var)
 172       rv = get_value_definition(var)
 173     # re.sub replaces the entire regex match
 174     # with whatever you return, so we have
 175     # to make sure to hand it back everything
 176     # including the commas and spaces.
 177     return match.group(1) + rv + match.group(3)
 178
 179   vars_seen = set()
 180   lines_with_def = []
 181
 182   for i, line in enumerate(lines):
 183     # An IR variable named '%.' matches the FileCheck regex string.
 184     line = line.replace('%.', '%dot')
 185     # Ignore any comments, since the check lines will too.
 186     scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
 187     lines[i] =  IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
 188   return lines
 189
 190
 191 def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
 192   # Select a label format based on the whether we're checking asm or IR.
 193   if tool_basename == "llc":
 194     check_label_format = "; %s-LABEL: %s:"
 195   else:
 196     check_label_format = "; %s-LABEL: @%s("
 197
 198   printed_prefixes = []
 199   for checkprefixes, _ in prefix_list:
 200     for checkprefix in checkprefixes:
 201       if checkprefix in printed_prefixes:
 202         break
 203       if not func_dict[checkprefix][func_name]:
 204         continue
 205       # Add some space between different check prefixes, but not after the last
 206       # check line (before the test code).
 207       #if len(printed_prefixes) != 0:
 208       #  output_lines.append(';')
 209       printed_prefixes.append(checkprefix)
 210       output_lines.append(check_label_format % (checkprefix, func_name))
 211       func_body = func_dict[checkprefix][func_name].splitlines()
 212
 213       # For IR output, change all defs to FileCheck variables, so we're immune
 214       # to variable naming fashions.
 215       if tool_basename == "opt":
 216         func_body = genericize_check_lines(func_body)
 217
 218       # This could be selectively enabled with an optional invocation argument.
 219       # Disabled for now: better to check everything. Be safe rather than sorry.
 220
 221       # Handle the first line of the function body as a special case because
 222       # it's often just noise (a useless asm comment or entry label).
 223       #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
 224       #  is_blank_line = True
 225       #else:
 226       #  output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
 227       #  is_blank_line = False
 228
 229       # For llc tests, there may be asm directives between the label and the
 230       # first checked line (most likely that first checked line is "# BB#0").
 231       if tool_basename == "opt":
 232         is_blank_line = False
 233       else:
 234         is_blank_line = True;
 235
 236       for func_line in func_body:
 237         if func_line.strip() == '':
 238           is_blank_line = True
 239           continue
 240         # Do not waste time checking IR comments.
 241         if tool_basename == "opt":
 242           func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
 243
 244         # Skip blank lines instead of checking them.
 245         if is_blank_line == True:
 246           output_lines.append('; %s:       %s' % (checkprefix, func_line))
 247         else:
 248           output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
 249         is_blank_line = False
 250
 251       # Add space between different check prefixes and also before the first
 252       # line of code in the test function.
 253       output_lines.append(';')
 254       break
 255   return output_lines
 256
 257
 258 def should_add_line_to_output(input_line, prefix_set):
 259   # Skip any blank comment lines in the IR.
 260   if input_line.strip() == ';':
 261     return False
 262   # Skip any blank lines in the IR.
 263   #if input_line.strip() == '':
 264   #  return False
 265   # And skip any CHECK lines. We're building our own.
 266   m = CHECK_RE.match(input_line)
 267   if m and m.group(1) in prefix_set:
 268     return False
 269
 270   return True
 271
 272
 273 def main():
 274   from argparse import RawTextHelpFormatter
 275   parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
 276   parser.add_argument('-v', '--verbose', action='store_true',
 277                       help='Show verbose output')
 278   parser.add_argument('--tool-binary', default='llc',
 279                       help='The tool used to generate the test case')
 280   parser.add_argument(
 281       '--function', help='The function in the test file to update')
 282   parser.add_argument('tests', nargs='+')
 283   args = parser.parse_args()
 284
 285   autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
 286
 287   tool_basename = os.path.basename(args.tool_binary)
 288   if (tool_basename != "llc" and tool_basename != "opt"):
 289     print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
 290     sys.exit(1)
 291
 292   for test in args.tests:
 293     if args.verbose:
 294       print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
 295     with open(test) as f:
 296       input_lines = [l.rstrip() for l in f]
 297
 298     raw_lines = [m.group(1)
 299                  for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
 300     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
 301     for l in raw_lines[1:]:
 302       if run_lines[-1].endswith("\\"):
 303         run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
 304       else:
 305         run_lines.append(l)
 306
 307     if args.verbose:
 308       print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
 309       for l in run_lines:
 310         print >>sys.stderr, '  RUN: ' + l
 311
 312     prefix_list = []
 313     for l in run_lines:
 314       (tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('|', 1)])
 315
 316       if not tool_cmd.startswith(tool_basename + ' '):
 317         print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
 318         continue
 319
 320       if not filecheck_cmd.startswith('FileCheck '):
 321         print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
 322         continue
 323
 324       tool_cmd_args = tool_cmd[len(tool_basename):].strip()
 325       tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
 326
 327       check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
 328                                for item in m.group(1).split(',')]
 329       if not check_prefixes:
 330         check_prefixes = ['CHECK']
 331
 332       # FIXME: We should use multiple check prefixes to common check lines. For
 333       # now, we just ignore all but the last.
 334       prefix_list.append((check_prefixes, tool_cmd_args))
 335
 336     func_dict = {}
 337     for prefixes, _ in prefix_list:
 338       for prefix in prefixes:
 339         func_dict.update({prefix: dict()})
 340     for prefixes, tool_args in prefix_list:
 341       if args.verbose:
 342         print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
 343         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
 344
 345       raw_tool_output = invoke_tool(args, tool_args, test)
 346       build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
 347
 348     is_in_function = False
 349     is_in_function_start = False
 350     prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
 351     if args.verbose:
 352       print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
 353     output_lines = []
 354     output_lines.append(autogenerated_note)
 355
 356     for input_line in input_lines:
 357       if is_in_function_start:
 358         if input_line == '':
 359           continue
 360         if input_line.lstrip().startswith(';'):
 361           m = CHECK_RE.match(input_line)
 362           if not m or m.group(1) not in prefix_set:
 363             output_lines.append(input_line)
 364             continue
 365
 366         # Print out the various check lines here.
 367         output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
 368         is_in_function_start = False
 369
 370       if is_in_function:
 371         if should_add_line_to_output(input_line, prefix_set) == True:
 372           # This input line of the function body will go as-is into the output.
 373           # Except make leading whitespace uniform: 2 spaces.
 374           input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
 375           output_lines.append(input_line)
 376         else:
 377           continue
 378         if input_line.strip() == '}':
 379           is_in_function = False
 380         continue
 381
 382       # Discard any previous script advertising.
 383       if input_line.startswith(ADVERT):
 384         continue
 385
 386       # If it's outside a function, it just gets copied to the output.
 387       output_lines.append(input_line)
 388
 389       m = IR_FUNCTION_RE.match(input_line)
 390       if not m:
 391         continue
 392       name = m.group(1)
 393       if args.function is not None and name != args.function:
 394         # When filtering on a specific function, skip all others.
 395         continue
 396       is_in_function = is_in_function_start = True
 397
 398     if args.verbose:
 399       print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
 400
 401     with open(test, 'wb') as f:
 402       f.writelines([l + '\n' for l in output_lines])
 403
 404
# Run the updater only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
 407