-#!/usr/bin/env python2.7
+#!/usr/bin/env python
-"""A script to generate FileCheck statements for regression tests.
+"""A script to generate FileCheck statements for 'opt' regression tests.
-This script is a utility to update LLVM opt or llc test cases with new
+This script is a utility to update LLVM opt test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.
Example usage:
-$ update_test_checks.py --tool=../bin/opt test/foo.ll
+$ update_test_checks.py --opt=../bin/opt test/foo.ll
Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
designed to be authoritative about what constitutes a good test!
"""
+from __future__ import print_function
+
import argparse
+import glob
import itertools
import os # Used to advertise this file's name ("autogenerated_note").
import string
import tempfile
import re
+from UpdateTestChecks import common
+
ADVERT = '; NOTE: Assertions have been autogenerated by '
# RegEx: this is where the magic happens.
-SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_X86_SHUFFLES_RE = (
- re.compile(
- r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
- flags=re.M))
-SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
-SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
-SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
-SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
-
-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
-LLC_FUNCTION_RE = re.compile(
- r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
- r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
- r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
- flags=(re.M | re.S))
-OPT_FUNCTION_RE = re.compile(
- r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
- r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
- flags=(re.M | re.S))
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
-# Match things that look like identifiers, but only if they are followed by
-# spaces, commas, parens, or the end of the string.
-IR_VALUE_RE = re.compile(r'(\s+)%(\w+?)([,\s\(\)]|\Z)')
-
-
-# Run the tool under test through the shell with the test file on stdin and return its stdout.
-def invoke_tool(args, cmd_args, ir):
- with open(ir) as ir_file:
- stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
- shell=True, stdin=ir_file)
- # Normalize CRLF line endings to Unix-style LF.
- stdout = stdout.replace('\r\n', '\n')
- return stdout
-
-
-# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
-def scrub_asm(asm):
- # Detect shuffle asm comments and hide the operands in favor of the comments.
- asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
- # Generically match the stack offset of a memory operand.
- asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
- # Generically match a RIP-relative memory operand.
- asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
- # Generically match an .LCPI symbol.
- asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
- # Strip '# kill:' comments. NOTE(review): the pattern is compiled without re.M, so '^' only matches at the start of asm - confirm intent.
- asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
- return asm
-
-
-def scrub_body(body, tool_basename):
- # Collapse runs of spaces and tabs in the tool output (IR or asm) to a single
- # space, but leave the leading whitespace in place.
- body = SCRUB_WHITESPACE_RE.sub(r' ', body)
- # Expand the tabs used for indentation.
- body = string.expandtabs(body, 2)
- # Strip trailing whitespace.
- body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
- if tool_basename == "llc":
- body = scrub_asm(body)
- return body
-
-
-# Store the scrubbed body of every function matched in raw_tool_output into func_dict[prefix][func].
-def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
- if tool_basename == "llc":
- func_regex = LLC_FUNCTION_RE
- else:
- func_regex = OPT_FUNCTION_RE
- for m in func_regex.finditer(raw_tool_output):
- if not m:
- continue
- func = m.group('func')
- scrubbed_body = scrub_body(m.group('body'), tool_basename)
- if func.startswith('stress'):
- # Functions whose name starts with 'stress' keep only the last line of their body.
- scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
- if verbose:
- print >>sys.stderr, 'Processing function: ' + func
- for l in scrubbed_body.splitlines():
- print >>sys.stderr, ' ' + l
- for prefix in prefixes:
- if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
- if prefix == prefixes[-1]:
- print >>sys.stderr, ('WARNING: Found conflicting asm under the '
- 'same prefix: %r!' % (prefix,))
- else:
- func_dict[prefix][func] = None
- continue
-
- func_dict[prefix][func] = scrubbed_body
-
-
-# Create a FileCheck variable name from an IR name: 'TMP' prefix for all-digit names, '.' -> '_', upper-cased.
-def get_value_name(var):
- if var.isdigit():
- var = 'TMP' + var
- var = var.replace('.', '_')
- return var.upper()
-
-
-# Build the FileCheck variable definition '[[NAME:%.*]]' for an IR value name.
-def get_value_definition(var):
- return '[[' + get_value_name(var) + ':%.*]]'
-
-
-# Build the FileCheck variable use '[[NAME]]' for an IR value name.
-def get_value_use(var):
- return '[[' + get_value_name(var) + ']]'
-
-# Replace IR value defs and uses with FileCheck variables; rewrites 'lines' in place and returns it.
-def genericize_check_lines(lines):
- # This gets called for each match that occurs in
- # a line. We transform variables we haven't seen
- # into defs, and variables we have seen into uses.
- def transform_line_vars(match):
- var = match.group(2)
- if var in vars_seen:
- rv = get_value_use(var)
- else:
- vars_seen.add(var)
- rv = get_value_definition(var)
- # re.sub replaces the entire regex match
- # with whatever you return, so we have
- # to make sure to hand it back everything
- # including the commas and spaces.
- return match.group(1) + rv + match.group(3)
-
- vars_seen = set()
- lines_with_def = []
-
- for i, line in enumerate(lines):
- # An IR variable name starting with '%.' matches the FileCheck regex string ('%.*'), so rewrite '%.' as '%dot'.
- line = line.replace('%.', '%dot')
- # Ignore any comments, since the check lines will too.
- scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
- lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
- return lines
-
-
-def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
- # Select a label format based on whether we're checking asm or IR.
- if tool_basename == "llc":
- check_label_format = "; %s-LABEL: %s:"
- else:
- check_label_format = "; %s-LABEL: @%s("
-
- printed_prefixes = []
- for checkprefixes, _ in prefix_list:
- for checkprefix in checkprefixes:
- if checkprefix in printed_prefixes:
- break
- if not func_dict[checkprefix][func_name]:
- continue
- # Add some space between different check prefixes, but not after the last
- # check line (before the test code).
- #if len(printed_prefixes) != 0:
- # output_lines.append(';')
- printed_prefixes.append(checkprefix)
- output_lines.append(check_label_format % (checkprefix, func_name))
- func_body = func_dict[checkprefix][func_name].splitlines()
-
- # For IR output, change all defs to FileCheck variables, so we're immune
- # to variable naming fashions.
- if tool_basename == "opt":
- func_body = genericize_check_lines(func_body)
-
- # This could be selectively enabled with an optional invocation argument.
- # Disabled for now: better to check everything. Be safe rather than sorry.
-
- # Handle the first line of the function body as a special case because
- # it's often just noise (a useless asm comment or entry label).
- #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
- # is_blank_line = True
- #else:
- # output_lines.append('; %s: %s' % (checkprefix, func_body[0]))
- # is_blank_line = False
-
- # For llc tests, there may be asm directives between the label and the
- # first checked line (most likely that first checked line is "# BB#0").
- if tool_basename == "opt":
- is_blank_line = False
- else:
- is_blank_line = True;
- for func_line in func_body:
- if func_line.strip() == '':
- is_blank_line = True
- continue
- # Do not waste time checking IR comments.
- if tool_basename == "opt":
- func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
-
- # Blank lines are skipped, not checked; the line after one gets a plain check instead of -NEXT.
- if is_blank_line == True:
- output_lines.append('; %s: %s' % (checkprefix, func_line))
- else:
- output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
- is_blank_line = False
-
- # Add space between different check prefixes and also before the first
- # line of code in the test function.
- output_lines.append(';')
- break
- return output_lines
-def should_add_line_to_output(input_line, prefix_set):
- # Skip any blank comment lines in the IR.
- if input_line.strip() == ';':
- return False
- # Skip any blank lines in the IR.
- #if input_line.strip() == '':
- # return False
- # And skip any check lines whose prefix is in prefix_set. We're building our own.
- m = CHECK_RE.match(input_line)
- if m and m.group(1) in prefix_set:
- return False
-
- return True
def main():
parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
parser.add_argument('-v', '--verbose', action='store_true',
help='Show verbose output')
- parser.add_argument('--tool-binary', default='llc',
- help='The tool used to generate the test case')
+ parser.add_argument('--opt-binary', default='opt',
+ help='The opt binary used to generate the test case')
parser.add_argument(
'--function', help='The function in the test file to update')
parser.add_argument('tests', nargs='+')
autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
- tool_basename = os.path.basename(args.tool_binary)
- if (tool_basename != "llc" and tool_basename != "opt"):
- print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
+ opt_basename = os.path.basename(args.opt_binary)
+ if not re.match(r'^opt(-\d+)?$', opt_basename):
+ print('ERROR: Unexpected opt name: ' + opt_basename, file=sys.stderr)
sys.exit(1)
+ opt_basename = 'opt'
+ test_paths = []
for test in args.tests:
+ if not glob.glob(test):
+ print('WARNING: Test file \'%s\' was not found. Ignoring it.' % (test,), file=sys.stderr)
+ continue
+ test_paths.append(test)
+
+ for test in test_paths:
if args.verbose:
- print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
+ print('Scanning for RUN lines in test file: %s' % (test,), file=sys.stderr)
with open(test) as f:
input_lines = [l.rstrip() for l in f]
raw_lines = [m.group(1)
- for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+ for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
for l in raw_lines[1:]:
if run_lines[-1].endswith("\\"):
run_lines.append(l)
if args.verbose:
- print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
+ print('Found %d RUN lines:' % (len(run_lines),), file=sys.stderr)
for l in run_lines:
- print >>sys.stderr, ' RUN: ' + l
+ print(' RUN: ' + l, file=sys.stderr)
prefix_list = []
for l in run_lines:
(tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('|', 1)])
- if not tool_cmd.startswith(tool_basename + ' '):
- print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
+ if not tool_cmd.startswith(opt_basename + ' '):
+ print('WARNING: Skipping non-%s RUN line: %s' % (opt_basename, l), file=sys.stderr)
continue
if not filecheck_cmd.startswith('FileCheck '):
- print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
+ print('WARNING: Skipping non-FileChecked RUN line: ' + l, file=sys.stderr)
continue
- tool_cmd_args = tool_cmd[len(tool_basename):].strip()
+ tool_cmd_args = tool_cmd[len(opt_basename):].strip()
tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
- check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+ check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
for item in m.group(1).split(',')]
if not check_prefixes:
check_prefixes = ['CHECK']
for prefixes, _ in prefix_list:
for prefix in prefixes:
func_dict.update({prefix: dict()})
- for prefixes, tool_args in prefix_list:
+ for prefixes, opt_args in prefix_list:
if args.verbose:
- print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
- print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
+ print('Extracted opt cmd: ' + opt_basename + ' ' + opt_args, file=sys.stderr)
+ print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr)
- raw_tool_output = invoke_tool(args, tool_args, test)
- build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
+ raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test)
+ common.build_function_body_dictionary(
+ common.OPT_FUNCTION_RE, common.scrub_body, [],
+ raw_tool_output, prefixes, func_dict, args.verbose)
is_in_function = False
is_in_function_start = False
prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
if args.verbose:
- print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
+ print('Rewriting FileCheck prefixes: %s' % (prefix_set,), file=sys.stderr)
output_lines = []
output_lines.append(autogenerated_note)
if input_line == '':
continue
if input_line.lstrip().startswith(';'):
- m = CHECK_RE.match(input_line)
+ m = common.CHECK_RE.match(input_line)
if not m or m.group(1) not in prefix_set:
output_lines.append(input_line)
continue
# Print out the various check lines here.
- output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
+ common.add_ir_checks(output_lines, ';', prefix_list, func_dict, func_name)
is_in_function_start = False
if is_in_function:
- if should_add_line_to_output(input_line, prefix_set) == True:
+ if common.should_add_line_to_output(input_line, prefix_set):
# This input line of the function body will go as-is into the output.
# Except make leading whitespace uniform: 2 spaces.
- input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
+ input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r' ', input_line)
output_lines.append(input_line)
else:
continue
m = IR_FUNCTION_RE.match(input_line)
if not m:
continue
- name = m.group(1)
- if args.function is not None and name != args.function:
+ func_name = m.group(1)
+ if args.function is not None and func_name != args.function:
# When filtering on a specific function, skip all others.
continue
is_in_function = is_in_function_start = True
if args.verbose:
- print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
+ print('Writing %d lines to %s...' % (len(output_lines), test), file=sys.stderr)
with open(test, 'wb') as f:
- f.writelines([l + '\n' for l in output_lines])
+ f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
if __name__ == '__main__':
main()
-