gn build: Merge r366361.

[android-x86/external-llvm.git] / utils / update_test_checks.py
diff --git a/utils/update_test_checks.py b/utils/update_test_checks.py

index 7d31959..92ce053 100755 (executable)
--- a/utils/update_test_checks.py
+++ b/utils/update_test_checks.py
@@ -1,13 +1,13 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python
  
-"""A script to generate FileCheck statements for regression tests.
+"""A script to generate FileCheck statements for 'opt' regression tests.
  
-This script is a utility to update LLVM opt or llc test cases with new
+This script is a utility to update LLVM opt test cases with new
  FileCheck patterns. It can either update all of the tests in the file or
  a single test function.
  
  Example usage:
-$ update_test_checks.py --tool=../bin/opt test/foo.ll
+$ update_test_checks.py --opt-binary=../bin/opt test/foo.ll
  
  Workflow:
  1. Make a compiler patch that requires updating some number of FileCheck lines
@@ -29,7 +29,10 @@ The script is designed to make adding checks to a test case fast, it is *not*
  designed to be authoritative about what constitutes a good test!
  """
  
+from __future__ import print_function
+
  import argparse
+import glob
  import itertools
  import os         # Used to advertise this file's name ("autogenerated_note").
  import string
@@ -38,236 +41,16 @@ import sys
  import tempfile
  import re
  
+from UpdateTestChecks import common
+
  ADVERT = '; NOTE: Assertions have been autogenerated by '
  
  # RegEx: this is where the magic happens.
  
-SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_X86_SHUFFLES_RE = (
-    re.compile(
-        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
-        flags=re.M))
-SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
-SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
-SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
-SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
-
-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
  IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
-LLC_FUNCTION_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
-    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
-    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
-    flags=(re.M | re.S))
-OPT_FUNCTION_RE = re.compile(
-    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
-    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
-    flags=(re.M | re.S))
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
-# Match things that look like identifiers, but only if they are followed by
-# spaces, commas, paren, or end of the string
-IR_VALUE_RE = re.compile(r'(\s+)%(\w+?)([,\s\(\)]|\Z)')
-
-
-# Invoke the tool that is being tested.
-def invoke_tool(args, cmd_args, ir):
-  with open(ir) as ir_file:
-    stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
-                                     shell=True, stdin=ir_file)
-  # Fix line endings to unix LF style.
-  stdout = stdout.replace('\r\n', '\n')
-  return stdout
-
-
-# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
-def scrub_asm(asm):
-  # Detect shuffle asm comments and hide the operands in favor of the comments.
-  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
-  # Generically match the stack offset of a memory operand.
-  asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
-  # Generically match a RIP-relative memory operand.
-  asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
-  # Generically match a LCP symbol.
-  asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
-  # Strip kill operands inserted into the asm.
-  asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
-  return asm
-
-
-def scrub_body(body, tool_basename):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
-  # Expand the tabs used for indentation.
-  body = string.expandtabs(body, 2)
-  # Strip trailing whitespace.
-  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
-  if tool_basename == "llc":
-    body = scrub_asm(body)
-  return body
-
-
-# Build up a dictionary of all the function bodies.
-def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
-  if tool_basename == "llc":
-    func_regex = LLC_FUNCTION_RE
-  else:
-    func_regex = OPT_FUNCTION_RE
-  for m in func_regex.finditer(raw_tool_output):
-    if not m:
-      continue
-    func = m.group('func')
-    scrubbed_body = scrub_body(m.group('body'), tool_basename)
-    if func.startswith('stress'):
-      # We only use the last line of the function body for stress tests.
-      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
-    if verbose:
-      print >>sys.stderr, 'Processing function: ' + func
-      for l in scrubbed_body.splitlines():
-        print >>sys.stderr, '  ' + l
-    for prefix in prefixes:
-      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
-        if prefix == prefixes[-1]:
-          print >>sys.stderr, ('WARNING: Found conflicting asm under the '
-                               'same prefix: %r!' % (prefix,))
-        else:
-          func_dict[prefix][func] = None
-          continue
-
-      func_dict[prefix][func] = scrubbed_body
-
-
-# Create a FileCheck variable name based on an IR name.
-def get_value_name(var):
-  if var.isdigit():
-    var = 'TMP' + var
-  var = var.replace('.', '_')
-  return var.upper()
-
-
-# Create a FileCheck variable from regex.
-def get_value_definition(var):
-  return '[[' + get_value_name(var) + ':%.*]]'
-
-
-# Use a FileCheck variable.
-def get_value_use(var):
-  return '[[' + get_value_name(var) + ']]'
-
-# Replace IR value defs and uses with FileCheck variables.
-def genericize_check_lines(lines):
-  # This gets called for each match that occurs in
-  # a line. We transform variables we haven't seen
-  # into defs, and variables we have seen into uses.
-  def transform_line_vars(match):
-    var = match.group(2)
-    if var in vars_seen:
-      rv = get_value_use(var)
-    else:
-      vars_seen.add(var)
-      rv = get_value_definition(var)
-    # re.sub replaces the entire regex match
-    # with whatever you return, so we have
-    # to make sure to hand it back everything
-    # including the commas and spaces.
-    return match.group(1) + rv + match.group(3)
-
-  vars_seen = set()
-  lines_with_def = []
-
-  for i, line in enumerate(lines):
-    # An IR variable named '%.' matches the FileCheck regex string.
-    line = line.replace('%.', '%dot')
-    # Ignore any comments, since the check lines will too.
-    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
-    lines[i] =  IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
-  return lines
-
-
-def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
-  # Select a label format based on whether we're checking asm or IR.
-  if tool_basename == "llc":
-    check_label_format = "; %s-LABEL: %s:"
-  else:
-    check_label_format = "; %s-LABEL: @%s("
-
-  printed_prefixes = []
-  for checkprefixes, _ in prefix_list:
-    for checkprefix in checkprefixes:
-      if checkprefix in printed_prefixes:
-        break
-      if not func_dict[checkprefix][func_name]:
-        continue
-      # Add some space between different check prefixes, but not after the last
-      # check line (before the test code).
-      #if len(printed_prefixes) != 0:
-      #  output_lines.append(';')
-      printed_prefixes.append(checkprefix)
-      output_lines.append(check_label_format % (checkprefix, func_name))
-      func_body = func_dict[checkprefix][func_name].splitlines()
-
-      # For IR output, change all defs to FileCheck variables, so we're immune
-      # to variable naming fashions.
-      if tool_basename == "opt":
-        func_body = genericize_check_lines(func_body)
-
-      # This could be selectively enabled with an optional invocation argument.
-      # Disabled for now: better to check everything. Be safe rather than sorry.
-
-      # Handle the first line of the function body as a special case because
-      # it's often just noise (a useless asm comment or entry label).
-      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
-      #  is_blank_line = True
-      #else:
-      #  output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
-      #  is_blank_line = False
-
-      # For llc tests, there may be asm directives between the label and the
-      # first checked line (most likely that first checked line is "# BB#0").
-      if tool_basename == "opt":
-        is_blank_line = False
-      else:
-        is_blank_line = True;
  
-      for func_line in func_body:
-        if func_line.strip() == '':
-          is_blank_line = True
-          continue
-        # Do not waste time checking IR comments.
-        if tool_basename == "opt":
-          func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
-
-        # Skip blank lines instead of checking them.
-        if is_blank_line == True:
-          output_lines.append('; %s:       %s' % (checkprefix, func_line))
-        else:
-          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
-        is_blank_line = False
-
-      # Add space between different check prefixes and also before the first
-      # line of code in the test function.
-      output_lines.append(';')
-      break
-  return output_lines
  
  
-def should_add_line_to_output(input_line, prefix_set):
-  # Skip any blank comment lines in the IR.
-  if input_line.strip() == ';':
-    return False
-  # Skip any blank lines in the IR.
-  #if input_line.strip() == '':
-  #  return False
-  # And skip any CHECK lines. We're building our own.
-  m = CHECK_RE.match(input_line)
-  if m and m.group(1) in prefix_set:
-    return False
-
-  return True
  
  
  def main():
@@ -275,8 +58,8 @@ def main():
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
-  parser.add_argument('--tool-binary', default='llc',
-                      help='The tool used to generate the test case')
+  parser.add_argument('--opt-binary', default='opt',
+                      help='The opt binary used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
@@ -284,19 +67,27 @@ def main():
  
    autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
  
-  tool_basename = os.path.basename(args.tool_binary)
-  if (tool_basename != "llc" and tool_basename != "opt"):
-    print >>sys.stderr, 'ERROR: Unexpected tool name: ' + tool_basename
+  opt_basename = os.path.basename(args.opt_binary)
+  if not re.match(r'^opt(-\d+)?$', opt_basename):
+    print('ERROR: Unexpected opt name: ' + opt_basename, file=sys.stderr)
      sys.exit(1)
+  opt_basename = 'opt'
  
+  test_paths = []
    for test in args.tests:
+    if not glob.glob(test):
+      print('WARNING: Test file \'%s\' was not found. Ignoring it.' % (test,), file=sys.stderr)
+      continue
+    test_paths.append(test)
+
+  for test in test_paths:
      if args.verbose:
-      print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
+      print('Scanning for RUN lines in test file: %s' % (test,), file=sys.stderr)
      with open(test) as f:
        input_lines = [l.rstrip() for l in f]
  
      raw_lines = [m.group(1)
-                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
      run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
      for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
@@ -305,26 +96,26 @@ def main():
          run_lines.append(l)
  
      if args.verbose:
-      print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
+      print('Found %d RUN lines:' % (len(run_lines),), file=sys.stderr)
        for l in run_lines:
-        print >>sys.stderr, '  RUN: ' + l
+        print('  RUN: ' + l, file=sys.stderr)
  
      prefix_list = []
      for l in run_lines:
        (tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in l.split('|', 1)])
  
-      if not tool_cmd.startswith(tool_basename + ' '):
-        print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l)
+      if not tool_cmd.startswith(opt_basename + ' '):
+        print('WARNING: Skipping non-%s RUN line: %s' % (opt_basename, l), file=sys.stderr)
          continue
  
        if not filecheck_cmd.startswith('FileCheck '):
-        print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
+        print('WARNING: Skipping non-FileChecked RUN line: ' + l, file=sys.stderr)
          continue
  
-      tool_cmd_args = tool_cmd[len(tool_basename):].strip()
+      tool_cmd_args = tool_cmd[len(opt_basename):].strip()
        tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
  
-      check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                                 for item in m.group(1).split(',')]
        if not check_prefixes:
          check_prefixes = ['CHECK']
@@ -337,19 +128,21 @@ def main():
      for prefixes, _ in prefix_list:
        for prefix in prefixes:
          func_dict.update({prefix: dict()})
-    for prefixes, tool_args in prefix_list:
+    for prefixes, opt_args in prefix_list:
        if args.verbose:
-        print >>sys.stderr, 'Extracted tool cmd: ' + tool_basename + ' ' + tool_args
-        print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
+        print('Extracted opt cmd: ' + opt_basename + ' ' + opt_args, file=sys.stderr)
+        print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr)
  
-      raw_tool_output = invoke_tool(args, tool_args, test)
-      build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)
+      raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test)
+      common.build_function_body_dictionary(
+              common.OPT_FUNCTION_RE, common.scrub_body, [],
+              raw_tool_output, prefixes, func_dict, args.verbose)
  
      is_in_function = False
      is_in_function_start = False
      prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
      if args.verbose:
-      print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
+      print('Rewriting FileCheck prefixes: %s' % (prefix_set,), file=sys.stderr)
      output_lines = []
      output_lines.append(autogenerated_note)
  
@@ -358,20 +151,20 @@ def main():
          if input_line == '':
            continue
          if input_line.lstrip().startswith(';'):
-          m = CHECK_RE.match(input_line)
+          m = common.CHECK_RE.match(input_line)
            if not m or m.group(1) not in prefix_set:
              output_lines.append(input_line)
              continue
  
          # Print out the various check lines here.
-        output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
+        common.add_ir_checks(output_lines, ';', prefix_list, func_dict, func_name)
          is_in_function_start = False
  
        if is_in_function:
-        if should_add_line_to_output(input_line, prefix_set) == True:
+        if common.should_add_line_to_output(input_line, prefix_set):
            # This input line of the function body will go as-is into the output.
            # Except make leading whitespace uniform: 2 spaces.
-          input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
+          input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
            output_lines.append(input_line)
          else:
            continue
@@ -389,19 +182,18 @@ def main():
        m = IR_FUNCTION_RE.match(input_line)
        if not m:
          continue
-      name = m.group(1)
-      if args.function is not None and name != args.function:
+      func_name = m.group(1)
+      if args.function is not None and func_name != args.function:
          # When filtering on a specific function, skip all others.
          continue
        is_in_function = is_in_function_start = True
  
      if args.verbose:
-      print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
+      print('Writing %d lines to %s...' % (len(output_lines), test), file=sys.stderr)
  
      with open(test, 'wb') as f:
-      f.writelines([l + '\n' for l in output_lines])
+      f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
  
  
  if __name__ == '__main__':
    main()
-