From cf77b92dddad227a70dbe3214cefa375fad10122 Mon Sep 17 00:00:00 2001
From: Zachary Turner
Date: Thu, 30 Aug 2018 20:53:48 +0000
Subject: [PATCH] Add a utility script to stress test the demangler.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341120 91177308-0d34-0410-b5e6-96231b3b80d8
---
 utils/demangle_tree.py | 226 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100644 utils/demangle_tree.py

diff --git a/utils/demangle_tree.py b/utils/demangle_tree.py
new file mode 100644
index 00000000000..1185a233a41
--- /dev/null
+++ b/utils/demangle_tree.py
@@ -0,0 +1,226 @@
+# Given a path to llvm-objdump and a directory tree, spider the directory tree,
+# dumping every object file encountered with the correct options needed to demangle
+# symbols in the object file, and collect statistics about failed / crashed
+# demanglings. Useful for stress testing the demangler against a large corpus
+# of inputs.
+
+import argparse
+import functools
+import os
+import re
+import sys
+import subprocess
+import traceback
+from multiprocessing import Pool
+import multiprocessing
+
+args = None
+
+def parse_line(line):
+    question = line.find('?')
+    if question == -1:
+        return None, None
+
+    open_paren = line.find('(', question)
+    if open_paren == -1:
+        return None, None
+    close_paren = line.rfind(')', open_paren)
+    if close_paren == -1:
+        return None, None
+    mangled = line[question : open_paren]
+    demangled = line[open_paren+1 : close_paren]
+    return mangled.strip(), demangled.strip()
+
+class Result(object):
+    def __init__(self):
+        self.crashed = []
+        self.file = None
+        self.nsymbols = 0
+        self.errors = set()
+        self.nfiles = 0
+
+class MapContext(object):
+    def __init__(self):
+        self.rincomplete = None
+        self.rcumulative = Result()
+        self.pending_objs = []
+        self.npending = 0
+
+def process_file(path, objdump):
+    r = Result()
+    r.file = path
+
+    popen_args = [objdump, '-t', '-demangle', path]
+    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = p.communicate()
+    if p.returncode != 0:
+        r.crashed = [r.file]
+        return r
+
+    output = stdout.decode('utf-8')
+
+    for line in output.splitlines():
+        mangled, demangled = parse_line(line)
+        if mangled is None:
+            continue
+        r.nsymbols += 1
+        if "invalid mangled name" in demangled:
+            r.errors.add(mangled)
+    return r
+
+def add_results(r1, r2):
+    r1.crashed.extend(r2.crashed)
+    r1.errors.update(r2.errors)
+    r1.nsymbols += r2.nsymbols
+    r1.nfiles += r2.nfiles
+
+def print_result_row(directory, result):
+    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
+        result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))
+
+def process_one_chunk(pool, chunk_size, objdump, context):
+    objs = []
+
+    incomplete = False
+    dir_results = {}
+    ordered_dirs = []
+    while context.npending > 0 and len(objs) < chunk_size:
+        this_dir = context.pending_objs[0][0]
+        ordered_dirs.append(this_dir)
+        re = Result()
+        if context.rincomplete is not None:
+            re = context.rincomplete
+            context.rincomplete = None
+
+        dir_results[this_dir] = re
+        re.file = this_dir
+
+        nneeded = chunk_size - len(objs)
+        objs_this_dir = context.pending_objs[0][1]
+        navail = len(objs_this_dir)
+        ntaken = min(nneeded, navail)
+        objs.extend(objs_this_dir[0:ntaken])
+        remaining_objs_this_dir = objs_this_dir[ntaken:]
+        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
+        context.npending -= ntaken
+        if ntaken == navail:
+            context.pending_objs.pop(0)
+        else:
+            incomplete = True
+
+        re.nfiles += ntaken
+
+    assert(len(objs) == chunk_size or context.npending == 0)
+
+    copier = functools.partial(process_file, objdump=objdump)
+    mapped_results = list(pool.map(copier, objs))
+
+    for mr in mapped_results:
+        result_dir = os.path.dirname(mr.file)
+        result_entry = dir_results[result_dir]
+        add_results(result_entry, mr)
+
+    # It's only possible that a single item is incomplete, and it has to be the
+    # last item.
+    if incomplete:
+        context.rincomplete = dir_results[ordered_dirs[-1]]
+        ordered_dirs.pop()
+
+    # Now ordered_dirs contains a list of all directories which *did* complete.
+    for c in ordered_dirs:
+        re = dir_results[c]
+        add_results(context.rcumulative, re)
+        print_result_row(c, re)
+
+def process_pending_files(pool, chunk_size, objdump, context):
+    while context.npending >= chunk_size:
+        process_one_chunk(pool, chunk_size, objdump, context)
+
+def go():
+    global args
+
+    obj_dir = args.dir
+    extensions = args.extensions.split(',')
+    extensions = [x if x[0] == '.' else '.' + x for x in extensions]
+
+
+    pool_size = 48
+    pool = Pool(processes=pool_size)
+
+    try:
+        nfiles = 0
+        context = MapContext()
+
+        for root, dirs, files in os.walk(obj_dir):
+            root = os.path.normpath(root)
+            pending = []
+            for f in files:
+                file, ext = os.path.splitext(f)
+                if not ext in extensions:
+                    continue
+
+                nfiles += 1
+                full_path = os.path.join(root, f)
+                full_path = os.path.normpath(full_path)
+                pending.append(full_path)
+
+            # If this directory had no object files, just print a default
+            # status line and continue with the next dir
+            if len(pending) == 0:
+                print_result_row(root, Result())
+                continue
+
+            context.npending += len(pending)
+            context.pending_objs.append((root, pending))
+            # Drain the tasks, `pool_size` at a time, until we have less than
+            # `pool_size` tasks remaining.
+            process_pending_files(pool, pool_size, args.objdump, context)
+
+        assert(context.npending < pool_size)
+        process_one_chunk(pool, pool_size, args.objdump, context)
+
+        total = context.rcumulative
+        nfailed = len(total.errors)
+        nsuccess = total.nsymbols - nfailed
+        ncrashed = len(total.crashed)
+
+        if (nfailed > 0):
+            print("Failures:")
+            for m in sorted(total.errors):
+                print("  " + m)
+        if (ncrashed > 0):
+            print("Crashes:")
+            for f in sorted(total.crashed):
+                print("  " + f)
+        print("Summary:")
+        spct = float(nsuccess)/float(total.nsymbols)
+        fpct = float(nfailed)/float(total.nsymbols)
+        cpct = float(ncrashed)/float(nfiles)
+        print("Processed {0} object files.".format(nfiles))
+        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
+        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
+        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
+
+    except:
+        traceback.print_exc()
+
+    pool.close()
+    pool.join()
+
+if __name__ == "__main__":
+    def_obj = 'obj' if sys.platform == 'win32' else 'o'
+
+    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
+    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
+    parser.add_argument('--objdump', type=str, default='llvm-objdump', help='path to llvm-objdump. If not specified ' +
+                        'the tool is located as if by `which llvm-objdump`.')
+    parser.add_argument('--extensions', type=str, default=def_obj,
+                        help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
+                             'By default this will be `obj` on Windows and `o` otherwise.')
+
+    args = parser.parse_args()
+
+
+    multiprocessing.freeze_support()
+    go()
+
-- 
2.11.0
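
For context on what each worker process in this script does: it runs `llvm-objdump -t -demangle` on one object file and scans the output for the demangler's "invalid mangled name" marker, while a non-zero exit code is counted as a crash. A minimal single-file sketch of that flow (not part of the patch; the object file name is hypothetical and llvm-objdump is assumed to be on PATH):

    import subprocess

    OBJDUMP = 'llvm-objdump'   # assumed to be on PATH (hypothetical)
    OBJ_FILE = 'example.o'     # hypothetical input object file

    p = subprocess.Popen([OBJDUMP, '-t', '-demangle', OBJ_FILE],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    if p.returncode != 0:
        # A non-zero exit is treated as a demangler crash on this file.
        print('llvm-objdump failed on ' + OBJ_FILE)
    else:
        failures = [l for l in stdout.decode('utf-8').splitlines()
                    if 'invalid mangled name' in l]
        print('{0} symbols failed to demangle'.format(len(failures)))

The full script drives the same command across a directory tree with a multiprocessing pool, e.g. `python utils/demangle_tree.py <build-dir> --objdump=<path-to-llvm-objdump>` (placeholder paths).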