#!/usr/bin/env python
# Copyright (C) 2010 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Run layout tests.

This is a port of the existing webkit test script run-webkit-tests.

The TestRunner class runs a series of tests (TestType interface) against a set
of test files.  If a test file fails a TestType, it returns a list of
TestFailure objects to the TestRunner.  The TestRunner then aggregates the
TestFailures to create a final report.

This script reads several files, if they exist, in the test_lists subdirectory
next to this script itself.  Each file should contain a list of paths to
individual tests or entire subdirectories of tests, relative to the outermost
test directory.  Entire lines starting with '//' (comments) will be ignored.

For details of the files' contents and purposes, see test_lists/README.
"""
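
# An illustrative sketch of a test_lists entry file, based on the format
# described above (the paths below are hypothetical; see test_lists/README
# for the real file names and their semantics):
#
#   // Lines starting with '//' are comments and are ignored.
#   fast/css/some-test.html
#   http/tests/xmlhttprequest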

from __future__ import with_statement

import codecs
import errno
import glob
import logging
import math
import optparse
import os
import pdb
import platform
import Queue
import random
import re
import shutil
import signal
import sys
import time
import traceback

from layout_package import dump_render_tree_thread
from layout_package import json_layout_results_generator
from layout_package import printing
from layout_package import test_expectations
from layout_package import test_failures
from layout_package import test_files
from layout_package import test_results_uploader
from test_types import image_diff
from test_types import text_diff
from test_types import test_type_base

from webkitpy.common.system.executive import Executive
from webkitpy.thirdparty import simplejson

import port

_log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")

# Builder base URL where we have the archived test results.
BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"

TestExpectationsFile = test_expectations.TestExpectationsFile

class TestInfo:
    """Groups information about a test for easy passing of data."""

    def __init__(self, port, filename, timeout):
        """Generates the URI and stores the filename and timeout for this test.

        Args:
          filename: Full path to the test.
          timeout: Timeout for running the test in TestShell.
        """
        self.filename = filename
        self.uri = port.filename_to_uri(filename)
        self.timeout = timeout
        # FIXME: Confusing that the file is .checksum and we call it "hash"
        self._expected_hash_path = port.expected_filename(filename, '.checksum')
        self._have_read_expected_hash = False
        self._image_hash = None

    def _read_image_hash(self):
        try:
            with codecs.open(self._expected_hash_path, "r", "ascii") as hash_file:
                return hash_file.read()
        except IOError, e:
            if errno.ENOENT != e.errno:
                raise

    def image_hash(self):
        # Read the image_hash lazily to reduce startup time.
        # This class is accessed across threads, but only one thread should
        # ever be dealing with any given TestInfo so no locking is needed.
        if not self._have_read_expected_hash:
            self._have_read_expected_hash = True
            self._image_hash = self._read_image_hash()
        return self._image_hash

class ResultSummary(object):
    """A class for partitioning the test results we get into buckets.

    This class is basically a glorified struct and it's private to this file
    so we don't bother with any information hiding."""

    def __init__(self, expectations, test_files):
        self.total = len(test_files)
        self.remaining = self.total
        self.expectations = expectations
        self.expected = 0
        self.unexpected = 0
        self.tests_by_expectation = {}
        self.tests_by_timeline = {}
        self.results = {}
        self.unexpected_results = {}
        self.failures = {}
        self.tests_by_expectation[test_expectations.SKIP] = set()
        for expectation in TestExpectationsFile.EXPECTATIONS.values():
            self.tests_by_expectation[expectation] = set()
        for timeline in TestExpectationsFile.TIMELINES.values():
            self.tests_by_timeline[timeline] = (
                expectations.get_tests_with_timeline(timeline))

    def add(self, result, expected):
        """Add a TestResult into the appropriate bin.

        Args:
          result: TestResult from dump_render_tree_thread.
          expected: whether the result was what we expected it to be.
        """

        self.tests_by_expectation[result.type].add(result.filename)
        self.results[result.filename] = result
        self.remaining -= 1
        if len(result.failures):
            self.failures[result.filename] = result.failures
        if expected:
            self.expected += 1
        else:
            self.unexpected_results[result.filename] = result.type
            self.unexpected += 1


def summarize_unexpected_results(port_obj, expectations, result_summary,
                                 retry_summary):
    """Summarize any unexpected results as a dict.

    FIXME: split this data structure into a separate class?

    Args:
        port_obj: interface to port-specific hooks
        expectations: test_expectations.TestExpectations object
        result_summary: summary object from initial test runs
        retry_summary: summary object from final test run of retried tests
    Returns:
        A dictionary containing a summary of the unexpected results from the
        run, with the following fields:
        'version': a version indicator (1 in this version)
        'fixable': # of fixable tests (NOW - PASS)
        'skipped': # of skipped tests (NOW & SKIPPED)
        'num_regressions': # of non-flaky failures
        'num_flaky': # of flaky failures
        'num_passes': # of unexpected passes
        'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
    """
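    # Illustrative shape of the returned dictionary (the test name and the
    # counts below are hypothetical):
    #
    #   {'version': 1, 'fixable': 12, 'skipped': 3,
    #    'num_regressions': 2, 'num_flaky': 1, 'num_passes': 1,
    #    'tests': {'fast/css/some-test.html':
    #                  {'expected': 'PASS', 'actual': 'TEXT'}}}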
    results = {}
    results['version'] = 1

    tbe = result_summary.tests_by_expectation
    tbt = result_summary.tests_by_timeline
    results['fixable'] = len(tbt[test_expectations.NOW] -
                                tbe[test_expectations.PASS])
    results['skipped'] = len(tbt[test_expectations.NOW] &
                                tbe[test_expectations.SKIP])

    num_passes = 0
    num_flaky = 0
    num_regressions = 0
    keywords = {}
    for k, v in TestExpectationsFile.EXPECTATIONS.iteritems():
        keywords[v] = k.upper()

    tests = {}
    for filename, result in result_summary.unexpected_results.iteritems():
        # Note that if a test crashed in the original run, we ignore
        # whether or not it crashed when we retried it (if we retried it),
        # and always consider the result not flaky.
        test = port_obj.relative_test_filename(filename)
        expected = expectations.get_expectations_string(filename)
        actual = [keywords[result]]

        if result == test_expectations.PASS:
            num_passes += 1
        elif result == test_expectations.CRASH:
            num_regressions += 1
        else:
            if filename not in retry_summary.unexpected_results:
                actual.extend(expectations.get_expectations_string(
                    filename).split(" "))
                num_flaky += 1
            else:
                retry_result = retry_summary.unexpected_results[filename]
                if result != retry_result:
                    actual.append(keywords[retry_result])
                    num_flaky += 1
                else:
                    num_regressions += 1

        tests[test] = {}
        tests[test]['expected'] = expected
        tests[test]['actual'] = " ".join(actual)

    results['tests'] = tests
    results['num_passes'] = num_passes
    results['num_flaky'] = num_flaky
    results['num_regressions'] = num_regressions

    return results


class TestRunner:
    """A class for managing the running of a series of tests on a set of
    layout test files."""

    HTTP_SUBDIR = os.sep.join(['', 'http', ''])
    WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', ''])

    # The per-test timeout in milliseconds, if no --time-out-ms option was
    # given to run_webkit_tests. This should correspond to the default timeout
    # in DumpRenderTree.
    DEFAULT_TEST_TIMEOUT_MS = 6 * 1000

    def __init__(self, port, options, printer):
        """Initialize test runner data structures.

        Args:
          port: an object implementing port-specific functionality
          options: a dictionary of command line options
          printer: a Printer object to record updates to.
        """
        self._port = port
        self._options = options
        self._printer = printer

        # disable wss server. need to install pyOpenSSL on buildbots.
        # self._websocket_secure_server = websocket_server.PyWebSocket(
        #        options.results_directory, use_tls=True, port=9323)

        # a list of TestType objects
        self._test_types = []

        # a set of test files, and the same tests as a list
        self._test_files = set()
        self._test_files_list = None
        self._result_queue = Queue.Queue()

        self._retrying = False

        # Hack for dumping threads on the bots
        self._last_thread_dump = None

    def __del__(self):
        _log.debug("flushing stdout")
        sys.stdout.flush()
        _log.debug("flushing stderr")
        sys.stderr.flush()
        _log.debug("stopping http server")
        self._port.stop_http_server()
        _log.debug("stopping websocket server")
        self._port.stop_websocket_server()

    def gather_file_paths(self, paths):
        """Find all the files to test.

        Args:
          paths: a list of globs to use instead of the defaults."""
        self._test_files = test_files.gather_test_files(self._port, paths)

    def parse_expectations(self, test_platform_name, is_debug_mode):
        """Parse the expectations from the test_list files and return a data
        structure holding them. Throws an error if the test_list files have
        invalid syntax."""
        if self._options.lint_test_files:
            test_files = None
        else:
            test_files = self._test_files

        try:
            expectations_str = self._port.test_expectations()
            overrides_str = self._port.test_expectations_overrides()
            self._expectations = test_expectations.TestExpectations(
                self._port, test_files, expectations_str, test_platform_name,
                is_debug_mode, self._options.lint_test_files,
                tests_are_present=True, overrides=overrides_str)
            return self._expectations
        except SyntaxError, err:
            if self._options.lint_test_files:
                print str(err)
            else:
                raise err

    def prepare_lists_and_print_output(self):
        """Creates appropriate subsets of the test lists and returns a
        ResultSummary object. Also prints expected test counts.
        """

        # Remove skipped - both fixable and ignored - files from the
        # top-level list of files to test.
        num_all_test_files = len(self._test_files)
        self._printer.print_expected("Found:  %d tests" %
                                     (len(self._test_files)))
        if not num_all_test_files:
            _log.critical("No tests to run.")
            sys.exit(1)

        skipped = set()
        if num_all_test_files > 1 and not self._options.force:
            skipped = self._expectations.get_tests_with_result_type(
                           test_expectations.SKIP)
            self._test_files -= skipped

        # Create a sorted list of test files so the subset chunk,
        # if used, contains alphabetically consecutive tests.
        self._test_files_list = list(self._test_files)
        if self._options.randomize_order:
            random.shuffle(self._test_files_list)
        else:
            self._test_files_list.sort()

        # If the user specifies they just want to run a subset of the tests,
        # just grab a subset of the non-skipped tests.
        if self._options.run_chunk or self._options.run_part:
            chunk_value = self._options.run_chunk or self._options.run_part
            test_files = self._test_files_list
            try:
                (chunk_num, chunk_len) = chunk_value.split(":")
                chunk_num = int(chunk_num)
                assert(chunk_num >= 0)
                test_size = int(chunk_len)
                assert(test_size > 0)
            except:
                _log.critical("invalid chunk '%s'" % chunk_value)
                sys.exit(1)

            # Get the number of tests
            num_tests = len(test_files)

            # Get the start offset of the slice.
            if self._options.run_chunk:
                chunk_len = test_size
                # In this case chunk_num can be really large. We need to make
                # the slave's chunk fit within the current number of tests.
                slice_start = (chunk_num * chunk_len) % num_tests
            else:
                # Validate the data.
                assert(test_size <= num_tests)
                assert(chunk_num <= test_size)

                # To compute chunk_len without skipping any tests, we round
                # num_tests up to the next multiple of test_size so that the
                # parts cover all the tests.
                rounded_tests = num_tests
                if rounded_tests % test_size != 0:
                    rounded_tests = (num_tests + test_size -
                                     (num_tests % test_size))

                chunk_len = rounded_tests / test_size
                slice_start = chunk_len * (chunk_num - 1)
                # It is fine if this takes us past the end of the list;
                # we clamp to num_tests below.

            # Get the end offset of the slice.
            slice_end = min(num_tests, slice_start + chunk_len)
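            # Worked example (illustrative numbers): with 10 tests,
            # --run-part=2:3 rounds 10 up to 12, giving chunk_len 4 and
            # slice [4:8]; --run-chunk=3:4 gives chunk_len 4 and
            # slice_start (3 * 4) % 10 == 2, so slice [2:6].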

            files = test_files[slice_start:slice_end]

            tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
                (slice_end - slice_start), slice_start, slice_end, num_tests)
            self._printer.print_expected(tests_run_msg)

            # If we reached the end and we don't have enough tests, we run some
            # from the beginning.
            if (self._options.run_chunk and
                (slice_end - slice_start < chunk_len)):
                extra = 1 + chunk_len - (slice_end - slice_start)
                extra_msg = ('   last chunk is partial, appending [0:%d]' %
                            extra)
                self._printer.print_expected(extra_msg)
                tests_run_msg += "\n" + extra_msg
                files.extend(test_files[0:extra])
            tests_run_filename = os.path.join(self._options.results_directory,
                                              "tests_run.txt")
            with codecs.open(tests_run_filename, "w", "utf-8") as file:
                file.write(tests_run_msg + "\n")

            len_skip_chunk = int(len(files) * len(skipped) /
                                 float(len(self._test_files)))
            skip_chunk_list = list(skipped)[0:len_skip_chunk]
            skip_chunk = set(skip_chunk_list)

            # Update expectations so that the stats are calculated correctly.
            # We need to pass a list that includes the right # of skipped files
            # to parse_expectations() so that ResultSummary() will get the
            # correct stats. So, we add in the subset of skipped files, and
            # then subtract them back out.
            self._test_files_list = files + skip_chunk_list
            self._test_files = set(self._test_files_list)

            self._expectations = self.parse_expectations(
                self._port.test_platform_name(),
                self._options.configuration == 'Debug')

            self._test_files = set(files)
            self._test_files_list = files
        else:
            skip_chunk = skipped

        result_summary = ResultSummary(self._expectations,
            self._test_files | skip_chunk)
        self._print_expected_results_of_type(result_summary,
            test_expectations.PASS, "passes")
        self._print_expected_results_of_type(result_summary,
            test_expectations.FAIL, "failures")
        self._print_expected_results_of_type(result_summary,
            test_expectations.FLAKY, "flaky")
        self._print_expected_results_of_type(result_summary,
            test_expectations.SKIP, "skipped")

        if self._options.force:
            self._printer.print_expected('Running all tests, including '
                                         'skips (--force)')
        else:
            # Note that we don't actually run the skipped tests (they were
            # subtracted out of self._test_files, above), but we stub out the
            # results here so the statistics can remain accurate.
            for test in skip_chunk:
                result = dump_render_tree_thread.TestResult(test,
                    failures=[], test_run_time=0, total_time_for_all_diffs=0,
                    time_for_diffs=0)
                result.type = test_expectations.SKIP
                result_summary.add(result, expected=True)
        self._printer.print_expected('')

        return result_summary

    def add_test_type(self, test_type):
        """Add a TestType to the TestRunner."""
        self._test_types.append(test_type)

    def _get_dir_for_test_file(self, test_file):
        """Returns the directory by which to shard the given test file."""
        index = test_file.rfind(os.sep + 'LayoutTests' + os.sep)

        test_file = test_file[index + len('LayoutTests/'):]
        test_file_parts = test_file.split(os.sep, 1)
        directory = test_file_parts[0]
        test_file = test_file_parts[1]

        # The http tests are very stable on mac/linux.
        # TODO(ojan): Make the http server on Windows be apache so we can
        # shard the http tests there as well. Switching to apache is
        # what made them stable on linux/mac.
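        # For example (illustrative paths): a test under LayoutTests/fast/css/
        # shards as 'fast/css' on every platform, while a test under
        # LayoutTests/http/tests/xmlhttprequest/ shards as
        # 'http/tests/xmlhttprequest' on mac/linux but as just 'http' on
        # Windows, where the whole http suite is kept in a single shard.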
        return_value = directory
        while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
                and test_file.find(os.sep) >= 0):
            test_file_parts = test_file.split(os.sep, 1)
            directory = test_file_parts[0]
            return_value = os.path.join(return_value, directory)
            test_file = test_file_parts[1]

        return return_value

    def _get_test_info_for_file(self, test_file):
        """Returns the appropriate TestInfo object for the file. Mostly this
        is used for looking up the timeout value (in ms) to use for the given
        test."""
        if self._expectations.has_modifier(test_file, test_expectations.SLOW):
            return TestInfo(self._port, test_file,
                            self._options.slow_time_out_ms)
        return TestInfo(self._port, test_file, self._options.time_out_ms)

    def _get_test_file_queue(self, test_files):
        """Create the thread safe queue of lists of (test filenames, test URIs)
        tuples. Each TestShellThread pulls a list from this queue and runs
        those tests in order before grabbing the next available list.

        Shard the lists by directory. This helps ensure that tests that depend
        on each other (aka bad tests!) continue to run together, as most
        cross-test dependencies tend to occur within the same directory.

        Return:
          The Queue of lists of TestInfo objects.
        """
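        # Each queue entry is a (directory, [TestInfo, ...]) tuple, e.g.
        # ('fast/css', [TestInfo(...), TestInfo(...)]) -- illustrative only.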

        if (self._options.experimental_fully_parallel or
            self._is_single_threaded()):
            filename_queue = Queue.Queue()
            for test_file in test_files:
                filename_queue.put(
                    ('.', [self._get_test_info_for_file(test_file)]))
            return filename_queue

        tests_by_dir = {}
        for test_file in test_files:
            directory = self._get_dir_for_test_file(test_file)
            tests_by_dir.setdefault(directory, [])
            tests_by_dir[directory].append(
                self._get_test_info_for_file(test_file))

        # Sort by the number of tests in the dir so that the ones with the
        # most tests get run first in order to maximize parallelization.
        # Number of tests is a good enough, but not perfect, approximation
        # of how long that set of tests will take to run. We can't just use
        # a PriorityQueue until we move to Python 2.6.
        test_lists = []
        http_tests = None
        for directory in tests_by_dir:
            test_list = tests_by_dir[directory]
            # Keep the tests in alphabetical order.
            # TODO: Remove once tests are fixed so they can be run in any
            # order.
            test_list.reverse()
            test_list_tuple = (directory, test_list)
            if directory == 'LayoutTests' + os.sep + 'http':
                http_tests = test_list_tuple
            else:
                test_lists.append(test_list_tuple)
        test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))

        # Put the http tests first. There are only a couple hundred of them,
        # but each http test takes a very long time to run, so sorting by the
        # number of tests doesn't accurately capture how long they take to run.
        if http_tests:
            test_lists.insert(0, http_tests)

        filename_queue = Queue.Queue()
        for item in test_lists:
            filename_queue.put(item)
        return filename_queue

    def _get_dump_render_tree_args(self, index):
        """Returns the tuple of arguments for tests and for DumpRenderTree."""
        shell_args = []
        test_args = test_type_base.TestArguments()
        png_path = None
        if self._options.pixel_tests:
            png_path = os.path.join(self._options.results_directory,
                                    "png_result%s.png" % index)
            shell_args.append("--pixel-tests=" + png_path)
            test_args.png_path = png_path

        test_args.new_baseline = self._options.new_baseline
        test_args.reset_results = self._options.reset_results

        if self._options.startup_dialog:
            shell_args.append('--testshell-startup-dialog')

        if self._options.gp_fault_error_box:
            shell_args.append('--gp-fault-error-box')

        return test_args, png_path, shell_args

    def _contains_tests(self, subdir):
        for test_file in self._test_files:
            if test_file.find(subdir) >= 0:
                return True
        return False

    def _instantiate_dump_render_tree_threads(self, test_files,
                                              result_summary):
        """Instantiates and starts the TestShellThread(s).

        Return:
          The list of threads.
        """
        filename_queue = self._get_test_file_queue(test_files)

        # Instantiate TestShellThreads and start them.
        threads = []
        for i in xrange(int(self._options.child_processes)):
            # Create separate TestTypes instances for each thread.
            test_types = []
            for test_type in self._test_types:
                test_types.append(test_type(self._port,
                                    self._options.results_directory))

            test_args, png_path, shell_args = \
                self._get_dump_render_tree_args(i)
            thread = dump_render_tree_thread.TestShellThread(self._port,
                filename_queue, self._result_queue, test_types, test_args,
                png_path, shell_args, self._options)
            if self._is_single_threaded():
                thread.run_in_main_thread(self, result_summary)
            else:
                thread.start()
            threads.append(thread)

        return threads

    def _is_single_threaded(self):
        """Returns whether we should run all the tests in the main thread."""
        return int(self._options.child_processes) == 1

    def _run_tests(self, file_list, result_summary):
        """Runs the tests in the file_list.

        Args:
            file_list: list of test files to run
            result_summary: summary object to populate with the results

        Return: A tuple (keyboard_interrupted, thread_timings, test_timings,
            individual_test_timings)
            keyboard_interrupted is whether someone typed Ctrl-C
            thread_timings is a list of dicts with the total runtime
              of each thread with 'name', 'num_tests', 'total_time' properties
            test_timings is a list of timings for each sharded subdirectory
              of the form [time, directory_name, num_tests]
            individual_test_timings is a list of run times for each test
              in the form {filename:filename, test_run_time:test_run_time}
        """
        # FIXME: We should use webkitpy.tool.grammar.pluralize here.
        plural = ""
        if self._options.child_processes > 1:
            plural = "s"
        self._printer.print_update('Starting %s%s ...' %
                                   (self._port.driver_name(), plural))
        threads = self._instantiate_dump_render_tree_threads(file_list,
                                                             result_summary)
        self._printer.print_update("Starting testing ...")

        keyboard_interrupted = self._wait_for_threads_to_finish(threads,
                                                                result_summary)
        (thread_timings, test_timings, individual_test_timings) = \
            self._collect_timing_info(threads)

        return (keyboard_interrupted, thread_timings, test_timings,
                individual_test_timings)

    def _wait_for_threads_to_finish(self, threads, result_summary):
        keyboard_interrupted = False
        try:
            # Loop through all the threads waiting for them to finish.
            some_thread_is_alive = True
            while some_thread_is_alive:
                some_thread_is_alive = False
                t = time.time()
                for thread in threads:
                    exception_info = thread.exception_info()
                    if exception_info is not None:
                        # Re-raise the thread's exception here to make it
                        # clear that testing was aborted. Otherwise,
                        # the tests that did not run would be assumed
                        # to have passed.
                        raise exception_info[0], exception_info[1], \
                            exception_info[2]

                    if thread.isAlive():
                        some_thread_is_alive = True
                        next_timeout = thread.next_timeout()
                        if (next_timeout and t > next_timeout):
                            _log_wedged_thread(thread)
                            thread.clear_next_timeout()

                self.update_summary(result_summary)

                if some_thread_is_alive:
                    time.sleep(0.01)

        except KeyboardInterrupt:
            keyboard_interrupted = True
            for thread in threads:
                thread.cancel()

        return keyboard_interrupted

    def _collect_timing_info(self, threads):
        test_timings = {}
        individual_test_timings = []
        thread_timings = []

        for thread in threads:
            thread_timings.append({'name': thread.getName(),
                                   'num_tests': thread.get_num_tests(),
                                   'total_time': thread.get_total_time()})
            test_timings.update(thread.get_directory_timing_stats())
            individual_test_timings.extend(thread.get_test_results())

        return (thread_timings, test_timings, individual_test_timings)

    def needs_http(self):
        """Returns whether the test runner needs an HTTP server."""
        return self._contains_tests(self.HTTP_SUBDIR)

    def run(self, result_summary):
        """Run all our tests on all our test files.

        For each test file, we run each test type. If there are any failures,
        we collect them for reporting.

        Args:
          result_summary: a summary object tracking the test results.

        Return:
          The number of unexpected results (0 == success)
        """
        if not self._test_files:
            return 0
        start_time = time.time()

        if self.needs_http():
            self._printer.print_update('Starting HTTP server ...')

            self._port.start_http_server()

        if self._contains_tests(self.WEBSOCKET_SUBDIR):
            self._printer.print_update('Starting WebSocket server ...')
            self._port.start_websocket_server()
            # self._websocket_secure_server.Start()

        keyboard_interrupted, thread_timings, test_timings, \
            individual_test_timings = (
            self._run_tests(self._test_files_list, result_summary))

        # We exclude the crashes from the list of results to retry, because
        # we want to treat even a potentially flaky crash as an error.
        failures = self._get_failures(result_summary, include_crashes=False)
        retry_summary = result_summary
        while (len(failures) and self._options.retry_failures and
            not self._retrying and not keyboard_interrupted):
            _log.info('')
            _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
            _log.info('')
            self._retrying = True
            retry_summary = ResultSummary(self._expectations, failures.keys())
            # Note that we intentionally ignore the return value here.
            self._run_tests(failures.keys(), retry_summary)
            failures = self._get_failures(retry_summary, include_crashes=True)

        end_time = time.time()

        self._print_timing_statistics(end_time - start_time,
                                      thread_timings, test_timings,
                                      individual_test_timings,
                                      result_summary)

        self._print_result_summary(result_summary)

        sys.stdout.flush()
        sys.stderr.flush()

        self._printer.print_one_line_summary(result_summary.total,
                                             result_summary.expected,
                                             result_summary.unexpected)

        unexpected_results = summarize_unexpected_results(self._port,
            self._expectations, result_summary, retry_summary)
        self._printer.print_unexpected_results(unexpected_results)

        if self._options.record_results:
            # Write the same data to log files.
            self._write_json_files(unexpected_results, result_summary,
                                   individual_test_timings)

            # Upload generated JSON files to appengine server.
            self._upload_json_files()

        # Write the summary to disk (results.html) and display it if requested.
        wrote_results = self._write_results_html_file(result_summary)
        if self._options.show_results and wrote_results:
            self._show_results_html_file()

        # Now that we've completed all the processing we can, we re-raise
        # a KeyboardInterrupt if necessary so the caller can handle it.
        if keyboard_interrupted:
            raise KeyboardInterrupt

        # Ignore flaky failures and unexpected passes so we don't turn the
        # bot red for those.
        return unexpected_results['num_regressions']

    def update_summary(self, result_summary):
        """Update the summary and print results with any completed tests."""
        while True:
            try:
                result = self._result_queue.get_nowait()
            except Queue.Empty:
                return

            expected = self._expectations.matches_an_expected_result(
                result.filename, result.type, self._options.pixel_tests)
            result_summary.add(result, expected)
            exp_str = self._expectations.get_expectations_string(
                result.filename)
            got_str = self._expectations.expectation_to_string(result.type)
            self._printer.print_test_result(result, expected, exp_str, got_str)
            self._printer.print_progress(result_summary, self._retrying,
                                         self._test_files_list)

    def _get_failures(self, result_summary, include_crashes):
        """Filters a dict of results and returns only the failures.

        Args:
          result_summary: the results of the test run
          include_crashes: whether crashes are included in the output.
            We use False when finding the list of failures to retry
            to see if the results were flaky. Although the crashes may also be
            flaky, we treat them as if they aren't so that they're not ignored.
        Returns:
          a dict of files -> results
        """
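        # Illustrative shape of the return value (hypothetical path):
        #   {'/abs/path/LayoutTests/fast/css/some-test.html':
        #        test_expectations.TIMEOUT}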
        failed_results = {}
        for test, result in result_summary.unexpected_results.iteritems():
            if (result == test_expectations.PASS or
                result == test_expectations.CRASH and not include_crashes):
                continue
            failed_results[test] = result

        return failed_results

    def _write_json_files(self, unexpected_results, result_summary,
                        individual_test_timings):
        """Writes the results of the test run as JSON files into the results
        dir.

        There are three different files written into the results dir:
          unexpected_results.json: A short list of any unexpected results.
            This is used by the buildbots to display results.
          expectations.json: This is used by the flakiness dashboard.
          results.json: A full list of the results - used by the flakiness
            dashboard and the aggregate results dashboard.

        Args:
          unexpected_results: dict of unexpected results
          result_summary: full summary object
          individual_test_timings: list of test times (used by the flakiness
            dashboard).
        """
        results_directory = self._options.results_directory
        _log.debug("Writing JSON files in %s." % results_directory)
        unexpected_json_path = os.path.join(results_directory, "unexpected_results.json")
        with codecs.open(unexpected_json_path, "w", "utf-8") as file:
            simplejson.dump(unexpected_results, file, sort_keys=True, indent=2)

        # Write a json file of the test_expectations.txt file for the layout
        # tests dashboard.
        expectations_path = os.path.join(results_directory, "expectations.json")
        expectations_json = \
            self._expectations.get_expectations_json_for_all_platforms()
        with codecs.open(expectations_path, "w", "utf-8") as file:
            file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json)

        json_layout_results_generator.JSONLayoutResultsGenerator(
            self._port, self._options.builder_name, self._options.build_name,
            self._options.build_number, self._options.results_directory,
            BUILDER_BASE_URL, individual_test_timings,
            self._expectations, result_summary, self._test_files_list,
            not self._options.upload_full_results,
            self._options.test_results_server)

        _log.debug("Finished writing JSON files.")

    def _upload_json_files(self):
        if not self._options.test_results_server:
            return

        _log.info("Uploading JSON files for builder: %s",
                   self._options.builder_name)

        attrs = [("builder", self._options.builder_name)]
        json_files = ["expectations.json"]
        if self._options.upload_full_results:
            json_files.append("results.json")
        else:
            json_files.append("incremental_results.json")

        files = [(file, os.path.join(self._options.results_directory, file))
            for file in json_files]

        uploader = test_results_uploader.TestResultsUploader(
            self._options.test_results_server)
        try:
            # Set an upload timeout in case the appengine server is having
            # problems. 120 seconds is more than enough to upload test results.
            uploader.upload(attrs, files, 120)
        except Exception, err:
            _log.error("Upload failed: %s" % err)
            return

        _log.info("JSON files uploaded.")

    def _print_expected_results_of_type(self, result_summary,
                                        result_type, result_type_str):
        """Print the number of the tests in a given result class.

        Args:
          result_summary - the object containing all the results to report on
          result_type - the particular result type to report in the summary.
          result_type_str - a string description of the result_type.
        """
        tests = self._expectations.get_tests_with_result_type(result_type)
        now = result_summary.tests_by_timeline[test_expectations.NOW]
        wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
        defer = result_summary.tests_by_timeline[test_expectations.DEFER]

        # We use a fancy format string in order to print the data out in a
        # nicely-aligned table.
        fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd defer, %%%dd wontfix)"
                  % (self._num_digits(now), self._num_digits(defer),
                  self._num_digits(wontfix)))
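        # For example, with three-digit bucket sizes this yields lines like
        # (illustrative):
        #   Expect:   624 passes   (611 now,   8 defer,   5 wontfix)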
        self._printer.print_expected(fmtstr %
            (len(tests), result_type_str, len(tests & now),
             len(tests & defer), len(tests & wontfix)))

    def _num_digits(self, num):
        """Returns the number of digits needed to represent the length of a
        sequence."""
        ndigits = 1
        if len(num):
            ndigits = int(math.log10(len(num))) + 1
        return ndigits

    def _print_timing_statistics(self, total_time, thread_timings,
                               directory_test_timings, individual_test_timings,
                               result_summary):
        """Record timing-specific information for the test run.

        Args:
          total_time: total elapsed time (in seconds) for the test run
          thread_timings: wall clock time each thread ran for
          directory_test_timings: timing by directory
          individual_test_timings: timing by file
          result_summary: summary object for the test run
        """
        self._printer.print_timing("Test timing:")
        self._printer.print_timing("  %6.2f total testing time" % total_time)
        self._printer.print_timing("")
        self._printer.print_timing("Thread timing:")
        cuml_time = 0
        for t in thread_timings:
            self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
                  (t['name'], t['num_tests'], t['total_time']))
            cuml_time += t['total_time']
        self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
              (cuml_time, cuml_time / int(self._options.child_processes)))
        self._printer.print_timing("")

        self._print_aggregate_test_statistics(individual_test_timings)
        self._print_individual_test_times(individual_test_timings,
                                          result_summary)
        self._print_directory_timings(directory_test_timings)

    def _print_aggregate_test_statistics(self, individual_test_timings):
        """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
        Args:
          individual_test_timings: List of dump_render_tree_thread.TestStats
              for all tests.
        """
        test_types = []  # Unit tests don't actually produce any timings.
        if individual_test_timings:
            test_types = individual_test_timings[0].time_for_diffs.keys()
        times_for_dump_render_tree = []
        times_for_diff_processing = []
        times_per_test_type = {}
        for test_type in test_types:
            times_per_test_type[test_type] = []

        for test_stats in individual_test_timings:
            times_for_dump_render_tree.append(test_stats.test_run_time)
            times_for_diff_processing.append(
                test_stats.total_time_for_all_diffs)
            time_for_diffs = test_stats.time_for_diffs
            for test_type in test_types:
                times_per_test_type[test_type].append(
                    time_for_diffs[test_type])

        self._print_statistics_for_test_timings(
            "PER TEST TIME IN TESTSHELL (seconds):",
            times_for_dump_render_tree)
        self._print_statistics_for_test_timings(
            "PER TEST DIFF PROCESSING TIMES (seconds):",
            times_for_diff_processing)
        for test_type in test_types:
            self._print_statistics_for_test_timings(
                "PER TEST TIMES BY TEST TYPE: %s" % test_type,
                times_per_test_type[test_type])

    def _print_individual_test_times(self, individual_test_timings,
                                  result_summary):
        """Prints the run times for slow, timeout and crash tests.
        Args:
          individual_test_timings: List of dump_render_tree_thread.TestStats
              for all tests.
          result_summary: summary object for test run
        """
        # Reverse-sort by the time spent in DumpRenderTree.
        individual_test_timings.sort(lambda a, b:
            cmp(b.test_run_time, a.test_run_time))

        num_printed = 0
        slow_tests = []
        timeout_or_crash_tests = []
        unexpected_slow_tests = []
        for test_tuple in individual_test_timings:
            filename = test_tuple.filename
            is_timeout_crash_or_slow = False
            if self._expectations.has_modifier(filename,
                                               test_expectations.SLOW):
                is_timeout_crash_or_slow = True
                slow_tests.append(test_tuple)

            if filename in result_summary.failures:
                result = result_summary.results[filename].type
                if (result == test_expectations.TIMEOUT or
                    result == test_expectations.CRASH):
                    is_timeout_crash_or_slow = True
                    timeout_or_crash_tests.append(test_tuple)

            if (not is_timeout_crash_or_slow and
                num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
                num_printed = num_printed + 1
                unexpected_slow_tests.append(test_tuple)

        self._printer.print_timing("")
        self._print_test_list_timing("%s slowest tests that are not "
            "marked as SLOW and did not timeout/crash:" %
            printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
        self._printer.print_timing("")
        self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
        self._printer.print_timing("")
        self._print_test_list_timing("Tests that timed out or crashed:",
                                     timeout_or_crash_tests)
        self._printer.print_timing("")

    def _print_test_list_timing(self, title, test_list):
        """Print timing info for each test.

        Args:
          title: section heading
          test_list: tests that fall in this section
        """
        if self._printer.disabled('slowest'):
            return

        self._printer.print_timing(title)
        for test_tuple in test_list:
            filename = test_tuple.filename[len(
                self._port.layout_tests_dir()) + 1:]
            filename = filename.replace('\\', '/')
            test_run_time = round(test_tuple.test_run_time, 1)
            self._printer.print_timing("  %s took %s seconds" %
                                       (filename, test_run_time))

    def _print_directory_timings(self, directory_test_timings):
        """Print timing info by directory for any directories that
        take > 10 seconds to run.

        Args:
          directory_test_timings: time info for each directory
        """
        timings = []
        for directory in directory_test_timings:
            num_tests, time_for_directory = directory_test_timings[directory]
            timings.append((round(time_for_directory, 1), directory,
                            num_tests))
        timings.sort()

        self._printer.print_timing("Time to process slowest subdirectories:")
        min_seconds_to_print = 10
        for timing in timings:
            if timing[0] > min_seconds_to_print:
                self._printer.print_timing(
                    "  %s took %s seconds to run %s tests." % (timing[1],
                    timing[0], timing[2]))
        self._printer.print_timing("")

    def _print_statistics_for_test_timings(self, title, timings):
        """Prints the median, mean and standard deviation of the values in
        timings.

        Args:
          title: Title for these timings.
          timings: A list of floats representing times.
        """
        self._printer.print_timing(title)
        timings.sort()

        num_tests = len(timings)
        if not num_tests:
            return
        percentile90 = timings[int(.9 * num_tests)]
        percentile99 = timings[int(.99 * num_tests)]

        if num_tests % 2 == 1:
            median = timings[(num_tests - 1) / 2]
        else:
            lower = timings[num_tests / 2 - 1]
            upper = timings[num_tests / 2]
            median = (float(lower + upper)) / 2
        mean = sum(timings) / num_tests

        sum_of_deviations = 0
        for timing in timings:
            sum_of_deviations += math.pow(timing - mean, 2)

        std_deviation = math.sqrt(sum_of_deviations / num_tests)
        self._printer.print_timing("  Median:          %6.3f" % median)
        self._printer.print_timing("  Mean:            %6.3f" % mean)
        self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
        self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
        self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
        self._printer.print_timing("")

    def _print_result_summary(self, result_summary):
        """Print a short summary about how many tests passed.

        Args:
          result_summary: information to log
        """
        failed = len(result_summary.failures)
        skipped = len(
            result_summary.tests_by_expectation[test_expectations.SKIP])
        total = result_summary.total
        passed = total - failed - skipped
        pct_passed = 0.0
        if total > 0:
            pct_passed = float(passed) * 100 / total

        self._printer.print_actual("")
        self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
                     (passed, total, pct_passed))
        self._printer.print_actual("")
        self._print_result_summary_entry(result_summary,
            test_expectations.NOW, "Tests to be fixed for the current release")

        self._printer.print_actual("")
        self._print_result_summary_entry(result_summary,
            test_expectations.DEFER,
            "Tests we'll fix in the future if they fail (DEFER)")

        self._printer.print_actual("")
        self._print_result_summary_entry(result_summary,
            test_expectations.WONTFIX,
            "Tests that will only be fixed if they crash (WONTFIX)")
        self._printer.print_actual("")

    def _print_result_summary_entry(self, result_summary, timeline,
                                    heading):
        """Print a summary block of results for a particular timeline of test.

        Args:
          result_summary: summary to print results for
          timeline: the timeline to print results for (NOW, WONTFIX, etc.)
          heading: a textual description of the timeline
        """
        total = len(result_summary.tests_by_timeline[timeline])
        not_passing = (total -
           len(result_summary.tests_by_expectation[test_expectations.PASS] &
               result_summary.tests_by_timeline[timeline]))
        self._printer.print_actual("=> %s (%d):" % (heading, not_passing))

        for result in TestExpectationsFile.EXPECTATION_ORDER:
            if result == test_expectations.PASS:
                continue
            results = (result_summary.tests_by_expectation[result] &
                       result_summary.tests_by_timeline[timeline])
            desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
            if not_passing and len(results):
                pct = len(results) * 100.0 / not_passing
                self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
                    (len(results), desc[len(results) != 1], pct))

    def _results_html(self, test_files, failures, title="Test Failures", override_time=None):
        """
        test_files = a list of file paths
        failures = dictionary mapping test paths to failure objects
        title = title printed at the top of the results page
        override_time = current time (used by unit tests)
        """
        page = """<html>
  <head>
    <title>Layout Test Results (%(time)s)</title>
  </head>
  <body>
    <h2>%(title)s (%(time)s)</h2>
        """ % {'title': title, 'time': override_time or time.asctime()}

        for test_file in sorted(test_files):
            test_name = self._port.relative_test_filename(test_file)
            test_url = self._port.filename_to_uri(test_file)
            page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name)
            test_failures = failures.get(test_file, [])
            for failure in test_failures:
                page += u"&nbsp;&nbsp;%s<br/>" % failure.result_html_output(test_name)
            page += "</p>\n"
        page += "</body></html>\n"
        return page

1221     def _write_results_html_file(self, result_summary):
1222         """Write results.html which is a summary of tests that failed.
1223
1224         Args:
1225           result_summary: a summary of the results :)
1226
1227         Returns:
1228           True if any results were written (since expected failures may be
1229           omitted)
1230         """
1231         # test failures
1232         if self._options.full_results_html:
1233             results_title = "Test Failures"
1234             test_files = result_summary.failures.keys()
1235         else:
1236             results_title = "Unexpected Test Failures"
1237             unexpected_failures = self._get_failures(result_summary,
1238                 include_crashes=True)
1239             test_files = unexpected_failures.keys()
1240         if not test_files:
1241             return False
1242
1243         out_filename = os.path.join(self._options.results_directory,
1244                                     "results.html")
1245         with codecs.open(out_filename, "w", "utf-8") as results_file:
1246             html = self._results_html(test_files, result_summary.failures, results_title)
1247             results_file.write(html)
1248
1249         return True
1250
1251     def _show_results_html_file(self):
1252         """Shows the results.html page."""
1253         results_filename = os.path.join(self._options.results_directory,
1254                                         "results.html")
1255         self._port.show_results_html_file(results_filename)
1256
1257
1258 def read_test_files(files):
1259     tests = []
1260     for file_name in files:
1261         # FIXME: This could be cleaner using a list comprehension (see below).
1262         for line in codecs.open(file_name, "r", "utf-8"):
1263             line = test_expectations.strip_comments(line)
1264             if line:
1265                 tests.append(line)
1266     return tests
1267
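
# Illustrative sketch (not part of the original script) addressing the FIXME
# in read_test_files() above: the same filtering can be written as a list
# comprehension.  The helper name is hypothetical and nothing calls it.
def _read_test_files_sketch(files):
    return [stripped
            for file_name in files
            for stripped in (test_expectations.strip_comments(line)
                             for line in codecs.open(file_name, "r", "utf-8"))
            if stripped]
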
1268
1269 def run(port_obj, options, args, regular_output=sys.stderr,
1270         buildbot_output=sys.stdout):
1271     """Run the tests.
1272
1273     Args:
1274       port_obj: Port object for port-specific behavior
1275       options: an optparse.Values object holding the command line options
1276       args: a list of sub directories or files to test
1277       regular_output: a stream-like object that we can send logging/debug
1278           output to
1279       buildbot_output: a stream-like object for all output that is intended
1280           to be parsed by the buildbot
1281     Returns:
1282       the number of unexpected results that occurred, or -1 if there is an
1283           error.
1284     """
1285
1286     # Configure the printing subsystem for printing output, logging debug
1287     # info, and tracing tests.
1288
1289     if not options.child_processes:
1290         # FIXME: Investigate perf/flakiness impact of using cpu_count + 1.
1291         options.child_processes = port_obj.default_child_processes()
1292
1293     printer = printing.Printer(port_obj, options, regular_output=regular_output,
1294         buildbot_output=buildbot_output,
1295         child_processes=int(options.child_processes),
1296         is_fully_parallel=options.experimental_fully_parallel)
1297     if options.help_printing:
1298         printer.help_printing()
1299         return 0
1300
1301     executive = Executive()
1302
1303     if not options.configuration:
1304         options.configuration = port_obj.default_configuration()
1305
1306     if options.pixel_tests is None:
1307         options.pixel_tests = True
1308
1309     if not options.use_apache:
1310         options.use_apache = sys.platform in ('darwin', 'linux2')
1311
1312     if options.results_directory.startswith("/"):
1313         # Assume it's an absolute path and normalize.
1314         options.results_directory = port_obj.get_absolute_path(
1315             options.results_directory)
1316     else:
1317         # If it's a relative path, make the output directory relative to
1318         # Debug or Release.
1319         options.results_directory = port_obj.results_directory()
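
    # Illustrative example (not part of the original script): with the default
    # --results-directory=layout-test-results the port typically places output
    # under the configuration's build directory (a path ending in something
    # like Debug/layout-test-results), whereas an absolute path such as
    # /tmp/layout-test-results is normalized and used as given.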
1320
1321     last_unexpected_results = []
1322     if options.print_last_failures or options.retest_last_failures:
1323         unexpected_results_filename = os.path.join(
1324            options.results_directory, "unexpected_results.json")
1325         with codecs.open(unexpected_results_filename, "r", "utf-8") as file:
1326             results = simplejson.load(file)
1327         last_unexpected_results = results['tests'].keys()
1328         if options.print_last_failures:
1329             printer.write("\n".join(last_unexpected_results) + "\n")
1330             return 0
1331
1332     if options.clobber_old_results:
1333         # Just clobber the actual test results directories since the other
1334         # files in the results directory are explicitly used for cross-run
1335         # tracking.
1336         printer.print_update("Clobbering old results in %s" %
1337                              options.results_directory)
1338         layout_tests_dir = port_obj.layout_tests_dir()
1339         possible_dirs = os.listdir(layout_tests_dir)
1340         for dirname in possible_dirs:
1341             if os.path.isdir(os.path.join(layout_tests_dir, dirname)):
1342                 shutil.rmtree(os.path.join(options.results_directory, dirname),
1343                               ignore_errors=True)
1344
1345     if not options.time_out_ms:
1346         if options.configuration == "Debug":
1347             options.time_out_ms = str(2 * TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1348         else:
1349             options.time_out_ms = str(TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1350
1351     options.slow_time_out_ms = str(5 * int(options.time_out_ms))
1352     printer.print_config("Regular timeout: %s, slow test timeout: %s" %
1353                    (options.time_out_ms, options.slow_time_out_ms))
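
    # Illustrative example (not part of the original script): with
    # --time-out-ms=10000 the slow-test timeout computed above is
    # 5 * 10000 = 50000 ms.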
1354
1355     if int(options.child_processes) == 1:
1356         printer.print_config("Running one %s" % port_obj.driver_name())
1357     else:
1358         printer.print_config("Running %s %ss in parallel" % (
1359                        options.child_processes, port_obj.driver_name()))
1360
1361     # Include all tests if none are specified.
1362     new_args = []
1363     for arg in args:
1364         if arg:
1365             new_args.append(arg)
1366
1367     paths = new_args
1368     if not paths:
1369         paths = []
1370     paths += last_unexpected_results
1371     if options.test_list:
1372         paths += read_test_files(options.test_list)
1373
1374     # Create the output directory if it doesn't already exist.
1375     port_obj.maybe_make_directory(options.results_directory)
1376     printer.print_update("Collecting tests ...")
1377
1378     test_runner = TestRunner(port_obj, options, printer)
1379     test_runner.gather_file_paths(paths)
1380
1381     if options.lint_test_files:
1382         # Creating the expectations for each platform/configuration pair does
1383         # all the test list parsing and ensures the syntax is correct (e.g. no
1384         # dupes).
1385         for platform_name in port_obj.test_platform_names():
1386             test_runner.parse_expectations(platform_name, is_debug_mode=True)
1387             test_runner.parse_expectations(platform_name, is_debug_mode=False)
1388         printer.write("")
1389         _log.info("If there are no fail messages, errors or exceptions, "
1390                   "then the lint succeeded.")
1391         return 0
1392
1393     printer.print_config("Using port '%s'" % port_obj.name())
1394     printer.print_config("Placing test results in %s" %
1395                          options.results_directory)
1396     if options.new_baseline:
1397         printer.print_config("Placing new baselines in %s" %
1398                              port_obj.baseline_path())
1399     printer.print_config("Using %s build" % options.configuration)
1400     if options.pixel_tests:
1401         printer.print_config("Pixel tests enabled")
1402     else:
1403         printer.print_config("Pixel tests disabled")
1404     printer.print_config("")
1405
1406     printer.print_update("Parsing expectations ...")
1407     test_runner.parse_expectations(port_obj.test_platform_name(),
1408                                    options.configuration == 'Debug')
1409
1410     printer.print_update("Checking build ...")
1411     if not port_obj.check_build(test_runner.needs_http()):
1412         return -1
1413
1414     printer.print_update("Starting helper ...")
1415     port_obj.start_helper()
1416
1417     # Check that the system dependencies (themes, fonts, ...) are correct.
1418     if not options.nocheck_sys_deps:
1419         printer.print_update("Checking system dependencies ...")
1420         if not port_obj.check_sys_deps(test_runner.needs_http()):
1421             return -1
1422
1423     printer.print_update("Preparing tests ...")
1424     result_summary = test_runner.prepare_lists_and_print_output()
1425
1426     port_obj.setup_test_run()
1427
1428     test_runner.add_test_type(text_diff.TestTextDiff)
1429     if options.pixel_tests:
1430         test_runner.add_test_type(image_diff.ImageDiff)
1431
1432     num_unexpected_results = test_runner.run(result_summary)
1433
1434     port_obj.stop_helper()
1435
1436     _log.debug("Exit status: %d" % num_unexpected_results)
1437     return num_unexpected_results
1438
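# Illustrative usage sketch (not part of the original script): because run()
# takes stream-like objects, its output can be captured in memory, for example
# from a test harness.  The option values below are hypothetical examples.
#
#   import StringIO
#   log_stream = StringIO.StringIO()
#   options, args = parse_args(['--noshow-results', 'fast/css'])
#   port_obj = port.get(options.platform, options)
#   num_unexpected = run(port_obj, options, args,
#                        regular_output=log_stream,
#                        buildbot_output=StringIO.StringIO())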
1439
1440 def _compat_shim_callback(option, opt_str, value, parser):
1441     print "Ignoring unsupported option: %s" % opt_str
1442
1443
1444 def _compat_shim_option(option_name, **kwargs):
1445     return optparse.make_option(option_name, action="callback",
1446         callback=_compat_shim_callback,
1447         help="Ignored, for old-run-webkit-tests compat only.", **kwargs)
1448
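# Illustrative example (not part of the original script): options created
# with _compat_shim_option() above are accepted and then ignored, so for
# instance parse_args(['--no-sample-on-timeout']) prints
# "Ignoring unsupported option: --no-sample-on-timeout" and otherwise parses
# normally.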
1449
1450 def parse_args(args=None):
1451     """Parses the command line arguments (defaulting to sys.argv[1:]).
1452
1453     Returns a tuple of (options, args) from optparse."""
1454
1455     # FIXME: All of these options should be stored closer to the code which
1456     # FIXME: actually uses them. configuration_options should move
1457     # FIXME: to WebKitPort and be shared across all scripts.
1458     configuration_options = [
1459         optparse.make_option("-t", "--target", dest="configuration",
1460                              help="(DEPRECATED)"),
1461         # FIXME: --help should display which configuration is default.
1462         optparse.make_option('--debug', action='store_const', const='Debug',
1463                              dest="configuration",
1464                              help='Set the configuration to Debug'),
1465         optparse.make_option('--release', action='store_const',
1466                              const='Release', dest="configuration",
1467                              help='Set the configuration to Release'),
1468         # old-run-webkit-tests also accepts -c, --configuration CONFIGURATION.
1469     ]
1470
1471     print_options = printing.print_options()
1472
1473     # FIXME: These options should move onto the ChromiumPort.
1474     chromium_options = [
1475         optparse.make_option("--chromium", action="store_true", default=False,
1476             help="use the Chromium port"),
1477         optparse.make_option("--startup-dialog", action="store_true",
1478             default=False, help="create a dialog on DumpRenderTree startup"),
1479         optparse.make_option("--gp-fault-error-box", action="store_true",
1480             default=False, help="enable Windows GP fault error box"),
1481         optparse.make_option("--nocheck-sys-deps", action="store_true",
1482             default=False,
1483             help="Don't check the system dependencies (themes)"),
1484         optparse.make_option("--use-drt", action="store_true",
1485             default=False,
1486             help="Use DumpRenderTree instead of test_shell"),
1487     ]
1488
1489     # Missing Mac-specific old-run-webkit-tests options:
1490     # FIXME: Need: -g, --guard for guard malloc support on Mac.
1491     # FIXME: Need: -l --leaks    Enable leaks checking.
1492     # FIXME: Need: --sample-on-timeout Run sample on timeout
1493
1494     old_run_webkit_tests_compat = [
1495         # NRWT doesn't generate results by default anyway.
1496         _compat_shim_option("--no-new-test-results"),
1497         # NRWT doesn't sample on timeout yet anyway.
1498         _compat_shim_option("--no-sample-on-timeout"),
1499         # FIXME: NRWT needs to support remote links eventually.
1500         _compat_shim_option("--use-remote-links-to-tests"),
1501         # FIXME: NRWT doesn't need this option as much since failures are
1502         # designed to be cheap.  We eventually plan to add this support.
1503         _compat_shim_option("--exit-after-n-failures", nargs=1, type="int"),
1504     ]
1505
1506     results_options = [
1507         # NEED for bots: --use-remote-links-to-tests Link to test files
1508         # within the SVN repository in the results.
1509         optparse.make_option("-p", "--pixel-tests", action="store_true",
1510             dest="pixel_tests", help="Enable pixel-to-pixel PNG comparisons"),
1511         optparse.make_option("--no-pixel-tests", action="store_false",
1512             dest="pixel_tests", help="Disable pixel-to-pixel PNG comparisons"),
1513         # old-run-webkit-tests allows a specific tolerance: --tolerance t
1514         # Ignore image differences less than this percentage (default: 0.1)
1515         optparse.make_option("--results-directory",
1516             default="layout-test-results",
1517             help="Output results directory; relative paths go under Debug or "
1518                  "Release"),
1519         optparse.make_option("--new-baseline", action="store_true",
1520             default=False, help="Save all generated results as new baselines "
1521                  "into the platform directory, overwriting whatever's "
1522                  "already there."),
1523         optparse.make_option("--reset-results", action="store_true",
1524             default=False, help="Reset any existing baselines to the "
1525                  "generated results"),
1526         optparse.make_option("--no-show-results", action="store_false",
1527             default=True, dest="show_results",
1528             help="Don't launch a browser with results after the tests "
1529                  "are done"),
1530         # FIXME: We should have a helper function to do this sort of
1531         # deprecated mapping and automatically log, etc.
1532         optparse.make_option("--noshow-results", action="store_false",
1533             dest="show_results",
1534             help="Deprecated, same as --no-show-results."),
1535         optparse.make_option("--no-launch-safari", action="store_false",
1536             dest="show_results",
1537             help="old-run-webkit-tests compat, same as --noshow-results."),
1538         # old-run-webkit-tests:
1539         # --[no-]launch-safari    Launch (or do not launch) Safari to display
1540         #                         test results (default: launch)
1541         optparse.make_option("--full-results-html", action="store_true",
1542             default=False,
1543             help="Show all failures in results.html, rather than only "
1544                  "regressions"),
1545         optparse.make_option("--clobber-old-results", action="store_true",
1546             default=False, help="Clobbers test results from previous runs."),
1547         optparse.make_option("--platform",
1548             help="Override the platform for expected results"),
1549         optparse.make_option("--no-record-results", action="store_false",
1550             default=True, dest="record_results",
1551             help="Don't record the results."),
1552         # old-run-webkit-tests also has HTTP toggle options:
1553         # --[no-]http                     Run (or do not run) http tests
1554         #                                 (default: run)
1555         # --[no-]wait-for-httpd           Wait for httpd if some other test
1556         #                                 session is using it already (same
1557         #                                 as WEBKIT_WAIT_FOR_HTTPD=1).
1558         #                                 (default: 0)
1559     ]
1560
1561     test_options = [
1562         optparse.make_option("--build", dest="build",
1563             action="store_true", default=True,
1564             help="Check to ensure the DumpRenderTree build is up-to-date "
1565                  "(default)."),
1566         optparse.make_option("--no-build", dest="build",
1567             action="store_false", help="Don't check to see if the "
1568                                        "DumpRenderTree build is up-to-date."),
1569         # old-run-webkit-tests has --valgrind instead of --wrapper.
1570         optparse.make_option("--wrapper",
1571             help="wrapper command to insert before invocations of "
1572                  "DumpRenderTree; option is split on whitespace before "
1573                  "running. (Example: --wrapper='valgrind --smc-check=all')"),
1574         # old-run-webkit-tests:
1575         # -i|--ignore-tests               Comma-separated list of directories
1576         #                                 or tests to ignore
1577         optparse.make_option("--test-list", action="append",
1578             help="read list of tests to run from file", metavar="FILE"),
1579         # old-run-webkit-tests uses --skipped=[default|ignore|only]
1580         # instead of --force:
1581         optparse.make_option("--force", action="store_true", default=False,
1582             help="Run all tests, even those marked SKIP in the test list"),
1583         optparse.make_option("--use-apache", action="store_true",
1584             default=False, help="Use Apache instead of lighttpd."),
1585         optparse.make_option("--time-out-ms",
1586             help="Set the timeout for each test"),
1587         # old-run-webkit-tests calls --randomize-order: --random
1588         optparse.make_option("--randomize-order", action="store_true",
1589             default=False, help=("Run tests in random order (useful "
1590                                 "for tracking down corruption)")),
1591         optparse.make_option("--run-chunk",
1592             help=("Run a specified chunk (n:l): the nth chunk of length l "
1593                  "of the layout tests")),
1594         optparse.make_option("--run-part", help=("Run a specified part (n:m), "
1595                   "the nth of m parts, of the layout tests")),
1596         # old-run-webkit-tests calls --batch-size: --nthly n
1597         #   Restart DumpRenderTree every n tests (default: 1000)
1598         optparse.make_option("--batch-size",
1599             help=("Run the tests in batches of n; after every n tests, "
1600                   "DumpRenderTree is relaunched.")),
1601         # old-run-webkit-tests calls --run-singly: -1|--singly
1602         # Isolate each test case run (implies --nthly 1 --verbose)
1603         optparse.make_option("--run-singly", action="store_true",
1604             default=False, help="run a separate DumpRenderTree for each test"),
1605         optparse.make_option("--child-processes",
1606             help="Number of DumpRenderTrees to run in parallel."),
1607         # FIXME: Display default number of child processes that will run.
1608         optparse.make_option("--experimental-fully-parallel",
1609             action="store_true", default=False,
1610             help="run all tests in parallel"),
1611         # FIXME: Need --exit-after-n-failures N
1612         #      Exit after the first N failures instead of running all tests
1613         # FIXME: Need --exit-after-n-crashes N
1614         #      Exit after the first N crashes instead of running all tests
1615         # FIXME: consider: --iterations n
1616         #      Number of times to run the set of tests (e.g. ABCABCABC)
1617         optparse.make_option("--print-last-failures", action="store_true",
1618             default=False, help="Print the tests in the last run that "
1619             "had unexpected failures (or passes)."),
1620         optparse.make_option("--retest-last-failures", action="store_true",
1621             default=False, help="re-test the tests in the last run that "
1622             "had unexpected failures (or passes)."),
1623         optparse.make_option("--retry-failures", action="store_true",
1624             default=True,
1625             help="Re-try any tests that produce unexpected results (default)"),
1626         optparse.make_option("--no-retry-failures", action="store_false",
1627             dest="retry_failures",
1628             help="Don't re-try any tests that produce unexpected results."),
1629     ]
1630
1631     misc_options = [
1632         optparse.make_option("--lint-test-files", action="store_true",
1633         default=False, help=("Makes sure the test files parse for all "
1634                             "configurations. Does not run any tests.")),
1635     ]
1636
1637     # FIXME: Move these into json_results_generator.py
1638     results_json_options = [
1639         optparse.make_option("--builder-name", default="DUMMY_BUILDER_NAME",
1640             help=("The name of the builder shown on the waterfall running "
1641                   "this script, e.g. WebKit.")),
1642         optparse.make_option("--build-name", default="DUMMY_BUILD_NAME",
1643             help=("The name of the builder used in its path, e.g. "
1644                   "webkit-rel.")),
1645         optparse.make_option("--build-number", default="DUMMY_BUILD_NUMBER",
1646             help=("The build number of the builder running this script.")),
1647         optparse.make_option("--test-results-server", default="",
1648             help=("If specified, upload results json files to this appengine "
1649                   "server.")),
1650         optparse.make_option("--upload-full-results",
1651             action="store_true",
1652             default=False,
1653             help="If true, upload full json results to server."),
1654     ]
1655
1656     option_list = (configuration_options + print_options +
1657                    chromium_options + results_options + test_options +
1658                    misc_options + results_json_options +
1659                    old_run_webkit_tests_compat)
1660     option_parser = optparse.OptionParser(option_list=option_list)
1661
1662     options, args = option_parser.parse_args(args)
1663
1664     return options, args
1665
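
# Illustrative usage sketch (not part of the original script): parse_args()
# accepts an explicit argument list, which is handy when experimenting or
# writing unit tests.  The helper name and argument values are hypothetical,
# and nothing calls this function.
def _example_parse_args_usage():
    options, args = parse_args(['--release', '--pixel-tests',
                                'fast/css', 'fast/dom'])
    assert options.configuration == 'Release'
    assert options.pixel_tests
    assert args == ['fast/css', 'fast/dom']
    return options, args
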
1666
1667 def _find_thread_stack(id):
1668     """Returns a stack object that can be used to dump a stack trace for
1669     the given thread id (or None if the id is not found)."""
1670     for thread_id, stack in sys._current_frames().items():
1671         if thread_id == id:
1672             return stack
1673     return None
1674
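
# Illustrative sketch (not part of the original script): sys._current_frames()
# returns a dict keyed by thread id, so the lookup in _find_thread_stack()
# above can also be written as a single dict.get() call.  The helper name is
# hypothetical and nothing calls it.
def _find_thread_stack_sketch(thread_id):
    return sys._current_frames().get(thread_id)
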
1675
1676 def _log_stack(stack):
1677     """Log a stack trace to log.error()."""
1678     for filename, lineno, name, line in traceback.extract_stack(stack):
1679         _log.error('File: "%s", line %d, in %s' % (filename, lineno, name))
1680         if line:
1681             _log.error('  %s' % line.strip())
1682
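# Illustrative example (not part of the original script): for each stack frame
# _log_stack() above emits two log lines roughly like the following (the path,
# line number and code line are hypothetical placeholders):
#
#   File: "/path/to/some_module.py", line 123, in some_function
#     do_something()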
1683
1684 def _log_wedged_thread(thread):
1685     """Log information about the given thread state."""
1686     id = thread.id()
1687     stack = _find_thread_stack(id)
1688     assert(stack is not None)
1689     _log.error("")
1690     _log.error("thread %s (%d) is wedged" % (thread.getName(), id))
1691     _log_stack(stack)
1692     _log.error("")
1693
1694
1695 def main():
1696     options, args = parse_args()
1697     port_obj = port.get(options.platform, options)
1698     return run(port_obj, options, args)
1699
1700 if '__main__' == __name__:
1701     try:
1702         sys.exit(main())
1703     except KeyboardInterrupt:
1704         # This mirrors the shell convention: exit status 128 + signal number.
1705         sys.exit(signal.SIGINT + 128)