android-x86/external-webkit.git: WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests.py
1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #     * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #     * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 #     * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Run layout tests.
31
32 This is a port of the existing webkit test script run-webkit-tests.
33
34 The TestRunner class runs a series of tests (TestType interface) against a set
35 of test files.  If a test file fails a TestType, it returns a list of TestFailure
36 objects to the TestRunner.  The TestRunner then aggregates the TestFailures to
37 create a final report.
38
39 This script reads several files, if they exist, from the test_lists subdirectory
40 next to this script.  Each should contain a list of paths to individual
41 tests or entire subdirectories of tests, relative to the outermost test
42 directory.  Entire lines starting with '//' (comments) will be ignored.
43
44 For details of the files' contents and purposes, see test_lists/README.
45 """
46
47 from __future__ import with_statement
48
49 import codecs
50 import errno
51 import glob
52 import logging
53 import math
54 import optparse
55 import os
56 import platform
57 import Queue
58 import random
59 import re
60 import shutil
61 import signal
62 import sys
63 import time
64 import traceback
65
66 from layout_package import dump_render_tree_thread
67 from layout_package import json_layout_results_generator
68 from layout_package import printing
69 from layout_package import test_expectations
70 from layout_package import test_failures
71 from layout_package import test_results_uploader
72 from test_types import image_diff
73 from test_types import text_diff
74 from test_types import test_type_base
75
76 from webkitpy.common.system import user
77 from webkitpy.thirdparty import simplejson
78
79 import port
80
81 _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
82
83 # Builder base URL where we have the archived test results.
84 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
85
86 TestExpectationsFile = test_expectations.TestExpectationsFile
87
88
89 class TestInfo:
90     """Groups information about a test for easy passing of data."""
91
92     def __init__(self, port, filename, timeout):
93         """Generates the URI and stores the filename and timeout for this test.
94         Args:
95           filename: Full path to the test.
96           timeout: Timeout for running the test in TestShell.
97           """
98         self.filename = filename
99         self._port = port
100         self.uri = port.filename_to_uri(filename)
101         self.timeout = timeout
102         self._image_checksum = -1
103
104     def image_hash(self):
105         # Read the image_hash lazily to reduce startup time.
106         # This class is accessed across threads, but only one thread should
107         # ever be dealing with any given TestInfo so no locking is needed.
108         #
109         # Note that we use -1 to indicate that we haven't read the value,
110         # because expected_checksum() returns a string or None.
111         if self._image_checksum == -1:
112             self._image_checksum = self._port.expected_checksum(self.filename)
113         return self._image_checksum
114
115
116 class ResultSummary(object):
117     """A class for partitioning the test results we get into buckets.
118
119     This class is basically a glorified struct and it's private to this file
120     so we don't bother with any information hiding."""
121
122     def __init__(self, expectations, test_files):
123         self.total = len(test_files)
124         self.remaining = self.total
125         self.expectations = expectations
126         self.expected = 0
127         self.unexpected = 0
128         self.tests_by_expectation = {}
129         self.tests_by_timeline = {}
130         self.results = {}
131         self.unexpected_results = {}
132         self.failures = {}
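        # tests_by_expectation and tests_by_timeline map each expectation and
        # timeline constant to the set of test filenames in that bucket.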
133         self.tests_by_expectation[test_expectations.SKIP] = set()
134         for expectation in TestExpectationsFile.EXPECTATIONS.values():
135             self.tests_by_expectation[expectation] = set()
136         for timeline in TestExpectationsFile.TIMELINES.values():
137             self.tests_by_timeline[timeline] = (
138                 expectations.get_tests_with_timeline(timeline))
139
140     def add(self, result, expected):
141         """Add a TestResult into the appropriate bin.
142
143         Args:
144           result: TestResult from dump_render_tree_thread.
145           expected: whether the result was what we expected it to be.
146         """
147
148         self.tests_by_expectation[result.type].add(result.filename)
149         self.results[result.filename] = result
150         self.remaining -= 1
151         if len(result.failures):
152             self.failures[result.filename] = result.failures
153         if expected:
154             self.expected += 1
155         else:
156             self.unexpected_results[result.filename] = result.type
157             self.unexpected += 1
158
159
160 def summarize_unexpected_results(port_obj, expectations, result_summary,
161                                  retry_summary):
162     """Summarize any unexpected results as a dict.
163
164     FIXME: split this data structure into a separate class?
165
166     Args:
167         port_obj: interface to port-specific hooks
168         expectations: test_expectations.TestExpectations object
169         result_summary: summary object from initial test runs
170         retry_summary: summary object from final test run of retried tests
171     Returns:
172         A dictionary containing a summary of the unexpected results from the
173         run, with the following fields:
174         'version': a version indicator (1 in this version)
175         'fixable': # of fixable tests (NOW - PASS)
176         'skipped': # of skipped tests (NOW & SKIPPED)
177         'num_regressions': # of non-flaky failures
178         'num_flaky': # of flaky failures
179         'num_passes': # of unexpected passes
180         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
181     """
182     results = {}
183     results['version'] = 1
184
185     tbe = result_summary.tests_by_expectation
186     tbt = result_summary.tests_by_timeline
187     results['fixable'] = len(tbt[test_expectations.NOW] -
188                                 tbe[test_expectations.PASS])
189     results['skipped'] = len(tbt[test_expectations.NOW] &
190                                 tbe[test_expectations.SKIP])
191
192     num_passes = 0
193     num_flaky = 0
194     num_regressions = 0
195     keywords = {}
196     for k, v in TestExpectationsFile.EXPECTATIONS.iteritems():
197         keywords[v] = k.upper()
198
199     tests = {}
200     for filename, result in result_summary.unexpected_results.iteritems():
201         # Note that if a test crashed in the original run, we ignore
202         # whether or not it crashed when we retried it (if we retried it),
203         # and always consider the result not flaky.
204         test = port_obj.relative_test_filename(filename)
205         expected = expectations.get_expectations_string(filename)
206         actual = [keywords[result]]
207
208         if result == test_expectations.PASS:
209             num_passes += 1
210         elif result == test_expectations.CRASH:
211             num_regressions += 1
212         else:
213             if filename not in retry_summary.unexpected_results:
214                 actual.extend(expectations.get_expectations_string(
215                     filename).split(" "))
216                 num_flaky += 1
217             else:
218                 retry_result = retry_summary.unexpected_results[filename]
219                 if result != retry_result:
220                     actual.append(keywords[retry_result])
221                     num_flaky += 1
222                 else:
223                     num_regressions += 1
224
225         tests[test] = {}
226         tests[test]['expected'] = expected
227         tests[test]['actual'] = " ".join(actual)
228
229     results['tests'] = tests
230     results['num_passes'] = num_passes
231     results['num_flaky'] = num_flaky
232     results['num_regressions'] = num_regressions
233
234     return results
235
236
237 class TestRunner:
238     """A class for managing running a series of tests on a series of layout
239     test files."""
240
241     HTTP_SUBDIR = os.sep.join(['', 'http', ''])
242     WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', ''])
243
244     # The per-test timeout in milliseconds, if no --time-out-ms option was
245     # given to run_webkit_tests. This should correspond to the default timeout
246     # in DumpRenderTree.
247     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
248
249     def __init__(self, port, options, printer):
250         """Initialize test runner data structures.
251
252         Args:
253           port: an object implementing port-specific hooks
254           options: a dictionary of command line options
255           printer: a Printer object to record updates to.
256         """
257         self._port = port
258         self._options = options
259         self._printer = printer
260
261         # The wss server is disabled until pyOpenSSL is installed on the buildbots.
262         # self._websocket_secure_server = websocket_server.PyWebSocket(
263         #        options.results_directory, use_tls=True, port=9323)
264
265         # a list of TestType objects
266         self._test_types = [text_diff.TestTextDiff]
267         if options.pixel_tests:
268             self._test_types.append(image_diff.ImageDiff)
269
270         # a set of test files, and the same tests as a list
271         self._test_files = set()
272         self._test_files_list = None
273         self._result_queue = Queue.Queue()
274         self._retrying = False
275
276     def collect_tests(self, args, last_unexpected_results):
277         """Find all the files to test.
278
279         Args:
280           args: list of test arguments from the command line
281           last_unexpected_results: list of unexpected results to retest, if any
282
283         """
284         paths = [arg for arg in args if arg]
285         paths += last_unexpected_results
286         if self._options.test_list:
287             paths += read_test_files(self._options.test_list)
288         self._test_files = self._port.tests(paths)
289
290     def lint(self):
291         # Creating the expectations for each platform/configuration pair does
292         # all the test list parsing and ensures the syntax is correct (e.g.
293         # no dupes).
294         for platform_name in self._port.test_platform_names():
295             self.parse_expectations(platform_name, is_debug_mode=True)
296             self.parse_expectations(platform_name, is_debug_mode=False)
297         self._printer.write("")
298         _log.info("If there are no fail messages, errors or exceptions, "
299                   "then the lint succeeded.")
300         return 0
301
302     def parse_expectations(self, test_platform_name, is_debug_mode):
303         """Parse the expectations from the test_list files and return a data
304         structure holding them. Throws an error if the test_list files have
305         invalid syntax."""
306         if self._options.lint_test_files:
307             test_files = None
308         else:
309             test_files = self._test_files
310
311         try:
312             expectations_str = self._port.test_expectations()
313             overrides_str = self._port.test_expectations_overrides()
314             self._expectations = test_expectations.TestExpectations(
315                 self._port, test_files, expectations_str, test_platform_name,
316                 is_debug_mode, self._options.lint_test_files,
317                 overrides=overrides_str)
318             return self._expectations
319         except SyntaxError, err:
320             if self._options.lint_test_files:
321                 print str(err)
322             else:
323                 raise err
324
325     def prepare_lists_and_print_output(self):
326         """Create appropriate subsets of test lists and returns a
327         ResultSummary object. Also prints expected test counts.
328         """
329
330         # Remove skipped - both fixable and ignored - files from the
331         # top-level list of files to test.
332         num_all_test_files = len(self._test_files)
333         self._printer.print_expected("Found:  %d tests" %
334                                      (len(self._test_files)))
335         if not num_all_test_files:
336             _log.critical('No tests to run.')
337             return None
338
339         skipped = set()
340         if num_all_test_files > 1 and not self._options.force:
341             skipped = self._expectations.get_tests_with_result_type(
342                            test_expectations.SKIP)
343             self._test_files -= skipped
344
345         # Create a sorted list of test files so the subset chunk,
346         # if used, contains alphabetically consecutive tests.
347         self._test_files_list = list(self._test_files)
348         if self._options.randomize_order:
349             random.shuffle(self._test_files_list)
350         else:
351             self._test_files_list.sort()
352
353         # If the user specifies they just want to run a subset of the tests,
354         # just grab a subset of the non-skipped tests.
355         if self._options.run_chunk or self._options.run_part:
356             chunk_value = self._options.run_chunk or self._options.run_part
357             test_files = self._test_files_list
358             try:
359                 (chunk_num, chunk_len) = chunk_value.split(":")
360                 chunk_num = int(chunk_num)
361                 assert(chunk_num >= 0)
362                 test_size = int(chunk_len)
363                 assert(test_size > 0)
364             except (ValueError, AssertionError):
365                 _log.critical("invalid chunk '%s'" % chunk_value)
366                 return None
367
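            # For example (hypothetical values), a run_chunk value of "2:100"
            # runs the 100 tests starting at offset 200 (modulo the number of
            # tests), while a run_part value of "2:3" runs the second of three
            # roughly equal slices of the sorted test list.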
368             # Get the number of tests
369             num_tests = len(test_files)
370
371             # Get the start offset of the slice.
372             if self._options.run_chunk:
373                 chunk_len = test_size
374                 # In this case chunk_num can be really large. We need
375                 # to make the slice fit within the current number of tests.
376                 slice_start = (chunk_num * chunk_len) % num_tests
377             else:
378                 # Validate the data.
379                 assert(test_size <= num_tests)
380                 assert(chunk_num <= test_size)
381
382                 # To compute chunk_len without skipping any tests, we round
383                 # num_tests up to the next multiple of test_size so that it
384                 # divides evenly into test_size parts.
385                 rounded_tests = num_tests
386                 if rounded_tests % test_size != 0:
387                     rounded_tests = (num_tests + test_size -
388                                      (num_tests % test_size))
389
390                 chunk_len = rounded_tests / test_size
391                 slice_start = chunk_len * (chunk_num - 1)
392                 # It does not matter if we go past the end of the list here.
393
394             # Get the end offset of the slice.
395             slice_end = min(num_tests, slice_start + chunk_len)
396
397             files = test_files[slice_start:slice_end]
398
399             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
400                 (slice_end - slice_start), slice_start, slice_end, num_tests)
401             self._printer.print_expected(tests_run_msg)
402
403             # If we reached the end and we don't have enough tests, we run some
404             # from the beginning.
405             if (self._options.run_chunk and
406                 (slice_end - slice_start < chunk_len)):
407                 extra = 1 + chunk_len - (slice_end - slice_start)
408                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
409                             extra)
410                 self._printer.print_expected(extra_msg)
411                 tests_run_msg += "\n" + extra_msg
412                 files.extend(test_files[0:extra])
413             tests_run_filename = os.path.join(self._options.results_directory,
414                                               "tests_run.txt")
415             with codecs.open(tests_run_filename, "w", "utf-8") as file:
416                 file.write(tests_run_msg + "\n")
417
418             len_skip_chunk = int(len(files) * len(skipped) /
419                                  float(len(self._test_files)))
420             skip_chunk_list = list(skipped)[0:len_skip_chunk]
421             skip_chunk = set(skip_chunk_list)
422
423             # Update expectations so that the stats are calculated correctly.
424             # We need to pass a list that includes the right number of skipped
425             # files to parse_expectations so that ResultSummary() will get the
426             # correct stats. So, we add in the subset of skipped files, and
427             # then subtract them back out.
428             self._test_files_list = files + skip_chunk_list
429             self._test_files = set(self._test_files_list)
430
431             self._expectations = self.parse_expectations(
432                 self._port.test_platform_name(),
433                 self._options.configuration == 'Debug')
434
435             self._test_files = set(files)
436             self._test_files_list = files
437         else:
438             skip_chunk = skipped
439
440         result_summary = ResultSummary(self._expectations,
441             self._test_files | skip_chunk)
442         self._print_expected_results_of_type(result_summary,
443             test_expectations.PASS, "passes")
444         self._print_expected_results_of_type(result_summary,
445             test_expectations.FAIL, "failures")
446         self._print_expected_results_of_type(result_summary,
447             test_expectations.FLAKY, "flaky")
448         self._print_expected_results_of_type(result_summary,
449             test_expectations.SKIP, "skipped")
450
451         if self._options.force:
452             self._printer.print_expected('Running all tests, including '
453                                          'skips (--force)')
454         else:
455             # Note that we don't actually run the skipped tests (they were
456             # subtracted out of self._test_files, above), but we stub out the
457             # results here so the statistics can remain accurate.
458             for test in skip_chunk:
459                 result = dump_render_tree_thread.TestResult(test,
460                     failures=[], test_run_time=0, total_time_for_all_diffs=0,
461                     time_for_diffs=0)
462                 result.type = test_expectations.SKIP
463                 result_summary.add(result, expected=True)
464         self._printer.print_expected('')
465
466         return result_summary
467
468     def _get_dir_for_test_file(self, test_file):
469         """Returns the highest-level directory by which to shard the given
470         test file."""
471         index = test_file.rfind(os.sep + 'LayoutTests' + os.sep)
472
473         test_file = test_file[index + len('LayoutTests/'):]
474         test_file_parts = test_file.split(os.sep, 1)
475         directory = test_file_parts[0]
476         test_file = test_file_parts[1]
477
478         # The http tests are very stable on mac/linux.
479         # TODO(ojan): Make the http server on Windows be apache so we can
480         # shard the http tests there as well. Switching to apache is
481         # what made them stable on linux/mac.
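        # For example, a test under LayoutTests/fast/css/ is sharded under
        # 'fast/css' on every platform, while on Windows every test under
        # LayoutTests/http/ collapses into a single 'http' shard.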
482         return_value = directory
483         while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
484                 and test_file.find(os.sep) >= 0):
485             test_file_parts = test_file.split(os.sep, 1)
486             directory = test_file_parts[0]
487             return_value = os.path.join(return_value, directory)
488             test_file = test_file_parts[1]
489
490         return return_value
491
492     def _get_test_info_for_file(self, test_file):
493         """Returns the appropriate TestInfo object for the file. Mostly this
494         is used for looking up the timeout value (in ms) to use for the given
495         test."""
496         if self._expectations.has_modifier(test_file, test_expectations.SLOW):
497             return TestInfo(self._port, test_file,
498                             self._options.slow_time_out_ms)
499         return TestInfo(self._port, test_file, self._options.time_out_ms)
500
501     def _get_test_file_queue(self, test_files):
502         """Create the thread safe queue of lists of (test filenames, test URIs)
503         tuples. Each TestShellThread pulls a list from this queue and runs
504         those tests in order before grabbing the next available list.
505
506         Shard the lists by directory. This helps ensure that tests that depend
507         on each other (aka bad tests!) continue to run together as most
508         cross-tests dependencies tend to occur within the same directory.
509
510         Return:
511           The Queue of lists of TestInfo objects.
512         """
513
514         if (self._options.experimental_fully_parallel or
515             self._is_single_threaded()):
516             filename_queue = Queue.Queue()
517             for test_file in test_files:
518                 filename_queue.put(
519                     ('.', [self._get_test_info_for_file(test_file)]))
520             return filename_queue
521
522         tests_by_dir = {}
523         for test_file in test_files:
524             directory = self._get_dir_for_test_file(test_file)
525             tests_by_dir.setdefault(directory, [])
526             tests_by_dir[directory].append(
527                 self._get_test_info_for_file(test_file))
528
529         # Sort by the number of tests in the dir so that the ones with the
530         # most tests get run first in order to maximize parallelization.
531         # Number of tests is a good enough, but not perfect, approximation
532         # of how long that set of tests will take to run. We can't just use
533         # a PriorityQueue until we move to Python 2.6.
534         test_lists = []
535         http_tests = None
536         for directory in tests_by_dir:
537             test_list = tests_by_dir[directory]
538             # Keep the tests in alphabetical order.
539             # TODO: Remove once tests are fixed so they can be run in any
540             # order.
541             test_list.reverse()
542             test_list_tuple = (directory, test_list)
543             if directory == 'LayoutTests' + os.sep + 'http':
544                 http_tests = test_list_tuple
545             else:
546                 test_lists.append(test_list_tuple)
547         test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
548
549         # Put the http tests first. There are only a couple hundred of them,
550         # but each http test takes a very long time to run, so sorting by the
551         # number of tests doesn't accurately capture how long they take to run.
552         if http_tests:
553             test_lists.insert(0, http_tests)
554
555         filename_queue = Queue.Queue()
556         for item in test_lists:
557             filename_queue.put(item)
558         return filename_queue
559
560     def _get_test_args(self, index):
561         """Returns the tuple of arguments for tests and for DumpRenderTree."""
562         test_args = test_type_base.TestArguments()
563         test_args.png_path = None
564         if self._options.pixel_tests:
565             png_path = os.path.join(self._options.results_directory,
566                                     "png_result%s.png" % index)
567             test_args.png_path = png_path
568         test_args.new_baseline = self._options.new_baseline
569         test_args.reset_results = self._options.reset_results
570
571         return test_args
572
573     def _contains_tests(self, subdir):
574         for test_file in self._test_files:
575             if test_file.find(subdir) >= 0:
576                 return True
577         return False
578
579     def _instantiate_dump_render_tree_threads(self, test_files,
580                                               result_summary):
581         """Instantitates and starts the TestShellThread(s).
582
583         Return:
584           The list of threads.
585         """
586         filename_queue = self._get_test_file_queue(test_files)
587
588         # Instantiate TestShellThreads and start them.
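        # (In single-threaded mode, i.e. child_processes == 1, each "thread"
        # below is run synchronously via run_in_main_thread() rather than
        # being started as a real thread.)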
589         threads = []
590         for i in xrange(int(self._options.child_processes)):
591             # Create separate TestTypes instances for each thread.
592             test_types = []
593             for test_type in self._test_types:
594                 test_types.append(test_type(self._port,
595                                     self._options.results_directory))
596
597             test_args = self._get_test_args(i)
598             thread = dump_render_tree_thread.TestShellThread(self._port,
599                 self._options, filename_queue, self._result_queue,
600                 test_types, test_args)
601             if self._is_single_threaded():
602                 thread.run_in_main_thread(self, result_summary)
603             else:
604                 thread.start()
605             threads.append(thread)
606
607         return threads
608
609     def _is_single_threaded(self):
610         """Returns whether we should run all the tests in the main thread."""
611         return int(self._options.child_processes) == 1
612
613     def _run_tests(self, file_list, result_summary):
614         """Runs the tests in the file_list.
615
616         Return: A tuple (keyboard_interrupted, thread_timings, test_timings,
617             individual_test_timings)
618             keyboard_interrupted is whether someone typed Ctrl-C
619             thread_timings is a list of dicts with the total runtime
620               of each thread with 'name', 'num_tests', 'total_time' properties
621             test_timings is a dict of timings for each sharded subdirectory,
622               mapping directory_name to a (num_tests, time) pair
623             individual_test_timings is a list of run times for each test
624               in the form {filename:filename, test_run_time:test_run_time}
625             (result_summary is populated in place as tests complete)
626         """
627         # FIXME: We should use webkitpy.tool.grammar.pluralize here.
628         plural = ""
629         if not self._is_single_threaded():
630             plural = "s"
631         self._printer.print_update('Starting %s%s ...' %
632                                    (self._port.driver_name(), plural))
633         threads = self._instantiate_dump_render_tree_threads(file_list,
634                                                              result_summary)
635         self._printer.print_update("Starting testing ...")
636
637         keyboard_interrupted = self._wait_for_threads_to_finish(threads,
638                                                                 result_summary)
639         (thread_timings, test_timings, individual_test_timings) = \
640             self._collect_timing_info(threads)
641
642         return (keyboard_interrupted, thread_timings, test_timings,
643                 individual_test_timings)
644
645     def _wait_for_threads_to_finish(self, threads, result_summary):
646         keyboard_interrupted = False
647         try:
648             # Loop through all the threads waiting for them to finish.
649             some_thread_is_alive = True
650             while some_thread_is_alive:
651                 some_thread_is_alive = False
652                 t = time.time()
653                 for thread in threads:
654                     exception_info = thread.exception_info()
655                     if exception_info is not None:
656                         # Re-raise the thread's exception here to make it
657                         # clear that testing was aborted. Otherwise,
658                         # the tests that did not run would be assumed
659                         # to have passed.
660                         raise exception_info[0], exception_info[1], exception_info[2]
661
662                     if thread.isAlive():
663                         some_thread_is_alive = True
664                         next_timeout = thread.next_timeout()
665                         if (next_timeout and t > next_timeout):
666                             _log_wedged_thread(thread)
667                             thread.clear_next_timeout()
668
669                 self.update_summary(result_summary)
670
671                 if some_thread_is_alive:
672                     time.sleep(0.01)
673
674         except KeyboardInterrupt:
675             keyboard_interrupted = True
676             for thread in threads:
677                 thread.cancel()
678
679         return keyboard_interrupted
680
681     def _collect_timing_info(self, threads):
682         test_timings = {}
683         individual_test_timings = []
684         thread_timings = []
685
686         for thread in threads:
687             thread_timings.append({'name': thread.getName(),
688                                    'num_tests': thread.get_num_tests(),
689                                    'total_time': thread.get_total_time()})
690             test_timings.update(thread.get_directory_timing_stats())
691             individual_test_timings.extend(thread.get_test_results())
692
693         return (thread_timings, test_timings, individual_test_timings)
694
695     def needs_http(self):
696         """Returns whether the test runner needs an HTTP server."""
697         return self._contains_tests(self.HTTP_SUBDIR)
698
699     def set_up_run(self):
700         """Configures the system to be ready to run tests.
701
702         Returns a ResultSummary object if we should continue to run tests,
703         or None if we should abort.
704
705         """
706         # This must be started before we check the system dependencies,
707         # since the helper may do things to make the setup correct.
708         self._printer.print_update("Starting helper ...")
709         self._port.start_helper()
710
711         # Check that the system dependencies (themes, fonts, ...) are correct.
712         if not self._options.nocheck_sys_deps:
713             self._printer.print_update("Checking system dependencies ...")
714             if not self._port.check_sys_deps(self.needs_http()):
715                 self._port.stop_helper()
716                 return None
717
718         if self._options.clobber_old_results:
719             self._clobber_old_results()
720
721         # Create the output directory if it doesn't already exist.
722         self._port.maybe_make_directory(self._options.results_directory)
723
724         self._port.setup_test_run()
725
726         self._printer.print_update("Preparing tests ...")
727         result_summary = self.prepare_lists_and_print_output()
728         if not result_summary:
729             return None
730
731         if self.needs_http():
732             self._printer.print_update('Starting HTTP server ...')
733             self._port.start_http_server()
734
735         if self._contains_tests(self.WEBSOCKET_SUBDIR):
736             self._printer.print_update('Starting WebSocket server ...')
737             self._port.start_websocket_server()
738             # self._websocket_secure_server.Start()
739
740         return result_summary
741
742     def run(self, result_summary):
743         """Run all our tests on all our test files.
744
745         For each test file, we run each test type. If there are any failures,
746         we collect them for reporting.
747
748         Args:
749           result_summary: a summary object tracking the test results.
750
751         Return:
752           The number of unexpected results (0 == success)
753         """
754         # collect_tests() must have been called first to initialize us.
755         # If we didn't find any files to test, we've errored out already in
756         # prepare_lists_and_print_output().
757         assert(len(self._test_files))
758
759         start_time = time.time()
760
761         keyboard_interrupted, thread_timings, test_timings, \
762             individual_test_timings = (
763             self._run_tests(self._test_files_list, result_summary))
764
765         # We exclude the crashes from the list of results to retry, because
766         # we want to treat even a potentially flaky crash as an error.
767         failures = self._get_failures(result_summary, include_crashes=False)
768         retry_summary = result_summary
769         while (len(failures) and self._options.retry_failures and
770             not self._retrying and not keyboard_interrupted):
771             _log.info('')
772             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
773             _log.info('')
774             self._retrying = True
775             retry_summary = ResultSummary(self._expectations, failures.keys())
776             # Note that we intentionally ignore the return value here.
777             self._run_tests(failures.keys(), retry_summary)
778             failures = self._get_failures(retry_summary, include_crashes=True)
779
780         end_time = time.time()
781
782         self._print_timing_statistics(end_time - start_time,
783                                       thread_timings, test_timings,
784                                       individual_test_timings,
785                                       result_summary)
786
787         self._print_result_summary(result_summary)
788
789         sys.stdout.flush()
790         sys.stderr.flush()
791
792         self._printer.print_one_line_summary(result_summary.total,
793                                              result_summary.expected,
794                                              result_summary.unexpected)
795
796         unexpected_results = summarize_unexpected_results(self._port,
797             self._expectations, result_summary, retry_summary)
798         self._printer.print_unexpected_results(unexpected_results)
799
800         if self._options.record_results:
801             # Write the same data to log files.
802             self._write_json_files(unexpected_results, result_summary,
803                                    individual_test_timings)
804
805             # Upload generated JSON files to appengine server.
806             self._upload_json_files()
807
808         # Write the summary to disk (results.html) and display it if requested.
809         wrote_results = self._write_results_html_file(result_summary)
810         if self._options.show_results and wrote_results:
811             self._show_results_html_file()
812
813         # Now that we've completed all the processing we can, we re-raise
814         # a KeyboardInterrupt if necessary so the caller can handle it.
815         if keyboard_interrupted:
816             raise KeyboardInterrupt
817
818         # Ignore flaky failures and unexpected passes so we don't turn the
819         # bot red for those.
820         return unexpected_results['num_regressions']
821
822     def clean_up_run(self):
823         """Restores the system after we're done running tests."""
824
825         _log.debug("flushing stdout")
826         sys.stdout.flush()
827         _log.debug("flushing stderr")
828         sys.stderr.flush()
829         _log.debug("stopping http server")
830         self._port.stop_http_server()
831         _log.debug("stopping websocket server")
832         self._port.stop_websocket_server()
833         _log.debug("stopping helper")
834         self._port.stop_helper()
835
836     def update_summary(self, result_summary):
837         """Update the summary and print results with any completed tests."""
838         while True:
839             try:
840                 result = self._result_queue.get_nowait()
841             except Queue.Empty:
842                 return
843
844             expected = self._expectations.matches_an_expected_result(
845                 result.filename, result.type, self._options.pixel_tests)
846             result_summary.add(result, expected)
847             exp_str = self._expectations.get_expectations_string(
848                 result.filename)
849             got_str = self._expectations.expectation_to_string(result.type)
850             self._printer.print_test_result(result, expected, exp_str, got_str)
851             self._printer.print_progress(result_summary, self._retrying,
852                                          self._test_files_list)
853
854     def _clobber_old_results(self):
855         # Just clobber the actual test results directories since the other
856         # files in the results directory are explicitly used for cross-run
857         # tracking.
858         self._printer.print_update("Clobbering old results in %s" %
859                                    self._options.results_directory)
860         layout_tests_dir = self._port.layout_tests_dir()
861         possible_dirs = self._port.test_dirs()
862         for dirname in possible_dirs:
863             if os.path.isdir(os.path.join(layout_tests_dir, dirname)):
864                 shutil.rmtree(os.path.join(self._options.results_directory,
865                                            dirname),
866                               ignore_errors=True)
867
868     def _get_failures(self, result_summary, include_crashes):
869         """Filters a dict of results and returns only the failures.
870
871         Args:
872           result_summary: the results of the test run
873           include_crashes: whether crashes are included in the output.
874             We use False when finding the list of failures to retry
875             to see if the results were flaky. Although the crashes may also be
876             flaky, we treat them as if they aren't so that they're not ignored.
877         Returns:
878           a dict of files -> results
879         """
880         failed_results = {}
881         for test, result in result_summary.unexpected_results.iteritems():
882             if (result == test_expectations.PASS or
883                 (result == test_expectations.CRASH and not include_crashes)):
884                 continue
885             failed_results[test] = result
886
887         return failed_results
888
889     def _write_json_files(self, unexpected_results, result_summary,
890                         individual_test_timings):
891         """Writes the results of the test run as JSON files into the results
892         dir.
893
894         There are three different files written into the results dir:
895           unexpected_results.json: A short list of any unexpected results.
896             This is used by the buildbots to display results.
897           expectations.json: This is used by the flakiness dashboard.
898           results.json: A full list of the results - used by the flakiness
899             dashboard and the aggregate results dashboard.
900
901         Args:
902           unexpected_results: dict of unexpected results
903           result_summary: full summary object
904           individual_test_timings: list of test times (used by the flakiness
905             dashboard).
906         """
907         results_directory = self._options.results_directory
908         _log.debug("Writing JSON files in %s." % results_directory)
909         unexpected_json_path = os.path.join(results_directory, "unexpected_results.json")
910         with codecs.open(unexpected_json_path, "w", "utf-8") as file:
911             simplejson.dump(unexpected_results, file, sort_keys=True, indent=2)
912
913         # Write a json file of the test_expectations.txt file for the layout
914         # tests dashboard.
915         expectations_path = os.path.join(results_directory, "expectations.json")
916         expectations_json = \
917             self._expectations.get_expectations_json_for_all_platforms()
918         with codecs.open(expectations_path, "w", "utf-8") as file:
919             file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json)
920
921         json_layout_results_generator.JSONLayoutResultsGenerator(
922             self._port, self._options.builder_name, self._options.build_name,
923             self._options.build_number, self._options.results_directory,
924             BUILDER_BASE_URL, individual_test_timings,
925             self._expectations, result_summary, self._test_files_list,
926             not self._options.upload_full_results,
927             self._options.test_results_server)
928
929         _log.debug("Finished writing JSON files.")
930
931     def _upload_json_files(self):
932         if not self._options.test_results_server:
933             return
934
935         _log.info("Uploading JSON files for builder: %s",
936                    self._options.builder_name)
937
938         attrs = [("builder", self._options.builder_name)]
939         json_files = ["expectations.json"]
940         if self._options.upload_full_results:
941             json_files.append("results.json")
942         else:
943             json_files.append("incremental_results.json")
944
945         files = [(file, os.path.join(self._options.results_directory, file))
946             for file in json_files]
947
948         uploader = test_results_uploader.TestResultsUploader(
949             self._options.test_results_server)
950         try:
951             # Set an upload timeout in case the appengine server is having
952             # problems. 120 seconds is more than enough to upload test results.
953             uploader.upload(attrs, files, 120)
954         except Exception, err:
955             _log.error("Upload failed: %s" % err)
956             return
957
958         _log.info("JSON files uploaded.")
959
960     def _print_config(self):
961         """Prints the configuration for the test run."""
962         p = self._printer
963         p.print_config("Using port '%s'" % self._port.name())
964         p.print_config("Placing test results in %s" %
965                        self._options.results_directory)
966         if self._options.new_baseline:
967             p.print_config("Placing new baselines in %s" %
968                            self._port.baseline_path())
969         p.print_config("Using %s build" % self._options.configuration)
970         if self._options.pixel_tests:
971             p.print_config("Pixel tests enabled")
972         else:
973             p.print_config("Pixel tests disabled")
974
975         p.print_config("Regular timeout: %s, slow test timeout: %s" %
976                        (self._options.time_out_ms,
977                         self._options.slow_time_out_ms))
978
979         if self._is_single_threaded():
980             p.print_config("Running one %s" % self._port.driver_name())
981         else:
982             p.print_config("Running %s %ss in parallel" %
983                            (self._options.child_processes,
984                             self._port.driver_name()))
985         p.print_config("")
986
987     def _print_expected_results_of_type(self, result_summary,
988                                         result_type, result_type_str):
989         """Print the number of the tests in a given result class.
990
991         Args:
992           result_summary - the object containing all the results to report on
993           result_type - the particular result type to report in the summary.
994           result_type_str - a string description of the result_type.
995         """
996         tests = self._expectations.get_tests_with_result_type(result_type)
997         now = result_summary.tests_by_timeline[test_expectations.NOW]
998         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
999         defer = result_summary.tests_by_timeline[test_expectations.DEFER]
1000
1001         # We use a fancy format string in order to print the data out in a
1002         # nicely-aligned table.
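        # Produces lines like (illustrative widths and counts):
        #   Expect:   823 passes   (  790 now,   20 defer,   13 wontfix)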
1003         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd defer, %%%dd wontfix)"
1004                   % (self._num_digits(now), self._num_digits(defer),
1005                   self._num_digits(wontfix)))
1006         self._printer.print_expected(fmtstr %
1007             (len(tests), result_type_str, len(tests & now),
1008              len(tests & defer), len(tests & wontfix)))
1009
1010     def _num_digits(self, num):
1011         """Returns the number of digits needed to represent the length of a
1012         sequence."""
1013         ndigits = 1
1014         if len(num):
1015             ndigits = int(math.log10(len(num))) + 1
1016         return ndigits
1017
1018     def _print_timing_statistics(self, total_time, thread_timings,
1019                                directory_test_timings, individual_test_timings,
1020                                result_summary):
1021         """Record timing-specific information for the test run.
1022
1023         Args:
1024           total_time: total elapsed time (in seconds) for the test run
1025           thread_timings: wall clock time each thread ran for
1026           directory_test_timings: timing by directory
1027           individual_test_timings: timing by file
1028           result_summary: summary object for the test run
1029         """
1030         self._printer.print_timing("Test timing:")
1031         self._printer.print_timing("  %6.2f total testing time" % total_time)
1032         self._printer.print_timing("")
1033         self._printer.print_timing("Thread timing:")
1034         cuml_time = 0
1035         for t in thread_timings:
1036             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
1037                   (t['name'], t['num_tests'], t['total_time']))
1038             cuml_time += t['total_time']
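        # "optimal" is the wall-clock time we would expect if the cumulative
        # thread time were spread evenly across all child processes.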
1039         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
1040               (cuml_time, cuml_time / int(self._options.child_processes)))
1041         self._printer.print_timing("")
1042
1043         self._print_aggregate_test_statistics(individual_test_timings)
1044         self._print_individual_test_times(individual_test_timings,
1045                                           result_summary)
1046         self._print_directory_timings(directory_test_timings)
1047
1048     def _print_aggregate_test_statistics(self, individual_test_timings):
1049         """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
1050         Args:
1051           individual_test_timings: List of dump_render_tree_thread.TestStats
1052               for all tests.
1053         """
1054         test_types = []  # Unit tests don't actually produce any timings.
1055         if individual_test_timings:
1056             test_types = individual_test_timings[0].time_for_diffs.keys()
1057         times_for_dump_render_tree = []
1058         times_for_diff_processing = []
1059         times_per_test_type = {}
1060         for test_type in test_types:
1061             times_per_test_type[test_type] = []
1062
1063         for test_stats in individual_test_timings:
1064             times_for_dump_render_tree.append(test_stats.test_run_time)
1065             times_for_diff_processing.append(
1066                 test_stats.total_time_for_all_diffs)
1067             time_for_diffs = test_stats.time_for_diffs
1068             for test_type in test_types:
1069                 times_per_test_type[test_type].append(
1070                     time_for_diffs[test_type])
1071
1072         self._print_statistics_for_test_timings(
1073             "PER TEST TIME IN TESTSHELL (seconds):",
1074             times_for_dump_render_tree)
1075         self._print_statistics_for_test_timings(
1076             "PER TEST DIFF PROCESSING TIMES (seconds):",
1077             times_for_diff_processing)
1078         for test_type in test_types:
1079             self._print_statistics_for_test_timings(
1080                 "PER TEST TIMES BY TEST TYPE: %s" % test_type,
1081                 times_per_test_type[test_type])
1082
1083     def _print_individual_test_times(self, individual_test_timings,
1084                                   result_summary):
1085         """Prints the run times for slow, timeout and crash tests.
1086         Args:
1087           individual_test_timings: List of dump_render_tree_thread.TestStats
1088               for all tests.
1089           result_summary: summary object for test run
1090         """
1091         # Reverse-sort by the time spent in DumpRenderTree.
1092         individual_test_timings.sort(lambda a, b:
1093             cmp(b.test_run_time, a.test_run_time))
1094
1095         num_printed = 0
1096         slow_tests = []
1097         timeout_or_crash_tests = []
1098         unexpected_slow_tests = []
1099         for test_tuple in individual_test_timings:
1100             filename = test_tuple.filename
1101             is_timeout_crash_or_slow = False
1102             if self._expectations.has_modifier(filename,
1103                                                test_expectations.SLOW):
1104                 is_timeout_crash_or_slow = True
1105                 slow_tests.append(test_tuple)
1106
1107             if filename in result_summary.failures:
1108                 result = result_summary.results[filename].type
1109                 if (result == test_expectations.TIMEOUT or
1110                     result == test_expectations.CRASH):
1111                     is_timeout_crash_or_slow = True
1112                     timeout_or_crash_tests.append(test_tuple)
1113
1114             if (not is_timeout_crash_or_slow and
1115                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
1116                 num_printed = num_printed + 1
1117                 unexpected_slow_tests.append(test_tuple)
1118
1119         self._printer.print_timing("")
1120         self._print_test_list_timing("%s slowest tests that are not "
1121             "marked as SLOW and did not timeout/crash:" %
1122             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
1123         self._printer.print_timing("")
1124         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1125         self._printer.print_timing("")
1126         self._print_test_list_timing("Tests that timed out or crashed:",
1127                                      timeout_or_crash_tests)
1128         self._printer.print_timing("")
1129
1130     def _print_test_list_timing(self, title, test_list):
1131         """Print timing info for each test.
1132
1133         Args:
1134           title: section heading
1135           test_list: tests that fall in this section
1136         """
1137         if self._printer.disabled('slowest'):
1138             return
1139
1140         self._printer.print_timing(title)
1141         for test_tuple in test_list:
1142             filename = test_tuple.filename[len(
1143                 self._port.layout_tests_dir()) + 1:]
1144             filename = filename.replace('\\', '/')
1145             test_run_time = round(test_tuple.test_run_time, 1)
1146             self._printer.print_timing("  %s took %s seconds" %
1147                                        (filename, test_run_time))
1148
1149     def _print_directory_timings(self, directory_test_timings):
1150         """Print timing info by directory for any directories that
1151         take > 10 seconds to run.
1152
1153         Args:
1154           directory_test_timings: time info for each directory
1155         """
1156         timings = []
1157         for directory in directory_test_timings:
1158             num_tests, time_for_directory = directory_test_timings[directory]
1159             timings.append((round(time_for_directory, 1), directory,
1160                             num_tests))
1161         timings.sort()
1162
1163         self._printer.print_timing("Time to process slowest subdirectories:")
1164         min_seconds_to_print = 10
1165         for timing in timings:
1166             if timing[0] > min_seconds_to_print:
1167                 self._printer.print_timing(
1168                     "  %s took %s seconds to run %s tests." % (timing[1],
1169                     timing[0], timing[2]))
1170         self._printer.print_timing("")
1171
1172     def _print_statistics_for_test_timings(self, title, timings):
1173         """Prints the median, mean and standard deviation of the values in
1174         timings.
1175
1176         Args:
1177           title: Title for these timings.
1178           timings: A list of floats representing times.
1179         """
1180         self._printer.print_timing(title)
1181         timings.sort()
1182
1183         num_tests = len(timings)
1184         if not num_tests:
1185             return
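        # Simple index-based percentiles over the ascending-sorted timings.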
1186         percentile90 = timings[int(.9 * num_tests)]
1187         percentile99 = timings[int(.99 * num_tests)]
1188
1189         if num_tests % 2 == 1:
1190             median = timings[(num_tests - 1) / 2]
1191         else:
1192             lower = timings[num_tests / 2 - 1]
1193             upper = timings[num_tests / 2]
1194             median = (float(lower + upper)) / 2
1195
1196         mean = sum(timings) / num_tests
1197
1198         sum_of_deviations = 0
1199         for timing in timings:
1200             sum_of_deviations += math.pow(timing - mean, 2)
1201         std_deviation = math.sqrt(sum_of_deviations / num_tests)
1202         self._printer.print_timing("  Median:          %6.3f" % median)
1203         self._printer.print_timing("  Mean:            %6.3f" % mean)
1204         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1205         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1206         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1207         self._printer.print_timing("")
1208
1209     def _print_result_summary(self, result_summary):
1210         """Print a short summary about how many tests passed.
1211
1212         Args:
1213           result_summary: information to log
1214         """
1215         failed = len(result_summary.failures)
1216         skipped = len(
1217             result_summary.tests_by_expectation[test_expectations.SKIP])
1218         total = result_summary.total
1219         passed = total - failed - skipped
1220         pct_passed = 0.0
1221         if total > 0:
1222             pct_passed = float(passed) * 100 / total
1223
1224         self._printer.print_actual("")
1225         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1226                      (passed, total, pct_passed))
1227         self._printer.print_actual("")
1228         self._print_result_summary_entry(result_summary,
1229             test_expectations.NOW, "Tests to be fixed for the current release")
1230
1231         self._printer.print_actual("")
1232         self._print_result_summary_entry(result_summary,
1233             test_expectations.DEFER,
1234             "Tests we'll fix in the future if they fail (DEFER)")
1235
1236         self._printer.print_actual("")
1237         self._print_result_summary_entry(result_summary,
1238             test_expectations.WONTFIX,
1239             "Tests that will only be fixed if they crash (WONTFIX)")
1240         self._printer.print_actual("")
1241
1242     def _print_result_summary_entry(self, result_summary, timeline,
1243                                     heading):
1244         """Print a summary block of results for a particular timeline of test.
1245
1246         Args:
1247           result_summary: summary to print results for
1248           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1249           heading: a textual description of the timeline
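
        Example output (illustrative; the heading, counts, and expectation
        descriptions are made up):

          => Tests to be fixed for the current release (25):
               20 text failures            (80.0%)
                5 crashes                  (20.0%)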
1250         """
1251         total = len(result_summary.tests_by_timeline[timeline])
1252         not_passing = (total -
1253            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1254                result_summary.tests_by_timeline[timeline]))
1255         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1256
1257         for result in TestExpectationsFile.EXPECTATION_ORDER:
1258             if result == test_expectations.PASS:
1259                 continue
1260             results = (result_summary.tests_by_expectation[result] &
1261                        result_summary.tests_by_timeline[timeline])
1262             desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
1263             if not_passing and len(results):
1264                 pct = len(results) * 100.0 / not_passing
1265                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1266                     (len(results), desc[len(results) != 1], pct))
1267
1268     def _results_html(self, test_files, failures, title="Test Failures", override_time=None):
1269         """
1270         test_files = a list of file paths
1271         failures = dictionary mapping test paths to failure objects
1272         title = title printed at the top of the results page
1273         override_time = current time (used by unit tests)
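
        Example (illustrative; the test path and failure object are made up):

          html = self._results_html(
              ['LayoutTests/fast/js/foo.html'],
              {'LayoutTests/fast/js/foo.html': [failure]},
              title='Unexpected Test Failures')

        The call above returns an HTML page with one linked entry per test
        file and one line per failure beneath it.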
1274         """
1275         page = """<html>
1276   <head>
1277     <title>Layout Test Results (%(time)s)</title>
1278   </head>
1279   <body>
1280     <h2>%(title)s (%(time)s)</h2>
1281         """ % {'title': title, 'time': override_time or time.asctime()}
1282
1283         for test_file in sorted(test_files):
1284             test_name = self._port.relative_test_filename(test_file)
1285             test_url = self._port.filename_to_uri(test_file)
1286             page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name)
1287             test_failures = failures.get(test_file, [])
1288             for failure in test_failures:
1289                 page += u"&nbsp;&nbsp;%s<br/>" % failure.result_html_output(test_name)
1290             page += "</p>\n"
1291         page += "</body></html>\n"
1292         return page
1293
1294     def _write_results_html_file(self, result_summary):
1295         """Write results.html which is a summary of tests that failed.
1296
1297         Args:
1298           result_summary: a summary of the results :)
1299
1300         Returns:
1301           True if any results were written (since expected failures may be
1302           omitted)
1303         """
1304         # test failures
1305         if self._options.full_results_html:
1306             results_title = "Test Failures"
1307             test_files = result_summary.failures.keys()
1308         else:
1309             results_title = "Unexpected Test Failures"
1310             unexpected_failures = self._get_failures(result_summary,
1311                 include_crashes=True)
1312             test_files = unexpected_failures.keys()
1313         if not len(test_files):
1314             return False
1315
1316         out_filename = os.path.join(self._options.results_directory,
1317                                     "results.html")
1318         with codecs.open(out_filename, "w", "utf-8") as results_file:
1319             html = self._results_html(test_files, result_summary.failures, results_title)
1320             results_file.write(html)
1321
1322         return True
1323
1324     def _show_results_html_file(self):
1325         """Shows the results.html page."""
1326         results_filename = os.path.join(self._options.results_directory,
1327                                         "results.html")
1328         self._port.show_results_html_file(results_filename)
1329
1330
1331 def read_test_files(files):
1332     tests = []
1333     for file in files:
1334         try:
1335             with codecs.open(file, 'r', 'utf-8') as file_contents:
1336                 # FIXME: This could be cleaner using a list comprehension.
1337                 for line in file_contents:
1338                     line = test_expectations.strip_comments(line)
1339                     if line:
1340                         tests.append(line)
1341         except IOError, e:
1342             if e.errno == errno.ENOENT:
1343                 _log.critical('')
1344                 _log.critical('--test-list file "%s" not found' % file)
1345             raise
1346     return tests
1347
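# Example (illustrative): a --test-list file read by read_test_files() above
# names one test or directory per line; lines that strip_comments() reduces
# to nothing (such as '//' comment lines) are skipped.  Given a hypothetical
# file my_tests.txt containing
#
#   // tests currently under investigation
#   fast/js/kde
#   fast/dom/Document/createElement.html
#
# read_test_files(['my_tests.txt']) would return something like
# ['fast/js/kde', 'fast/dom/Document/createElement.html'].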
1348
1349 def run(port, options, args, regular_output=sys.stderr,
1350         buildbot_output=sys.stdout):
1351     """Run the tests.
1352
1353     Args:
1354       port: Port object for port-specific behavior
1355       options: command line options (an optparse.Values instance)
1356       args: a list of sub directories or files to test
1357       regular_output: a stream-like object that we can send logging/debug
1358           output to
1359       buildbot_output: a stream-like object to which we write all output
1360           that is intended to be parsed by the buildbot
1361     Returns:
1362       the number of unexpected results that occurred, or -1 if there is an
1363           error.
1364
1365     """
1366     _set_up_derived_options(port, options)
1367
1368     printer = printing.Printer(port, options, regular_output, buildbot_output,
1369         int(options.child_processes), options.experimental_fully_parallel)
1370     if options.help_printing:
1371         printer.help_printing()
1372         printer.cleanup()
1373         return 0
1374
1375     last_unexpected_results = _gather_unexpected_results(options)
1376     if options.print_last_failures:
1377         printer.write("\n".join(last_unexpected_results) + "\n")
1378         printer.cleanup()
1379         return 0
1380
1381     # We wrap any parts of the run that are slow or likely to raise exceptions
1382     # in a try/finally to ensure that we clean up the logging configuration.
1383     num_unexpected_results = -1
1384     try:
1385         test_runner = TestRunner(port, options, printer)
1386         test_runner._print_config()
1387
1388         printer.print_update("Collecting tests ...")
1389         try:
1390             test_runner.collect_tests(args, last_unexpected_results)
1391         except IOError, e:
1392             if e.errno == errno.ENOENT:
1393                 return -1
1394             raise
1395
1396         printer.print_update("Parsing expectations ...")
1397         if options.lint_test_files:
1398             return test_runner.lint()
1399         test_runner.parse_expectations(port.test_platform_name(),
1400                                        options.configuration == 'Debug')
1401
1402         printer.print_update("Checking build ...")
1403         if not port.check_build(test_runner.needs_http()):
1404             _log.error("Build check failed")
1405             return -1
1406
1407         result_summary = test_runner.set_up_run()
1408         if result_summary:
1409             num_unexpected_results = test_runner.run(result_summary)
1410             test_runner.clean_up_run()
1411             _log.debug("Testing completed, exit status: %d" %
1412                        num_unexpected_results)
1413     finally:
1414         printer.cleanup()
1415
1416     return num_unexpected_results
1417
1418
1419 def _set_up_derived_options(port_obj, options):
1420     """Sets the options values that depend on other options values."""
1421
1422     if not options.child_processes:
1423         # FIXME: Investigate perf/flakiness impact of using cpu_count + 1.
1424         options.child_processes = str(port_obj.default_child_processes())
1425
1426     if not options.configuration:
1427         options.configuration = port_obj.default_configuration()
1428
1429     if options.pixel_tests is None:
1430         options.pixel_tests = True
1431
1432     if not options.use_apache:
1433         options.use_apache = sys.platform in ('darwin', 'linux2')
1434
1435     if options.results_directory.startswith("/"):
1436         # Assume it's an absolute path and normalize.
1437         options.results_directory = port_obj.get_absolute_path(
1438             options.results_directory)
1439     else:
1440         # If it's a relative path, make the output directory relative to
1441         # Debug or Release.
1442         options.results_directory = port_obj.results_directory()
1443
1444     if not options.time_out_ms:
1445         if options.configuration == "Debug":
1446             options.time_out_ms = str(2 * TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1447         else:
1448             options.time_out_ms = str(TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1449
1450     options.slow_time_out_ms = str(5 * int(options.time_out_ms))
1451
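# Example (illustrative): if TestRunner.DEFAULT_TEST_TIMEOUT_MS were 6000 (a
# made-up value) and the configuration is Debug with no --time-out-ms given,
# _set_up_derived_options() above would set options.time_out_ms to '12000'
# (2 x the default) and options.slow_time_out_ms to '60000'
# (5 x time_out_ms).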
1452
1453 def _gather_unexpected_results(options):
1454     """Returns the unexpected results from the previous run, if any."""
1455     last_unexpected_results = []
1456     if options.print_last_failures or options.retest_last_failures:
1457         unexpected_results_filename = os.path.join(
1458             options.results_directory, "unexpected_results.json")
1459         with codecs.open(unexpected_results_filename, "r", "utf-8") as file:
1460             results = simplejson.load(file)
1461         last_unexpected_results = results['tests'].keys()
1462     return last_unexpected_results
1463
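# Example (illustrative): _gather_unexpected_results() only relies on the
# top-level "tests" mapping in unexpected_results.json, so a file shaped like
#
#   {"tests": {"fast/js/foo.html": {}, "fast/dom/bar.html": {}}}
#
# would yield those two test names as last_unexpected_results.  The test
# names and per-test values shown here are hypothetical.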
1464
1465 def _compat_shim_callback(option, opt_str, value, parser):
1466     print "Ignoring unsupported option: %s" % opt_str
1467
1468
1469 def _compat_shim_option(option_name, **kwargs):
1470     return optparse.make_option(option_name, action="callback",
1471         callback=_compat_shim_callback,
1472         help="Ignored, for old-run-webkit-tests compat only.", **kwargs)
1473
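# Example (illustrative): _compat_shim_option("--no-sample-on-timeout")
# builds an option that is accepted but ignored; passing it on the command
# line just prints
#
#   Ignoring unsupported option: --no-sample-on-timeout
#
# and parsing continues normally.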
1474
1475 def parse_args(args=None):
1476     """Defines the supported command line options and parses args.
1477
1478     Returns a tuple of (options, args) from optparse."""
1479
1480     # FIXME: All of these options should be stored closer to the code which
1481     # FIXME: actually uses them. configuration_options should move
1482     # FIXME: to WebKitPort and be shared across all scripts.
1483     configuration_options = [
1484         optparse.make_option("-t", "--target", dest="configuration",
1485                              help="(DEPRECATED)"),
1486         # FIXME: --help should display which configuration is default.
1487         optparse.make_option('--debug', action='store_const', const='Debug',
1488                              dest="configuration",
1489                              help='Set the configuration to Debug'),
1490         optparse.make_option('--release', action='store_const',
1491                              const='Release', dest="configuration",
1492                              help='Set the configuration to Release'),
1493         # old-run-webkit-tests also accepts -c, --configuration CONFIGURATION.
1494     ]
1495
1496     print_options = printing.print_options()
1497
1498     # FIXME: These options should move onto the ChromiumPort.
1499     chromium_options = [
1500         optparse.make_option("--chromium", action="store_true", default=False,
1501             help="use the Chromium port"),
1502         optparse.make_option("--startup-dialog", action="store_true",
1503             default=False, help="create a dialog on DumpRenderTree startup"),
1504         optparse.make_option("--gp-fault-error-box", action="store_true",
1505             default=False, help="enable Windows GP fault error box"),
1506         optparse.make_option("--nocheck-sys-deps", action="store_true",
1507             default=False,
1508             help="Don't check the system dependencies (themes)"),
1509         optparse.make_option("--use-drt", action="store_true",
1510             default=False,
1511             help="Use DumpRenderTree instead of test_shell"),
1512         optparse.make_option("--accelerated-compositing",
1513             action="store_true",
1514             help="Use hardware-accelerated compositing for rendering"),
1515         optparse.make_option("--no-accelerated-compositing",
1516             action="store_false",
1517             dest="accelerated_compositing",
1518             help="Don't use hardware-accelerated compositing for rendering"),
1519         optparse.make_option("--accelerated-2d-canvas",
1520             action="store_true",
1521             help="Use hardware-accelerated 2D Canvas calls"),
1522         optparse.make_option("--no-accelerated-2d-canvas",
1523             action="store_false",
1524             dest="accelerated_2d_canvas",
1525             help="Don't use hardware-accelerated 2D Canvas calls"),
1526     ]
1527
1528     # Missing Mac-specific old-run-webkit-tests options:
1529     # FIXME: Need: -g, --guard for guard malloc support on Mac.
1530     # FIXME: Need: -l --leaks    Enable leaks checking.
1531     # FIXME: Need: --sample-on-timeout Run sample on timeout
1532
1533     old_run_webkit_tests_compat = [
1534         # NRWT doesn't generate results by default anyway.
1535         _compat_shim_option("--no-new-test-results"),
1536         # NRWT doesn't sample on timeout yet anyway.
1537         _compat_shim_option("--no-sample-on-timeout"),
1538         # FIXME: NRWT needs to support remote links eventually.
1539         _compat_shim_option("--use-remote-links-to-tests"),
1540         # FIXME: NRWT doesn't need this option as much since failures are
1541         # designed to be cheap.  We eventually plan to add this support.
1542         _compat_shim_option("--exit-after-n-failures", nargs=1, type="int"),
1543     ]
1544
1545     results_options = [
1546         # NEED for bots: --use-remote-links-to-tests Link to test files
1547         # within the SVN repository in the results.
1548         optparse.make_option("-p", "--pixel-tests", action="store_true",
1549             dest="pixel_tests", help="Enable pixel-to-pixel PNG comparisons"),
1550         optparse.make_option("--no-pixel-tests", action="store_false",
1551             dest="pixel_tests", help="Disable pixel-to-pixel PNG comparisons"),
1552         # old-run-webkit-tests allows a specific tolerance: --tolerance t
1553         # Ignore image differences less than this percentage (default: 0.1)
1554         optparse.make_option("--results-directory",
1555             default="layout-test-results",
1556             help="Output results directory, relative to the Debug or "
1557                  "Release build directory."),
1558         optparse.make_option("--new-baseline", action="store_true",
1559             default=False, help="Save all generated results as new baselines "
1560                  "into the platform directory, overwriting whatever's "
1561                  "already there."),
1562         optparse.make_option("--reset-results", action="store_true",
1563             default=False, help="Reset any existing baselines to the "
1564                  "generated results"),
1565         optparse.make_option("--no-show-results", action="store_false",
1566             default=True, dest="show_results",
1567             help="Don't launch a browser with results after the tests "
1568                  "are done"),
1569         # FIXME: We should have a helper function to do this sort of
1570         # deprecated mapping and automatically log, etc.
1571         optparse.make_option("--noshow-results", action="store_false",
1572             dest="show_results",
1573             help="Deprecated, same as --no-show-results."),
1574         optparse.make_option("--no-launch-safari", action="store_false",
1575             dest="show_results",
1576             help="old-run-webkit-tests compat, same as --noshow-results."),
1577         # old-run-webkit-tests:
1578         # --[no-]launch-safari    Launch (or do not launch) Safari to display
1579         #                         test results (default: launch)
1580         optparse.make_option("--full-results-html", action="store_true",
1581             default=False,
1582             help="Show all failures in results.html, rather than only "
1583                  "regressions"),
1584         optparse.make_option("--clobber-old-results", action="store_true",
1585             default=False, help="Clobbers test results from previous runs."),
1586         optparse.make_option("--platform",
1587             help="Override the platform for expected results"),
1588         optparse.make_option("--no-record-results", action="store_false",
1589             default=True, dest="record_results",
1590             help="Don't record the results."),
1591         # old-run-webkit-tests also has HTTP toggle options:
1592         # --[no-]http                     Run (or do not run) http tests
1593         #                                 (default: run)
1594         # --[no-]wait-for-httpd           Wait for httpd if some other test
1595         #                                 session is using it already (same
1596         #                                 as WEBKIT_WAIT_FOR_HTTPD=1).
1597         #                                 (default: 0)
1598     ]
1599
1600     test_options = [
1601         optparse.make_option("--build", dest="build",
1602             action="store_true", default=True,
1603             help="Check to ensure the DumpRenderTree build is up-to-date "
1604                  "(default)."),
1605         optparse.make_option("--no-build", dest="build",
1606             action="store_false", help="Don't check to see if the "
1607                                        "DumpRenderTree build is up-to-date."),
1608         # old-run-webkit-tests has --valgrind instead of --wrapper.
1609         optparse.make_option("--wrapper",
1610             help="wrapper command to insert before invocations of "
1611                  "DumpRenderTree; option is split on whitespace before "
1612                  "running. (Example: --wrapper='valgrind --smc-check=all')"),
1613         # old-run-webkit-tests:
1614         # -i|--ignore-tests               Comma-separated list of directories
1615         #                                 or tests to ignore
1616         optparse.make_option("--test-list", action="append",
1617             help="read list of tests to run from file", metavar="FILE"),
1618         # old-run-webkit-tests uses --skipped=[default|ignore|only]
1619         # instead of --force:
1620         optparse.make_option("--force", action="store_true", default=False,
1621             help="Run all tests, even those marked SKIP in the test list"),
1622         optparse.make_option("--use-apache", action="store_true",
1623             default=False, help="Whether to use apache instead of lighttpd."),
1624         optparse.make_option("--time-out-ms",
1625             help="Set the timeout for each test"),
1626         # old-run-webkit-tests calls --randomize-order: --random
1627         optparse.make_option("--randomize-order", action="store_true",
1628             default=False, help=("Run tests in random order (useful "
1629                                 "for tracking down corruption)")),
1630         optparse.make_option("--run-chunk",
1631             help=("Run a specified chunk (n:l), the nth of len l, "
1632                  "of the layout tests")),
1633         optparse.make_option("--run-part", help=("Run a specified part (n:m), "
1634                   "the nth of m parts, of the layout tests")),
1635         # old-run-webkit-tests calls --batch-size: --nthly n
1636         #   Restart DumpRenderTree every n tests (default: 1000)
1637         optparse.make_option("--batch-size",
1638             help=("Run the tests in batches of n; after every n tests, "
1639                   "DumpRenderTree is relaunched."), type="int", default=0),
1640         # old-run-webkit-tests calls --run-singly: -1|--singly
1641         # Isolate each test case run (implies --nthly 1 --verbose)
1642         optparse.make_option("--run-singly", action="store_true",
1643             default=False, help="run a separate DumpRenderTree for each test"),
1644         optparse.make_option("--child-processes",
1645             help="Number of DumpRenderTrees to run in parallel."),
1646         # FIXME: Display default number of child processes that will run.
1647         optparse.make_option("--experimental-fully-parallel",
1648             action="store_true", default=False,
1649             help="run all tests in parallel"),
1650         # FIXME: Need --exit-after-n-failures N
1651         #      Exit after the first N failures instead of running all tests
1652         # FIXME: Need --exit-after-n-crashes N
1653         #      Exit after the first N crashes instead of running all tests
1654         # FIXME: consider: --iterations n
1655         #      Number of times to run the set of tests (e.g. ABCABCABC)
1656         optparse.make_option("--print-last-failures", action="store_true",
1657             default=False, help="Print the tests in the last run that "
1658             "had unexpected failures (or passes)."),
1659         optparse.make_option("--retest-last-failures", action="store_true",
1660             default=False, help="re-test the tests in the last run that "
1661             "had unexpected failures (or passes)."),
1662         optparse.make_option("--retry-failures", action="store_true",
1663             default=True,
1664             help="Re-try any tests that produce unexpected results (default)"),
1665         optparse.make_option("--no-retry-failures", action="store_false",
1666             dest="retry_failures",
1667             help="Don't re-try any tests that produce unexpected results."),
1668     ]
1669
1670     misc_options = [
1671         optparse.make_option("--lint-test-files", action="store_true",
1672         default=False, help=("Makes sure the test files parse for all "
1673                             "configurations. Does not run any tests.")),
1674     ]
1675
1676     # FIXME: Move these into json_results_generator.py
1677     results_json_options = [
1678         optparse.make_option("--builder-name", default="DUMMY_BUILDER_NAME",
1679             help=("The name of the builder shown on the waterfall running "
1680                   "this script, e.g. WebKit.")),
1681         optparse.make_option("--build-name", default="DUMMY_BUILD_NAME",
1682             help=("The name of the builder used in its path, e.g. "
1683                   "webkit-rel.")),
1684         optparse.make_option("--build-number", default="DUMMY_BUILD_NUMBER",
1685             help=("The build number of the builder running this script.")),
1686         optparse.make_option("--test-results-server", default="",
1687             help=("If specified, upload results json files to this appengine "
1688                   "server.")),
1689         optparse.make_option("--upload-full-results",
1690             action="store_true",
1691             default=False,
1692             help="If true, upload full json results to server."),
1693     ]
1694
1695     option_list = (configuration_options + print_options +
1696                    chromium_options + results_options + test_options +
1697                    misc_options + results_json_options +
1698                    old_run_webkit_tests_compat)
1699     option_parser = optparse.OptionParser(option_list=option_list)
1700
1701     options, args = option_parser.parse_args(args)
1702
1703     return options, args
1704
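# Example (illustrative; the platform name and test paths are hypothetical):
#
#   options, args = parse_args(['--release', '--platform', 'mac',
#                               '--no-pixel-tests', 'fast/js', 'fast/dom'])
#   # options.configuration == 'Release', options.pixel_tests is False,
#   # and args == ['fast/js', 'fast/dom'].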
1705
1706 def _log_wedged_thread(thread):
1707     """Log information about the given thread state."""
1708     id = thread.id()
1709     stack = dump_render_tree_thread.find_thread_stack(id)
1710     assert(stack is not None)
1711     _log.error("")
1712     _log.error("thread %s (%d) is wedged" % (thread.getName(), id))
1713     dump_render_tree_thread.log_stack(stack)
1714     _log.error("")
1715
1716
1717 def main():
1718     options, args = parse_args()
1719     port_obj = port.get(options.platform, options)
1720     return run(port_obj, options, args)
1721
1722 if '__main__' == __name__:
1723     try:
1724         sys.exit(main())
1725     except KeyboardInterrupt:
1726         # this mirrors what the shell normally does
1727         sys.exit(signal.SIGINT + 128)