1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """Run layout tests.
32
33 This is a port of the existing webkit test script run-webkit-tests.
34
35 The TestRunner class runs a series of tests (TestType interface) against a set
36 of test files.  If a test file fails a TestType, it returns a list TestFailure
37 objects to the TestRunner.  The TestRunner then aggregates the TestFailures to
38 create a final report.
39
40 This script reads several files, if they exist in the test_lists subdirectory
41 next to this script itself.  Each should contain a list of paths to individual
42 tests or entire subdirectories of tests, relative to the outermost test
43 directory.  Entire lines starting with '//' (comments) will be ignored.
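
An illustrative (hypothetical) test list might contain entries such as:
    // Tests that need to be rebaselined
    fast/canvas
    fast/forms/example-test.html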
44
45 For details of the files' contents and purposes, see test_lists/README.
46 """
47
48 from __future__ import with_statement
49
50 import codecs
51 import errno
52 import glob
53 import logging
54 import math
55 import optparse
56 import os
57 import platform
58 import Queue
59 import random
60 import re
61 import shutil
62 import signal
63 import sys
64 import time
65 import traceback
66
67 from layout_package import dump_render_tree_thread
68 from layout_package import json_layout_results_generator
69 from layout_package import printing
70 from layout_package import test_expectations
71 from layout_package import test_failures
72 from layout_package import test_results
73 from layout_package import test_results_uploader
74 from test_types import image_diff
75 from test_types import text_diff
76 from test_types import test_type_base
77
78 from webkitpy.common.system import user
79 from webkitpy.thirdparty import simplejson
80
81 import port
82
83 _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
84
85 # Builder base URL where we have the archived test results.
86 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
87
88 LAYOUT_TESTS_DIRECTORY = "LayoutTests" + os.sep
89
90 TestExpectationsFile = test_expectations.TestExpectationsFile
91
92
93 class TestInfo:
94     """Groups information about a test for easy passing of data."""
95
96     def __init__(self, port, filename, timeout):
97         """Generates the URI and stores the filename and timeout for this test.
98         Args:
99           filename: Full path to the test.
100           timeout: Timeout for running the test in TestShell.
101         """
102         self.filename = filename
103         self._port = port
104         self.uri = port.filename_to_uri(filename)
105         self.timeout = timeout
106         self._image_checksum = -1
107
108     def image_hash(self):
109         # Read the image_hash lazily to reduce startup time.
110         # This class is accessed across threads, but only one thread should
111         # ever be dealing with any given TestInfo so no locking is needed.
112         #
113         # Note that we use -1 to indicate that we haven't read the value,
114         # because expected_checksum() returns a string or None.
115         if self._image_checksum == -1:
116             self._image_checksum = self._port.expected_checksum(self.filename)
117         return self._image_checksum
118
119
120 class ResultSummary(object):
121     """A class for partitioning the test results we get into buckets.
122
123     This class is basically a glorified struct and it's private to this file
124     so we don't bother with any information hiding."""
125
126     def __init__(self, expectations, test_files):
127         self.total = len(test_files)
128         self.remaining = self.total
129         self.expectations = expectations
130         self.expected = 0
131         self.unexpected = 0
132         self.tests_by_expectation = {}
133         self.tests_by_timeline = {}
134         self.results = {}
135         self.unexpected_results = {}
136         self.failures = {}
137         self.tests_by_expectation[test_expectations.SKIP] = set()
138         for expectation in TestExpectationsFile.EXPECTATIONS.values():
139             self.tests_by_expectation[expectation] = set()
140         for timeline in TestExpectationsFile.TIMELINES.values():
141             self.tests_by_timeline[timeline] = (
142                 expectations.get_tests_with_timeline(timeline))
143
144     def add(self, result, expected):
145         """Add a TestResult into the appropriate bin.
146
147         Args:
148           result: TestResult from dump_render_tree_thread.
149           expected: whether the result was what we expected it to be.
150         """
151
152         self.tests_by_expectation[result.type].add(result.filename)
153         self.results[result.filename] = result
154         self.remaining -= 1
155         if len(result.failures):
156             self.failures[result.filename] = result.failures
157         if expected:
158             self.expected += 1
159         else:
160             self.unexpected_results[result.filename] = result.type
161             self.unexpected += 1
162
163
164 def summarize_unexpected_results(port_obj, expectations, result_summary,
165                                  retry_summary):
166     """Summarize any unexpected results as a dict.
167
168     FIXME: split this data structure into a separate class?
169
170     Args:
171         port_obj: interface to port-specific hooks
172         expectations: test_expectations.TestExpectations object
173         result_summary: summary object from initial test runs
174         retry_summary: summary object from final test run of retried tests
175     Returns:
176         A dictionary containing a summary of the unexpected results from the
177         run, with the following fields:
178         'version': a version indicator (1 in this version)
179         'fixable': # of fixable tests (NOW - PASS)
180         'skipped': # of skipped tests (NOW & SKIPPED)
181         'num_regressions': # of non-flaky failures
182         'num_flaky': # of flaky failures
183         'num_passes': # of unexpected passes
184         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
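        For example (illustrative entry), a flaky test might appear as
        'fast/example.html': {'expected': 'PASS', 'actual': 'TIMEOUT PASS'}.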
185     """
186     results = {}
187     results['version'] = 1
188
189     tbe = result_summary.tests_by_expectation
190     tbt = result_summary.tests_by_timeline
191     results['fixable'] = len(tbt[test_expectations.NOW] -
192                                 tbe[test_expectations.PASS])
193     results['skipped'] = len(tbt[test_expectations.NOW] &
194                                 tbe[test_expectations.SKIP])
195
196     num_passes = 0
197     num_flaky = 0
198     num_regressions = 0
199     keywords = {}
200     for k, v in TestExpectationsFile.EXPECTATIONS.iteritems():
201         keywords[v] = k.upper()
202
203     tests = {}
204     for filename, result in result_summary.unexpected_results.iteritems():
205         # Note that if a test crashed in the original run, we ignore
206         # whether or not it crashed when we retried it (if we retried it),
207         # and always consider the result not flaky.
208         test = port_obj.relative_test_filename(filename)
209         expected = expectations.get_expectations_string(filename)
210         actual = [keywords[result]]
211
212         if result == test_expectations.PASS:
213             num_passes += 1
214         elif result == test_expectations.CRASH:
215             num_regressions += 1
216         else:
217             if filename not in retry_summary.unexpected_results:
218                 actual.extend(expectations.get_expectations_string(
219                     filename).split(" "))
220                 num_flaky += 1
221             else:
222                 retry_result = retry_summary.unexpected_results[filename]
223                 if result != retry_result:
224                     actual.append(keywords[retry_result])
225                     num_flaky += 1
226                 else:
227                     num_regressions += 1
228
229         tests[test] = {}
230         tests[test]['expected'] = expected
231         tests[test]['actual'] = " ".join(actual)
232
233     results['tests'] = tests
234     results['num_passes'] = num_passes
235     results['num_flaky'] = num_flaky
236     results['num_regressions'] = num_regressions
237
238     return results
239
240
241 class TestRunner:
242     """A class for managing running a series of tests on a series of layout
243     test files."""
244
245     HTTP_SUBDIR = os.sep.join(['', 'http', ''])
246     WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', ''])
247
248     # The per-test timeout in milliseconds, if no --time-out-ms option was
249     # given to run_webkit_tests. This should correspond to the default timeout
250     # in DumpRenderTree.
251     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
252
253     def __init__(self, port, options, printer):
254         """Initialize test runner data structures.
255
256         Args:
257           port: an object implementing port-specific functionality
258           options: a dictionary of command line options
259           printer: a Printer object to record updates to.
260         """
261         self._port = port
262         self._options = options
263         self._printer = printer
264
265         # disable wss server. need to install pyOpenSSL on buildbots.
266         # self._websocket_secure_server = websocket_server.PyWebSocket(
267         #        options.results_directory, use_tls=True, port=9323)
268
269         # a list of TestType objects
270         self._test_types = [text_diff.TestTextDiff]
271         if options.pixel_tests:
272             self._test_types.append(image_diff.ImageDiff)
273
274         # a set of test files, and the same tests as a list
275         self._test_files = set()
276         self._test_files_list = None
277         self._result_queue = Queue.Queue()
278         self._retrying = False
279
280     def collect_tests(self, args, last_unexpected_results):
281         """Find all the files to test.
282
283         Args:
284           args: list of test arguments from the command line
285           last_unexpected_results: list of unexpected results to retest, if any
286
287         """
288         paths = [self._strip_test_dir_prefix(arg) for arg in args if arg]
289         paths += last_unexpected_results
290         if self._options.test_list:
291             paths += read_test_files(self._options.test_list)
292         self._test_files = self._port.tests(paths)
293
294     def _strip_test_dir_prefix(self, path):
295         if path.startswith(LAYOUT_TESTS_DIRECTORY):
296             return path[len(LAYOUT_TESTS_DIRECTORY):]
297         return path
298
299     def lint(self):
300         # Creating the expectations for each platform/configuration pair does
301         # all the test list parsing and ensures the syntax is correct (e.g. no
302         # dupes).
303         for platform_name in self._port.test_platform_names():
304             self.parse_expectations(platform_name, is_debug_mode=True)
305             self.parse_expectations(platform_name, is_debug_mode=False)
306         self._printer.write("")
307         _log.info("If there are no fail messages, errors or exceptions, "
308                   "then the lint succeeded.")
309         return 0
310
311     def parse_expectations(self, test_platform_name, is_debug_mode):
312         """Parse the expectations from the test_list files and return a data
313         structure holding them. Throws an error if the test_list files have
314         invalid syntax."""
315         if self._options.lint_test_files:
316             test_files = None
317         else:
318             test_files = self._test_files
319
320         try:
321             expectations_str = self._port.test_expectations()
322             overrides_str = self._port.test_expectations_overrides()
323             self._expectations = test_expectations.TestExpectations(
324                 self._port, test_files, expectations_str, test_platform_name,
325                 is_debug_mode, self._options.lint_test_files,
326                 overrides=overrides_str)
327             return self._expectations
328         except SyntaxError, err:
329             if self._options.lint_test_files:
330                 print str(err)
331             else:
332                 raise err
333
334     def prepare_lists_and_print_output(self):
335         """Creates appropriate subsets of test lists and returns a
336         ResultSummary object. Also prints expected test counts.
337         """
338
339         # Remove skipped - both fixable and ignored - files from the
340         # top-level list of files to test.
341         num_all_test_files = len(self._test_files)
342         self._printer.print_expected("Found:  %d tests" %
343                                      (len(self._test_files)))
344         if not num_all_test_files:
345             _log.critical('No tests to run.')
346             return None
347
348         skipped = set()
349         if num_all_test_files > 1 and not self._options.force:
350             skipped = self._expectations.get_tests_with_result_type(
351                            test_expectations.SKIP)
352             self._test_files -= skipped
353
354         # Create a sorted list of test files so the subset chunk,
355         # if used, contains alphabetically consecutive tests.
356         self._test_files_list = list(self._test_files)
357         if self._options.randomize_order:
358             random.shuffle(self._test_files_list)
359         else:
360             self._test_files_list.sort()
361
362         # If the user specifies they just want to run a subset of the tests,
363         # just grab a subset of the non-skipped tests.
364         if self._options.run_chunk or self._options.run_part:
365             chunk_value = self._options.run_chunk or self._options.run_part
366             test_files = self._test_files_list
367             try:
368                 (chunk_num, chunk_len) = chunk_value.split(":")
369                 chunk_num = int(chunk_num)
370                 assert(chunk_num >= 0)
371                 test_size = int(chunk_len)
372                 assert(test_size > 0)
373             except (ValueError, AssertionError):
374                 _log.critical("invalid chunk '%s'" % chunk_value)
375                 return None
376
377             # Get the number of tests
378             num_tests = len(test_files)
379
380             # Get the start offset of the slice.
381             if self._options.run_chunk:
382                 chunk_len = test_size
383                 # In this case chunk_num can be really large, so wrap the
384                 # slice around so it fits within the current number of tests.
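                # For example, a run_chunk value of '3:100' (illustrative
                # numbers) with 450 tests starts the slice at
                # (3 * 100) % 450 == 300.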
385                 slice_start = (chunk_num * chunk_len) % num_tests
386             else:
387                 # Validate the data.
388                 assert(test_size <= num_tests)
389                 assert(chunk_num <= test_size)
390
391                 # To compute chunk_len without skipping any tests, round the
392                 # total up to the next multiple of test_size so that every
393                 # part gets the same length.
394                 rounded_tests = num_tests
395                 if rounded_tests % test_size != 0:
396                     rounded_tests = (num_tests + test_size -
397                                      (num_tests % test_size))
398
399                 chunk_len = rounded_tests / test_size
400                 slice_start = chunk_len * (chunk_num - 1)
401                 # It is fine if this slice extends past the end of the list.
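                # For example, a run_part value of '2:4' (illustrative numbers)
                # with 10 tests rounds up to 12, giving chunk_len == 3 and the
                # slice [3:6].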
402
403             # Get the end offset of the slice.
404             slice_end = min(num_tests, slice_start + chunk_len)
405
406             files = test_files[slice_start:slice_end]
407
408             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
409                 (slice_end - slice_start), slice_start, slice_end, num_tests)
410             self._printer.print_expected(tests_run_msg)
411
412             # If we reached the end and we don't have enough tests, we run some
413             # from the beginning.
414             if slice_end - slice_start < chunk_len:
415                 extra = chunk_len - (slice_end - slice_start)
416                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
417                             extra)
418                 self._printer.print_expected(extra_msg)
419                 tests_run_msg += "\n" + extra_msg
420                 files.extend(test_files[0:extra])
421             tests_run_filename = os.path.join(self._options.results_directory,
422                                               "tests_run.txt")
423             with codecs.open(tests_run_filename, "w", "utf-8") as file:
424                 file.write(tests_run_msg + "\n")
425
426             len_skip_chunk = int(len(files) * len(skipped) /
427                                  float(len(self._test_files)))
428             skip_chunk_list = list(skipped)[0:len_skip_chunk]
429             skip_chunk = set(skip_chunk_list)
430
431             # Update expectations so that the stats are calculated correctly.
432             # We need to pass a list that includes the right # of skipped files
433             # to parse_expectations() so that ResultSummary() will get the correct
434             # stats. So, we add in the subset of skipped files, and then
435             # subtract them back out.
436             self._test_files_list = files + skip_chunk_list
437             self._test_files = set(self._test_files_list)
438
439             self._expectations = self.parse_expectations(
440                 self._port.test_platform_name(),
441                 self._options.configuration == 'Debug')
442
443             self._test_files = set(files)
444             self._test_files_list = files
445         else:
446             skip_chunk = skipped
447
448         result_summary = ResultSummary(self._expectations,
449             self._test_files | skip_chunk)
450         self._print_expected_results_of_type(result_summary,
451             test_expectations.PASS, "passes")
452         self._print_expected_results_of_type(result_summary,
453             test_expectations.FAIL, "failures")
454         self._print_expected_results_of_type(result_summary,
455             test_expectations.FLAKY, "flaky")
456         self._print_expected_results_of_type(result_summary,
457             test_expectations.SKIP, "skipped")
458
459         if self._options.force:
460             self._printer.print_expected('Running all tests, including '
461                                          'skips (--force)')
462         else:
463             # Note that we don't actually run the skipped tests (they were
464             # subtracted out of self._test_files, above), but we stub out the
465             # results here so the statistics can remain accurate.
466             for test in skip_chunk:
467                 result = test_results.TestResult(test,
468                     failures=[], test_run_time=0, total_time_for_all_diffs=0,
469                     time_for_diffs=0)
470                 result.type = test_expectations.SKIP
471                 result_summary.add(result, expected=True)
472         self._printer.print_expected('')
473
474         return result_summary
475
476     def _get_dir_for_test_file(self, test_file):
477         """Returns the highest-level directory by which to shard the given
478         test file."""
479         index = test_file.rfind(os.sep + LAYOUT_TESTS_DIRECTORY)
480
481         test_file = test_file[index + len(LAYOUT_TESTS_DIRECTORY):]
482         test_file_parts = test_file.split(os.sep, 1)
483         directory = test_file_parts[0]
484         test_file = test_file_parts[1]
485
486         # The http tests are very stable on mac/linux.
487         # TODO(ojan): Make the http server on Windows use Apache so we can
488         # shard the http tests there as well. Switching to Apache is
489         # what made them stable on linux/mac.
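        # For example, a test at .../LayoutTests/fast/canvas/foo.html (an
        # illustrative path) is sharded under 'fast/canvas'.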
490         return_value = directory
491         while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
492                 and test_file.find(os.sep) >= 0):
493             test_file_parts = test_file.split(os.sep, 1)
494             directory = test_file_parts[0]
495             return_value = os.path.join(return_value, directory)
496             test_file = test_file_parts[1]
497
498         return return_value
499
500     def _get_test_info_for_file(self, test_file):
501         """Returns the appropriate TestInfo object for the file. Mostly this
502         is used for looking up the timeout value (in ms) to use for the given
503         test."""
504         if self._expectations.has_modifier(test_file, test_expectations.SLOW):
505             return TestInfo(self._port, test_file,
506                             self._options.slow_time_out_ms)
507         return TestInfo(self._port, test_file, self._options.time_out_ms)
508
509     def _test_requires_lock(self, test_file):
510         """Return True if the test needs to be locked when
511         running multiple copies of NRWTs."""
512         split_path = test_file.split(os.sep)
513         return 'http' in split_path or 'websocket' in split_path
514
515     def _get_test_file_queue(self, test_files):
516         """Create the thread-safe queue of (directory name, list of TestInfo)
517         tuples. Each TestShellThread pulls a list from this queue and runs
518         those tests in order before grabbing the next available list.
519
520         Shard the lists by directory. This helps ensure that tests that depend
521         on each other (aka bad tests!) continue to run together as most
522         cross-test dependencies tend to occur within the same directory.
523
524         Return:
525           The Queue of lists of TestInfo objects.
526         """
527
528         test_lists = []
529         tests_to_http_lock = []
530         if (self._options.experimental_fully_parallel or
531             self._is_single_threaded()):
532             for test_file in test_files:
533                 test_info = self._get_test_info_for_file(test_file)
534                 if self._test_requires_lock(test_file):
535                     tests_to_http_lock.append(test_info)
536                 else:
537                     test_lists.append((".", [test_info]))
538         else:
539             tests_by_dir = {}
540             for test_file in test_files:
541                 directory = self._get_dir_for_test_file(test_file)
542                 test_info = self._get_test_info_for_file(test_file)
543                 if self._test_requires_lock(test_file):
544                     tests_to_http_lock.append(test_info)
545                 else:
546                     tests_by_dir.setdefault(directory, [])
547                     tests_by_dir[directory].append(test_info)
548             # Sort by the number of tests in the dir so that the ones with the
549             # most tests get run first in order to maximize parallelization.
550             # Number of tests is a good enough, but not perfect, approximation
551             # of how long that set of tests will take to run. We can't just use
552             # a PriorityQueue until we move to Python 2.6.
553             for directory in tests_by_dir:
554                 test_list = tests_by_dir[directory]
555                 # Keep the tests in alphabetical order.
556                 # FIXME: Remove once tests are fixed so they can be run in any
557                 # order.
558                 test_list.reverse()
559                 test_list_tuple = (directory, test_list)
560                 test_lists.append(test_list_tuple)
561             test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
562
563         # Put the http tests first. There are only a couple hundred of them,
564         # but each http test takes a very long time to run, so sorting by the
565         # number of tests doesn't accurately capture how long they take to run.
566         if tests_to_http_lock:
567             tests_to_http_lock.reverse()
568             test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))
569
570         filename_queue = Queue.Queue()
571         for item in test_lists:
572             filename_queue.put(item)
573         return filename_queue
574
575     def _get_test_args(self, index):
576         """Returns the TestArguments to use for the DumpRenderTree thread with the given index."""
577         test_args = test_type_base.TestArguments()
578         test_args.png_path = None
579         if self._options.pixel_tests:
580             png_path = os.path.join(self._options.results_directory,
581                                     "png_result%s.png" % index)
582             test_args.png_path = png_path
583         test_args.new_baseline = self._options.new_baseline
584         test_args.reset_results = self._options.reset_results
585
586         return test_args
587
588     def _contains_tests(self, subdir):
589         for test_file in self._test_files:
590             if test_file.find(subdir) >= 0:
591                 return True
592         return False
593
594     def _instantiate_dump_render_tree_threads(self, test_files,
595                                               result_summary):
596         """Instantiates and starts the TestShellThread(s).
597
598         Return:
599           The list of threads.
600         """
601         filename_queue = self._get_test_file_queue(test_files)
602
603         # Instantiate TestShellThreads and start them.
604         threads = []
605         for i in xrange(int(self._options.child_processes)):
606             # Create separate TestTypes instances for each thread.
607             test_types = []
608             for test_type in self._test_types:
609                 test_types.append(test_type(self._port,
610                                     self._options.results_directory))
611
612             test_args = self._get_test_args(i)
613             thread = dump_render_tree_thread.TestShellThread(self._port,
614                 self._options, filename_queue, self._result_queue,
615                 test_types, test_args)
616             if self._is_single_threaded():
617                 thread.run_in_main_thread(self, result_summary)
618             else:
619                 thread.start()
620             threads.append(thread)
621
622         return threads
623
624     def _is_single_threaded(self):
625         """Returns whether we should run all the tests in the main thread."""
626         return int(self._options.child_processes) == 1
627
628     def _run_tests(self, file_list, result_summary):
629         """Runs the tests in the file_list.
630
631         Return: A tuple (keyboard_interrupted, thread_timings, test_timings,
632             individual_test_timings)
633             keyboard_interrupted is whether someone typed Ctrl-C
634             thread_timings is a list of dicts with the total runtime
635               of each thread with 'name', 'num_tests', 'total_time' properties
636             test_timings is a list of timings for each sharded subdirectory
637               of the form [time, directory_name, num_tests]
638             individual_test_timings is a list of run times for each test
639               in the form {filename:filename, test_run_time:test_run_time}
640             result_summary, passed in as an argument, is populated with the results
641         """
642         # FIXME: We should use webkitpy.tool.grammar.pluralize here.
643         plural = ""
644         if not self._is_single_threaded():
645             plural = "s"
646         self._printer.print_update('Starting %s%s ...' %
647                                    (self._port.driver_name(), plural))
648         threads = self._instantiate_dump_render_tree_threads(file_list,
649                                                              result_summary)
650         self._printer.print_update("Starting testing ...")
651
652         keyboard_interrupted = self._wait_for_threads_to_finish(threads,
653                                                                 result_summary)
654         (thread_timings, test_timings, individual_test_timings) = \
655             self._collect_timing_info(threads)
656
657         return (keyboard_interrupted, thread_timings, test_timings,
658                 individual_test_timings)
659
660     def _wait_for_threads_to_finish(self, threads, result_summary):
661         keyboard_interrupted = False
662         try:
663             # Loop through all the threads waiting for them to finish.
664             some_thread_is_alive = True
665             while some_thread_is_alive:
666                 some_thread_is_alive = False
667                 t = time.time()
668                 for thread in threads:
669                     exception_info = thread.exception_info()
670                     if exception_info is not None:
671                         # Re-raise the thread's exception here to make it
672                         # clear that testing was aborted. Otherwise,
673                         # the tests that did not run would be assumed
674                         # to have passed.
675                         raise exception_info[0], exception_info[1], exception_info[2]
676
677                     if thread.isAlive():
678                         some_thread_is_alive = True
679                         next_timeout = thread.next_timeout()
680                         if (next_timeout and t > next_timeout):
681                             _log_wedged_thread(thread)
682                             thread.clear_next_timeout()
683
684                 self.update_summary(result_summary)
685
686                 if some_thread_is_alive:
687                     time.sleep(0.01)
688
689         except KeyboardInterrupt:
690             keyboard_interrupted = True
691             for thread in threads:
692                 thread.cancel()
693
694         return keyboard_interrupted
695
696     def _collect_timing_info(self, threads):
697         test_timings = {}
698         individual_test_timings = []
699         thread_timings = []
700
701         for thread in threads:
702             thread_timings.append({'name': thread.getName(),
703                                    'num_tests': thread.get_num_tests(),
704                                    'total_time': thread.get_total_time()})
705             test_timings.update(thread.get_test_group_timing_stats())
706             individual_test_timings.extend(thread.get_test_results())
707
708         return (thread_timings, test_timings, individual_test_timings)
709
710     def needs_http(self):
711         """Returns whether the test runner needs an HTTP server."""
712         return self._contains_tests(self.HTTP_SUBDIR)
713
714     def needs_websocket(self):
715         """Returns whether the test runner needs a WebSocket server."""
716         return self._contains_tests(self.WEBSOCKET_SUBDIR)
717
718     def set_up_run(self):
719         """Configures the system to be ready to run tests.
720
721         Returns a ResultSummary object if we should continue to run tests,
722         or None if we should abort.
723
724         """
725         # This must be started before we check the system dependencies,
726         # since the helper may do things to make the setup correct.
727         self._printer.print_update("Starting helper ...")
728         self._port.start_helper()
729
730         # Check that the system dependencies (themes, fonts, ...) are correct.
731         if not self._options.nocheck_sys_deps:
732             self._printer.print_update("Checking system dependencies ...")
733             if not self._port.check_sys_deps(self.needs_http()):
734                 self._port.stop_helper()
735                 return None
736
737         if self._options.clobber_old_results:
738             self._clobber_old_results()
739
740         # Create the output directory if it doesn't already exist.
741         self._port.maybe_make_directory(self._options.results_directory)
742
743         self._port.setup_test_run()
744
745         self._printer.print_update("Preparing tests ...")
746         result_summary = self.prepare_lists_and_print_output()
747         if not result_summary:
748             return None
749
750         return result_summary
751
752     def run(self, result_summary):
753         """Run all our tests on all our test files.
754
755         For each test file, we run each test type. If there are any failures,
756         we collect them for reporting.
757
758         Args:
759           result_summary: a summary object tracking the test results.
760
761         Return:
762           The number of unexpected results (0 == success)
763         """
764         # collect_tests() must have been called first to initialize us.
765         # If we didn't find any files to test, we've errored out already in
766         # prepare_lists_and_print_output().
767         assert(len(self._test_files))
768
769         start_time = time.time()
770
771         keyboard_interrupted, thread_timings, test_timings, \
772             individual_test_timings = (
773             self._run_tests(self._test_files_list, result_summary))
774
775         # We exclude the crashes from the list of results to retry, because
776         # we want to treat even a potentially flaky crash as an error.
777         failures = self._get_failures(result_summary, include_crashes=False)
778         retry_summary = result_summary
779         while (len(failures) and self._options.retry_failures and
780             not self._retrying and not keyboard_interrupted):
781             _log.info('')
782             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
783             _log.info('')
784             self._retrying = True
785             retry_summary = ResultSummary(self._expectations, failures.keys())
786             # Note that we intentionally ignore the return value here.
787             self._run_tests(failures.keys(), retry_summary)
788             failures = self._get_failures(retry_summary, include_crashes=True)
789
790         end_time = time.time()
791
792         self._print_timing_statistics(end_time - start_time,
793                                       thread_timings, test_timings,
794                                       individual_test_timings,
795                                       result_summary)
796
797         self._print_result_summary(result_summary)
798
799         sys.stdout.flush()
800         sys.stderr.flush()
801
802         self._printer.print_one_line_summary(result_summary.total,
803                                              result_summary.expected,
804                                              result_summary.unexpected)
805
806         unexpected_results = summarize_unexpected_results(self._port,
807             self._expectations, result_summary, retry_summary)
808         self._printer.print_unexpected_results(unexpected_results)
809
810         if self._options.record_results:
811             # Write the same data to JSON files.
812             self._write_json_files(unexpected_results, result_summary,
813                                    individual_test_timings)
814
815             # Upload generated JSON files to appengine server.
816             self._upload_json_files()
817
818         # Write the summary to disk (results.html) and display it if requested.
819         wrote_results = self._write_results_html_file(result_summary)
820         if self._options.show_results and wrote_results:
821             self._show_results_html_file()
822
823         # Now that we've completed all the processing we can, we re-raise
824         # a KeyboardInterrupt if necessary so the caller can handle it.
825         if keyboard_interrupted:
826             raise KeyboardInterrupt
827
828         # Ignore flaky failures and unexpected passes so we don't turn the
829         # bot red for those.
830         return unexpected_results['num_regressions']
831
832     def clean_up_run(self):
833         """Restores the system after we're done running tests."""
834
835         _log.debug("flushing stdout")
836         sys.stdout.flush()
837         _log.debug("flushing stderr")
838         sys.stderr.flush()
839         _log.debug("stopping helper")
840         self._port.stop_helper()
841
842     def update_summary(self, result_summary):
843         """Update the summary and print results with any completed tests."""
844         while True:
845             try:
846                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
847             except Queue.Empty:
848                 return
849
850             expected = self._expectations.matches_an_expected_result(
851                 result.filename, result.type, self._options.pixel_tests)
852             result_summary.add(result, expected)
853             exp_str = self._expectations.get_expectations_string(
854                 result.filename)
855             got_str = self._expectations.expectation_to_string(result.type)
856             self._printer.print_test_result(result, expected, exp_str, got_str)
857             self._printer.print_progress(result_summary, self._retrying,
858                                          self._test_files_list)
859
860     def _clobber_old_results(self):
861         # Just clobber the actual test results directories since the other
862         # files in the results directory are explicitly used for cross-run
863         # tracking.
864         self._printer.print_update("Clobbering old results in %s" %
865                                    self._options.results_directory)
866         layout_tests_dir = self._port.layout_tests_dir()
867         possible_dirs = self._port.test_dirs()
868         for dirname in possible_dirs:
869             if os.path.isdir(os.path.join(layout_tests_dir, dirname)):
870                 shutil.rmtree(os.path.join(self._options.results_directory,
871                                            dirname),
872                               ignore_errors=True)
873
874     def _get_failures(self, result_summary, include_crashes):
875         """Filters a dict of results and returns only the failures.
876
877         Args:
878           result_summary: the results of the test run
879           include_crashes: whether crashes are included in the output.
880             We use False when finding the list of failures to retry
881             to see if the results were flaky. Although the crashes may also be
882             flaky, we treat them as if they aren't so that they're not ignored.
883         Returns:
884           a dict of files -> results
885         """
886         failed_results = {}
887         for test, result in result_summary.unexpected_results.iteritems():
888             if (result == test_expectations.PASS or
889                 (result == test_expectations.CRASH and not include_crashes)):
890                 continue
891             failed_results[test] = result
892
893         return failed_results
894
895     def _write_json_files(self, unexpected_results, result_summary,
896                         individual_test_timings):
897         """Writes the results of the test run as JSON files into the results
898         dir.
899
900         There are three different files written into the results dir:
901           unexpected_results.json: A short list of any unexpected results.
902             This is used by the buildbots to display results.
903           expectations.json: This is used by the flakiness dashboard.
904           results.json: A full list of the results - used by the flakiness
905             dashboard and the aggregate results dashboard.
906
907         Args:
908           unexpected_results: dict of unexpected results
909           result_summary: full summary object
910           individual_test_timings: list of test times (used by the flakiness
911             dashboard).
912         """
913         results_directory = self._options.results_directory
914         _log.debug("Writing JSON files in %s." % results_directory)
915         unexpected_json_path = os.path.join(results_directory, "unexpected_results.json")
916         with codecs.open(unexpected_json_path, "w", "utf-8") as file:
917             simplejson.dump(unexpected_results, file, sort_keys=True, indent=2)
918
919         # Write a json file of the test_expectations.txt file for the layout
920         # tests dashboard.
921         expectations_path = os.path.join(results_directory, "expectations.json")
922         expectations_json = \
923             self._expectations.get_expectations_json_for_all_platforms()
924         with codecs.open(expectations_path, "w", "utf-8") as file:
925             file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json)
926
927         json_layout_results_generator.JSONLayoutResultsGenerator(
928             self._port, self._options.builder_name, self._options.build_name,
929             self._options.build_number, self._options.results_directory,
930             BUILDER_BASE_URL, individual_test_timings,
931             self._expectations, result_summary, self._test_files_list,
932             not self._options.upload_full_results,
933             self._options.test_results_server)
934
935         _log.debug("Finished writing JSON files.")
936
937     def _upload_json_files(self):
938         if not self._options.test_results_server:
939             return
940
941         if not self._options.master_name:
942             _log.error("--test-results-server was set, but --master-name was not. Not uploading JSON files.")
943             return
944
945         _log.info("Uploading JSON files for builder: %s",
946                    self._options.builder_name)
947
948         attrs = [("builder", self._options.builder_name), ("testtype", "layout-tests"),
949             ("master", self._options.master_name)]
950
951         json_files = ["expectations.json"]
952         if self._options.upload_full_results:
953             json_files.append("results.json")
954         else:
955             json_files.append("incremental_results.json")
956
957         files = [(file, os.path.join(self._options.results_directory, file))
958             for file in json_files]
959
960         uploader = test_results_uploader.TestResultsUploader(
961             self._options.test_results_server)
962         try:
963             # Set an upload timeout in case the appengine server is having problems.
964             # 120 seconds is more than enough to upload test results.
965             uploader.upload(attrs, files, 120)
966         except Exception, err:
967             _log.error("Upload failed: %s" % err)
968             return
969
970         _log.info("JSON files uploaded.")
971
972     def _print_config(self):
973         """Prints the configuration for the test run."""
974         p = self._printer
975         p.print_config("Using port '%s'" % self._port.name())
976         p.print_config("Placing test results in %s" %
977                        self._options.results_directory)
978         if self._options.new_baseline:
979             p.print_config("Placing new baselines in %s" %
980                            self._port.baseline_path())
981         p.print_config("Using %s build" % self._options.configuration)
982         if self._options.pixel_tests:
983             p.print_config("Pixel tests enabled")
984         else:
985             p.print_config("Pixel tests disabled")
986
987         p.print_config("Regular timeout: %s, slow test timeout: %s" %
988                        (self._options.time_out_ms,
989                         self._options.slow_time_out_ms))
990
991         if self._is_single_threaded():
992             p.print_config("Running one %s" % self._port.driver_name())
993         else:
994             p.print_config("Running %s %ss in parallel" %
995                            (self._options.child_processes,
996                             self._port.driver_name()))
997         p.print_config("")
998
999     def _print_expected_results_of_type(self, result_summary,
1000                                         result_type, result_type_str):
1001         """Print the number of the tests in a given result class.
1002
1003         Args:
1004           result_summary - the object containing all the results to report on
1005           result_type - the particular result type to report in the summary.
1006           result_type_str - a string description of the result_type.
1007         """
1008         tests = self._expectations.get_tests_with_result_type(result_type)
1009         now = result_summary.tests_by_timeline[test_expectations.NOW]
1010         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
1011
1012         # We use a fancy format string in order to print the data out in a
1013         # nicely-aligned table.
1014         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
1015                   % (self._num_digits(now), self._num_digits(wontfix)))
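        # Produces lines like "Expect:   745 passes   (745 now, 0 wontfix)"
        # (numbers illustrative; column widths depend on the counts).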
1016         self._printer.print_expected(fmtstr %
1017             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
1018
1019     def _num_digits(self, num):
1020         """Returns the number of digits needed to represent the length of a
1021         sequence."""
1022         ndigits = 1
1023         if len(num):
1024             ndigits = int(math.log10(len(num))) + 1
1025         return ndigits
1026
1027     def _print_timing_statistics(self, total_time, thread_timings,
1028                                directory_test_timings, individual_test_timings,
1029                                result_summary):
1030         """Record timing-specific information for the test run.
1031
1032         Args:
1033           total_time: total elapsed time (in seconds) for the test run
1034           thread_timings: wall clock time each thread ran for
1035           directory_test_timings: timing by directory
1036           individual_test_timings: timing by file
1037           result_summary: summary object for the test run
1038         """
1039         self._printer.print_timing("Test timing:")
1040         self._printer.print_timing("  %6.2f total testing time" % total_time)
1041         self._printer.print_timing("")
1042         self._printer.print_timing("Thread timing:")
1043         cuml_time = 0
1044         for t in thread_timings:
1045             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
1046                   (t['name'], t['num_tests'], t['total_time']))
1047             cuml_time += t['total_time']
1048         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
1049               (cuml_time, cuml_time / int(self._options.child_processes)))
1050         self._printer.print_timing("")
1051
1052         self._print_aggregate_test_statistics(individual_test_timings)
1053         self._print_individual_test_times(individual_test_timings,
1054                                           result_summary)
1055         self._print_directory_timings(directory_test_timings)
1056
1057     def _print_aggregate_test_statistics(self, individual_test_timings):
1058         """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
1059         Args:
1060           individual_test_timings: List of dump_render_tree_thread.TestStats
1061               for all tests.
1062         """
1063         test_types = []  # Unit tests don't actually produce any timings.
1064         if individual_test_timings:
1065             test_types = individual_test_timings[0].time_for_diffs.keys()
1066         times_for_dump_render_tree = []
1067         times_for_diff_processing = []
1068         times_per_test_type = {}
1069         for test_type in test_types:
1070             times_per_test_type[test_type] = []
1071
1072         for test_stats in individual_test_timings:
1073             times_for_dump_render_tree.append(test_stats.test_run_time)
1074             times_for_diff_processing.append(
1075                 test_stats.total_time_for_all_diffs)
1076             time_for_diffs = test_stats.time_for_diffs
1077             for test_type in test_types:
1078                 times_per_test_type[test_type].append(
1079                     time_for_diffs[test_type])
1080
1081         self._print_statistics_for_test_timings(
1082             "PER TEST TIME IN TESTSHELL (seconds):",
1083             times_for_dump_render_tree)
1084         self._print_statistics_for_test_timings(
1085             "PER TEST DIFF PROCESSING TIMES (seconds):",
1086             times_for_diff_processing)
1087         for test_type in test_types:
1088             self._print_statistics_for_test_timings(
1089                 "PER TEST TIMES BY TEST TYPE: %s" % test_type,
1090                 times_per_test_type[test_type])
1091
1092     def _print_individual_test_times(self, individual_test_timings,
1093                                   result_summary):
1094         """Prints the run times for slow, timeout and crash tests.
1095         Args:
1096           individual_test_timings: List of dump_render_tree_thread.TestStats
1097               for all tests.
1098           result_summary: summary object for test run
1099         """
1100         # Reverse-sort by the time spent in DumpRenderTree.
1101         individual_test_timings.sort(lambda a, b:
1102             cmp(b.test_run_time, a.test_run_time))
1103
1104         num_printed = 0
1105         slow_tests = []
1106         timeout_or_crash_tests = []
1107         unexpected_slow_tests = []
1108         for test_tuple in individual_test_timings:
1109             filename = test_tuple.filename
1110             is_timeout_crash_or_slow = False
1111             if self._expectations.has_modifier(filename,
1112                                                test_expectations.SLOW):
1113                 is_timeout_crash_or_slow = True
1114                 slow_tests.append(test_tuple)
1115
1116             if filename in result_summary.failures:
1117                 result = result_summary.results[filename].type
1118                 if (result == test_expectations.TIMEOUT or
1119                     result == test_expectations.CRASH):
1120                     is_timeout_crash_or_slow = True
1121                     timeout_or_crash_tests.append(test_tuple)
1122
1123             if (not is_timeout_crash_or_slow and
1124                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
1125                 num_printed = num_printed + 1
1126                 unexpected_slow_tests.append(test_tuple)
1127
1128         self._printer.print_timing("")
1129         self._print_test_list_timing("%s slowest tests that are not "
1130             "marked as SLOW and did not timeout/crash:" %
1131             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
1132         self._printer.print_timing("")
1133         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1134         self._printer.print_timing("")
1135         self._print_test_list_timing("Tests that timed out or crashed:",
1136                                      timeout_or_crash_tests)
1137         self._printer.print_timing("")
1138
1139     def _print_test_list_timing(self, title, test_list):
1140         """Print timing info for each test.
1141
1142         Args:
1143           title: section heading
1144           test_list: tests that fall in this section
1145         """
1146         if self._printer.disabled('slowest'):
1147             return
1148
1149         self._printer.print_timing(title)
1150         for test_tuple in test_list:
1151             filename = test_tuple.filename[len(
1152                 self._port.layout_tests_dir()) + 1:]
1153             filename = filename.replace('\\', '/')
1154             test_run_time = round(test_tuple.test_run_time, 1)
1155             self._printer.print_timing("  %s took %s seconds" %
1156                                        (filename, test_run_time))
1157
1158     def _print_directory_timings(self, directory_test_timings):
1159         """Print timing info by directory for any directories that
1160         take > 10 seconds to run.
1161
1162         Args:
1163           directory_test_timings: time info for each directory
1164         """
1165         timings = []
1166         for directory in directory_test_timings:
1167             num_tests, time_for_directory = directory_test_timings[directory]
1168             timings.append((round(time_for_directory, 1), directory,
1169                             num_tests))
1170         timings.sort()
1171
1172         self._printer.print_timing("Time to process slowest subdirectories:")
1173         min_seconds_to_print = 10
1174         for timing in timings:
1175             if timing[0] > min_seconds_to_print:
1176                 self._printer.print_timing(
1177                     "  %s took %s seconds to run %s tests." % (timing[1],
1178                     timing[0], timing[2]))
1179         self._printer.print_timing("")
1180
1181     def _print_statistics_for_test_timings(self, title, timings):
1182         """Prints the median, mean and standard deviation of the values in
1183         timings.
1184
1185         Args:
1186           title: Title for these timings.
1187           timings: A list of floats representing times.
1188         """
1189         self._printer.print_timing(title)
1190         timings.sort()
1191
1192         num_tests = len(timings)
1193         if not num_tests:
1194             return
1195         percentile90 = timings[int(.9 * num_tests)]
1196         percentile99 = timings[int(.99 * num_tests)]
1197
1198         if num_tests % 2 == 1:
1199             median = timings[(num_tests - 1) / 2]
1200         else:
1201             lower = timings[num_tests / 2 - 1]
1202             upper = timings[num_tests / 2]
1203             median = (float(lower + upper)) / 2
1204
1205         mean = sum(timings) / num_tests
1206
1207         sum_of_deviations = 0
1208         for timing in timings:
1209             sum_of_deviations += math.pow(timing - mean, 2)
1210         std_deviation = math.sqrt(sum_of_deviations / num_tests)
1211         self._printer.print_timing("  Median:          %6.3f" % median)
1212         self._printer.print_timing("  Mean:            %6.3f" % mean)
1213         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1214         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1215         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1216         self._printer.print_timing("")
1217
1218     def _print_result_summary(self, result_summary):
1219         """Print a short summary about how many tests passed.
1220
1221         Args:
1222           result_summary: information to log
1223         """
1224         failed = len(result_summary.failures)
1225         skipped = len(
1226             result_summary.tests_by_expectation[test_expectations.SKIP])
1227         total = result_summary.total
1228         passed = total - failed - skipped
1229         pct_passed = 0.0
1230         if total > 0:
1231             pct_passed = float(passed) * 100 / total
1232
1233         self._printer.print_actual("")
1234         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1235                      (passed, total, pct_passed))
1236         self._printer.print_actual("")
1237         self._print_result_summary_entry(result_summary,
1238             test_expectations.NOW, "Tests to be fixed")
1239
1240         self._printer.print_actual("")
1241         self._print_result_summary_entry(result_summary,
1242             test_expectations.WONTFIX,
1243             "Tests that will only be fixed if they crash (WONTFIX)")
1244         self._printer.print_actual("")
1245
1246     def _print_result_summary_entry(self, result_summary, timeline,
1247                                     heading):
1248         """Print a summary block of results for a particular timeline of tests.
1249
1250         Args:
1251           result_summary: summary to print results for
1252           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1253           heading: a textual description of the timeline
1254         """
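        # "Not passing" counts every test in this timeline whose expectation
        # is anything other than PASS; the per-expectation lines below are
        # reported as percentages of that count.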
1255         total = len(result_summary.tests_by_timeline[timeline])
1256         not_passing = (total -
1257            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1258                result_summary.tests_by_timeline[timeline]))
1259         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1260
1261         for result in TestExpectationsFile.EXPECTATION_ORDER:
1262             if result == test_expectations.PASS:
1263                 continue
1264             results = (result_summary.tests_by_expectation[result] &
1265                        result_summary.tests_by_timeline[timeline])
1266             desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
1267             if not_passing and len(results):
1268                 pct = len(results) * 100.0 / not_passing
1269                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1270                     (len(results), desc[len(results) != 1], pct))
1271
1272     def _results_html(self, test_files, failures, title="Test Failures", override_time=None):
1273         """
1274         test_files = a list of file paths
1275         failures = dictionary mapping test paths to failure objects
1276         title = title printed at the top of the generated results page
1277         override_time = current time (used by unit tests)
1278         """
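        # Illustrative shape of the markup produced for one failing test
        # (the test name, URI and time below are made up):
        #   <h2>Unexpected Test Failures (Sat Oct  9 14:00:00 2010)</h2>
        #   <p><a href='file:///.../fast/css/foo.html'>fast/css/foo.html</a><br />
        #   &nbsp;&nbsp;...failure-specific output...<br/></p>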
1279         page = """<html>
1280   <head>
1281     <title>Layout Test Results (%(time)s)</title>
1282   </head>
1283   <body>
1284     <h2>%(title)s (%(time)s)</h2>
1285         """ % {'title': title, 'time': override_time or time.asctime()}
1286
1287         for test_file in sorted(test_files):
1288             test_name = self._port.relative_test_filename(test_file)
1289             test_url = self._port.filename_to_uri(test_file)
1290             page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name)
1291             test_failures = failures.get(test_file, [])
1292             for failure in test_failures:
1293                 page += (u"&nbsp;&nbsp;%s<br/>" %
1294                          failure.result_html_output(test_name))
1295             page += "</p>\n"
1296         page += "</body></html>\n"
1297         return page
1298
1299     def _write_results_html_file(self, result_summary):
1300         """Write results.html which is a summary of tests that failed.
1301
1302         Args:
1303           result_summary: a summary of the results :)
1304
1305         Returns:
1306           True if any results were written (since expected failures may be
1307           omitted)
1308         """
1309         # test failures
1310         if self._options.full_results_html:
1311             results_title = "Test Failures"
1312             test_files = result_summary.failures.keys()
1313         else:
1314             results_title = "Unexpected Test Failures"
1315             unexpected_failures = self._get_failures(result_summary,
1316                 include_crashes=True)
1317             test_files = unexpected_failures.keys()
1318         if not len(test_files):
1319             return False
1320
1321         out_filename = os.path.join(self._options.results_directory,
1322                                     "results.html")
1323         with codecs.open(out_filename, "w", "utf-8") as results_file:
1324             html = self._results_html(test_files, result_summary.failures, results_title)
1325             results_file.write(html)
1326
1327         return True
1328
1329     def _show_results_html_file(self):
1330         """Shows the results.html page."""
1331         results_filename = os.path.join(self._options.results_directory,
1332                                         "results.html")
1333         self._port.show_results_html_file(results_filename)
1334
1335
1336 def read_test_files(files):
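    """Returns a flat list of tests read from the given --test-list files.

    Lines are passed through test_expectations.strip_comments(), so '//'
    comments and blank lines are skipped; a missing file is logged before
    the IOError is re-raised.
    """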
1337     tests = []
1338     for file in files:
1339         try:
1340             with codecs.open(file, 'r', 'utf-8') as file_contents:
1341                 # FIXME: This could be cleaner using a list comprehension.
1342                 for line in file_contents:
1343                     line = test_expectations.strip_comments(line)
1344                     if line:
1345                         tests.append(line)
1346         except IOError, e:
1347             if e.errno == errno.ENOENT:
1348                 _log.critical('')
1349                 _log.critical('--test-list file "%s" not found' % file)
1350             raise
1351     return tests
1352
1353
1354 def run(port, options, args, regular_output=sys.stderr,
1355         buildbot_output=sys.stdout):
1356     """Run the tests.
1357
1358     Args:
1359       port: Port object for port-specific behavior
1360       options: an optparse.Values object holding the command line options
1361       args: a list of sub directories or files to test
1362       regular_output: a stream-like object that we can send logging/debug
1363           output to
1364       buildbot_output: a stream-like object that we can write all output that
1365           is intended to be parsed by the buildbot to
1366     Returns:
1367       the number of unexpected results that occurred, or -1 if there is an
1368       error.
1369
1370     """
1371     _set_up_derived_options(port, options)
1372
1373     printer = printing.Printer(port, options, regular_output, buildbot_output,
1374         int(options.child_processes), options.experimental_fully_parallel)
1375     if options.help_printing:
1376         printer.help_printing()
1377         printer.cleanup()
1378         return 0
1379
1380     last_unexpected_results = _gather_unexpected_results(options)
1381     if options.print_last_failures:
1382         printer.write("\n".join(last_unexpected_results) + "\n")
1383         printer.cleanup()
1384         return 0
1385
1386     # We wrap any parts of the run that are slow or likely to raise exceptions
1387     # in a try/finally to ensure that we clean up the logging configuration.
1388     num_unexpected_results = -1
1389     try:
1390         test_runner = TestRunner(port, options, printer)
1391         test_runner._print_config()
1392
1393         printer.print_update("Collecting tests ...")
1394         try:
1395             test_runner.collect_tests(args, last_unexpected_results)
1396         except IOError, e:
1397             if e.errno == errno.ENOENT:
1398                 return -1
1399             raise
1400
1401         printer.print_update("Parsing expectations ...")
1402         if options.lint_test_files:
1403             return test_runner.lint()
1404         test_runner.parse_expectations(port.test_platform_name(),
1405                                        options.configuration == 'Debug')
1406
1407         printer.print_update("Checking build ...")
1408         if not port.check_build(test_runner.needs_http()):
1409             _log.error("Build check failed")
1410             return -1
1411
1412         result_summary = test_runner.set_up_run()
1413         if result_summary:
1414             num_unexpected_results = test_runner.run(result_summary)
1415             test_runner.clean_up_run()
1416             _log.debug("Testing completed, exit status: %d" %
1417                        num_unexpected_results)
1418     finally:
1419         printer.cleanup()
1420
1421     return num_unexpected_results
1422
1423
1424 def _set_up_derived_options(port_obj, options):
1425     """Sets the option values that depend on other option values."""
1426
1427     if not options.child_processes:
1428         # FIXME: Investigate perf/flakiness impact of using cpu_count + 1.
1429         options.child_processes = os.environ.get("WEBKIT_TEST_CHILD_PROCESSES",
1430                                                  str(port_obj.default_child_processes()))
1431
1432     if not options.configuration:
1433         options.configuration = port_obj.default_configuration()
1434
1435     if options.pixel_tests is None:
1436         options.pixel_tests = True
1437
1438     if not options.use_apache:
1439         options.use_apache = sys.platform in ('darwin', 'linux2')
1440
1441     if not os.path.isabs(options.results_directory):
1442         # This normalizes the path to the build dir.
1443         # FIXME: how this happens is not at all obvious; this is a dumb
1444         # interface and should be cleaned up.
1445         options.results_directory = port_obj.results_directory()
1446
1447     if not options.time_out_ms:
1448         if options.configuration == "Debug":
1449             options.time_out_ms = str(2 * TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1450         else:
1451             options.time_out_ms = str(TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1452
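    # Derived value: slow tests get five times the regular per-test
    # timeout computed above.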
1453     options.slow_time_out_ms = str(5 * int(options.time_out_ms))
1454
1455
1456 def _gather_unexpected_results(options):
1457     """Returns the unexpected results from the previous run, if any."""
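    # unexpected_results.json (written by a previous run) is assumed to have
    # a top-level 'tests' dictionary; only its keys (the test names) are
    # used here.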
1458     last_unexpected_results = []
1459     if options.print_last_failures or options.retest_last_failures:
1460         unexpected_results_filename = os.path.join(
1461             options.results_directory, "unexpected_results.json")
1462         with codecs.open(unexpected_results_filename, "r", "utf-8") as file:
1463             results = simplejson.load(file)
1464         last_unexpected_results = results['tests'].keys()
1465     return last_unexpected_results
1466
1467
1468 def _compat_shim_callback(option, opt_str, value, parser):
1469     print "Ignoring unsupported option: %s" % opt_str
1470
1471
1472 def _compat_shim_option(option_name, **kwargs):
1473     return optparse.make_option(option_name, action="callback",
1474         callback=_compat_shim_callback,
1475         help="Ignored, for old-run-webkit-tests compat only.", **kwargs)
1476
1477
1478 def parse_args(args=None):
1479     """Provides a default set of command line args.
1480
1481     Returns a tuple of options, args from optparse"""
1482
1483     # FIXME: All of these options should be stored closer to the code which
1484     # FIXME: actually uses them. configuration_options should move
1485     # FIXME: to WebKitPort and be shared across all scripts.
1486     configuration_options = [
1487         optparse.make_option("-t", "--target", dest="configuration",
1488                              help="(DEPRECATED)"),
1489         # FIXME: --help should display which configuration is default.
1490         optparse.make_option('--debug', action='store_const', const='Debug',
1491                              dest="configuration",
1492                              help='Set the configuration to Debug'),
1493         optparse.make_option('--release', action='store_const',
1494                              const='Release', dest="configuration",
1495                              help='Set the configuration to Release'),
1496         # old-run-webkit-tests also accepts -c, --configuration CONFIGURATION.
1497     ]
1498
1499     print_options = printing.print_options()
1500
1501     # FIXME: These options should move onto the ChromiumPort.
1502     chromium_options = [
1503         optparse.make_option("--chromium", action="store_true", default=False,
1504             help="use the Chromium port"),
1505         optparse.make_option("--startup-dialog", action="store_true",
1506             default=False, help="create a dialog on DumpRenderTree startup"),
1507         optparse.make_option("--gp-fault-error-box", action="store_true",
1508             default=False, help="enable Windows GP fault error box"),
1509         optparse.make_option("--multiple-loads",
1510             type="int", help="turn on multiple loads of each test"),
1511         optparse.make_option("--js-flags",
1512             type="string", help="JavaScript flags to pass to tests"),
1513         optparse.make_option("--nocheck-sys-deps", action="store_true",
1514             default=False,
1515             help="Don't check the system dependencies (themes)"),
1516         optparse.make_option("--use-drt", action="store_true",
1517             default=None,
1518             help="Use DumpRenderTree instead of test_shell"),
1519         optparse.make_option("--accelerated-compositing",
1520             action="store_true",
1521             help="Use hardware-accelerated compositing for rendering"),
1522         optparse.make_option("--no-accelerated-compositing",
1523             action="store_false",
1524             dest="accelerated_compositing",
1525             help="Don't use hardware-accelerated compositing for rendering"),
1526         optparse.make_option("--accelerated-2d-canvas",
1527             action="store_true",
1528             help="Use hardware-accelerated 2D Canvas calls"),
1529         optparse.make_option("--no-accelerated-2d-canvas",
1530             action="store_false",
1531             dest="accelerated_2d_canvas",
1532             help="Don't use hardware-accelerated 2D Canvas calls"),
1533     ]
1534
1535     # Missing Mac-specific old-run-webkit-tests options:
1536     # FIXME: Need: -g, --guard for guard malloc support on Mac.
1537     # FIXME: Need: -l --leaks    Enable leaks checking.
1538     # FIXME: Need: --sample-on-timeout Run sample on timeout
1539
1540     old_run_webkit_tests_compat = [
1541         # NRWT doesn't generate results by default anyway.
1542         _compat_shim_option("--no-new-test-results"),
1543         # NRWT doesn't sample on timeout yet anyway.
1544         _compat_shim_option("--no-sample-on-timeout"),
1545         # FIXME: NRWT needs to support remote links eventually.
1546         _compat_shim_option("--use-remote-links-to-tests"),
1547         # FIXME: NRWT doesn't need this option as much since failures are
1548         # designed to be cheap.  We eventually plan to add this support.
1549         _compat_shim_option("--exit-after-n-failures", nargs=1, type="int"),
1550     ]
1551
1552     results_options = [
1553         # NEED for bots: --use-remote-links-to-tests Link to test files
1554         # within the SVN repository in the results.
1555         optparse.make_option("-p", "--pixel-tests", action="store_true",
1556             dest="pixel_tests", help="Enable pixel-to-pixel PNG comparisons"),
1557         optparse.make_option("--no-pixel-tests", action="store_false",
1558             dest="pixel_tests", help="Disable pixel-to-pixel PNG comparisons"),
1559         optparse.make_option("--tolerance",
1560             help="Ignore image differences less than this percentage (some "
1561                 "ports may ignore this option)", type="float"),
1562         optparse.make_option("--results-directory",
1563             default="layout-test-results",
1564             help="Output results directory, relative to the Debug or "
1565                  "Release build directory"),
1566         optparse.make_option("--new-baseline", action="store_true",
1567             default=False, help="Save all generated results as new baselines "
1568                  "into the platform directory, overwriting whatever's "
1569                  "already there."),
1570         optparse.make_option("--reset-results", action="store_true",
1571             default=False, help="Reset any existing baselines to the "
1572                  "generated results"),
1573         optparse.make_option("--no-show-results", action="store_false",
1574             default=True, dest="show_results",
1575             help="Don't launch a browser with results after the tests "
1576                  "are done"),
1577         # FIXME: We should have a helper function to do this sort of
1578         # deprecated mapping and automatically log, etc.
1579         optparse.make_option("--noshow-results", action="store_false",
1580             dest="show_results",
1581             help="Deprecated, same as --no-show-results."),
1582         optparse.make_option("--no-launch-safari", action="store_false",
1583             dest="show_results",
1584             help="old-run-webkit-tests compat, same as --noshow-results."),
1585         # old-run-webkit-tests:
1586         # --[no-]launch-safari    Launch (or do not launch) Safari to display
1587         #                         test results (default: launch)
1588         optparse.make_option("--full-results-html", action="store_true",
1589             default=False,
1590             help="Show all failures in results.html, rather than only "
1591                  "regressions"),
1592         optparse.make_option("--clobber-old-results", action="store_true",
1593             default=False, help="Clobbers test results from previous runs."),
1594         optparse.make_option("--platform",
1595             help="Override the platform for expected results"),
1596         optparse.make_option("--no-record-results", action="store_false",
1597             default=True, dest="record_results",
1598             help="Don't record the results."),
1599         # old-run-webkit-tests also has HTTP toggle options:
1600         # --[no-]http                     Run (or do not run) http tests
1601         #                                 (default: run)
1602     ]
1603
1604     test_options = [
1605         optparse.make_option("--build", dest="build",
1606             action="store_true", default=True,
1607             help="Check to ensure the DumpRenderTree build is up-to-date "
1608                  "(default)."),
1609         optparse.make_option("--no-build", dest="build",
1610             action="store_false", help="Don't check to see if the "
1611                                        "DumpRenderTree build is up-to-date."),
1612         # old-run-webkit-tests has --valgrind instead of wrapper.
1613         optparse.make_option("--wrapper",
1614             help="wrapper command to insert before invocations of "
1615                  "DumpRenderTree; option is split on whitespace before "
1616                  "running. (Example: --wrapper='valgrind --smc-check=all')"),
1617         # old-run-webkit-tests:
1618         # -i|--ignore-tests               Comma-separated list of directories
1619         #                                 or tests to ignore
1620         optparse.make_option("--test-list", action="append",
1621             help="read list of tests to run from file", metavar="FILE"),
1622         # old-run-webkit-tests uses --skipped=[default|ignore|only]
1623         # instead of --force:
1624         optparse.make_option("--force", action="store_true", default=False,
1625             help="Run all tests, even those marked SKIP in the test list"),
1626         optparse.make_option("--use-apache", action="store_true",
1627             default=False, help="Use Apache instead of lighttpd."),
1628         optparse.make_option("--time-out-ms",
1629             help="Set the timeout for each test in milliseconds"),
1630         # old-run-webkit-tests calls --randomize-order: --random
1631         optparse.make_option("--randomize-order", action="store_true",
1632             default=False, help=("Run tests in random order (useful "
1633                                 "for tracking down corruption)")),
1634         optparse.make_option("--run-chunk",
1635             help=("Run a specified chunk (n:l), the nth chunk of length l, "
1636                  "of the layout tests")),
1637         optparse.make_option("--run-part", help=("Run a specified part (n:m), "
1638                   "the nth of m parts, of the layout tests")),
1639         # old-run-webkit-tests calls --batch-size: --nthly n
1640         #   Restart DumpRenderTree every n tests (default: 1000)
1641         optparse.make_option("--batch-size",
1642             help=("Run the tests in batches (n); after every n tests, "
1643                   "DumpRenderTree is relaunched."), type="int", default=0),
1644         # old-run-webkit-tests calls --run-singly: -1|--singly
1645         # Isolate each test case run (implies --nthly 1 --verbose)
1646         optparse.make_option("--run-singly", action="store_true",
1647             default=False, help="run a separate DumpRenderTree for each test"),
1648         optparse.make_option("--child-processes",
1649             help="Number of DumpRenderTrees to run in parallel."),
1650         # FIXME: Display default number of child processes that will run.
1651         optparse.make_option("--experimental-fully-parallel",
1652             action="store_true", default=False,
1653             help="run all tests in parallel"),
1654         # FIXME: Need --exit-after-n-failures N
1655         #      Exit after the first N failures instead of running all tests
1656         # FIXME: Need --exit-after-n-crashes N
1657         #      Exit after the first N crashes instead of running all tests
1658         # FIXME: consider: --iterations n
1659         #      Number of times to run the set of tests (e.g. ABCABCABC)
1660         optparse.make_option("--print-last-failures", action="store_true",
1661             default=False, help="Print the tests in the last run that "
1662             "had unexpected failures (or passes)."),
1663         optparse.make_option("--retest-last-failures", action="store_true",
1664             default=False, help="re-test the tests in the last run that "
1665             "had unexpected failures (or passes)."),
1666         optparse.make_option("--retry-failures", action="store_true",
1667             default=True,
1668             help="Re-try any tests that produce unexpected results (default)"),
1669         optparse.make_option("--no-retry-failures", action="store_false",
1670             dest="retry_failures",
1671             help="Don't re-try any tests that produce unexpected results."),
1672     ]
1673
1674     misc_options = [
1675         optparse.make_option("--lint-test-files", action="store_true",
1676             default=False, help=("Makes sure the test files parse for all "
1677                                  "configurations. Does not run any tests.")),
1678     ]
1679
1680     # FIXME: Move these into json_results_generator.py
1681     results_json_options = [
1682         optparse.make_option("--master-name", help="The name of the buildbot master."),
1683         optparse.make_option("--builder-name", default="DUMMY_BUILDER_NAME",
1684             help=("The name of the builder shown on the waterfall running "
1685                   "this script e.g. WebKit.")),
1686         optparse.make_option("--build-name", default="DUMMY_BUILD_NAME",
1687             help=("The name of the builder used in its path, e.g. "
1688                   "webkit-rel.")),
1689         optparse.make_option("--build-number", default="DUMMY_BUILD_NUMBER",
1690             help=("The build number of the builder running this script.")),
1691         optparse.make_option("--test-results-server", default="",
1692             help=("If specified, upload results json files to this appengine "
1693                   "server.")),
1694         optparse.make_option("--upload-full-results",
1695             action="store_true",
1696             default=False,
1697             help="If true, upload full json results to server."),
1698     ]
1699
1700     option_list = (configuration_options + print_options +
1701                    chromium_options + results_options + test_options +
1702                    misc_options + results_json_options +
1703                    old_run_webkit_tests_compat)
1704     option_parser = optparse.OptionParser(option_list=option_list)
1705
1706     options, args = option_parser.parse_args(args)
1707
1708     return options, args
1709
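# Usage sketch (hypothetical invocation, not taken from a caller in this
# file): parse_args() accepts the same argument strings optparse would see
# on the command line, e.g.
#     options, args = parse_args(['--platform', 'mac', '--noshow-results',
#                                 'fast/css'])
# which leaves 'fast/css' in args and sets options.platform and
# options.show_results accordingly.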
1710
1711 def _log_wedged_thread(thread):
1712     """Log information about the given thread state."""
1713     id = thread.id()
1714     stack = dump_render_tree_thread.find_thread_stack(id)
1715     assert(stack is not None)
1716     _log.error("")
1717     _log.error("thread %s (%d) is wedged" % (thread.getName(), id))
1718     dump_render_tree_thread.log_stack(stack)
1719     _log.error("")
1720
1721
1722 def main():
1723     options, args = parse_args()
1724     port_obj = port.get(options.platform, options)
1725     return run(port_obj, options, args)
1726
1727 if '__main__' == __name__:
1728     try:
1729         sys.exit(main())
1730     except KeyboardInterrupt:
1731         # this mirrors what the shell normally does
1732         sys.exit(signal.SIGINT + 128)