Merge WebKit at r84325: Initial merge by git.

[android-x86/external-webkit.git] / Tools / Scripts / VCSUtils.pm
diff --git a/Tools/Scripts/VCSUtils.pm b/Tools/Scripts/VCSUtils.pm

index faed7ed..8353f25 100644 (file)
--- a/Tools/Scripts/VCSUtils.pm
+++ b/Tools/Scripts/VCSUtils.pm
@@ -1,6 +1,6 @@
  # Copyright (C) 2007, 2008, 2009 Apple Inc.  All rights reserved.
  # Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
-# Copyright (C) Research In Motion Limited 2010. All rights reserved.
+# Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without
  # modification, are permitted provided that the following conditions
@@ -44,11 +44,13 @@ BEGIN {
      $VERSION     = 1.00;
      @ISA         = qw(Exporter);
      @EXPORT      = qw(
+        &applyGitBinaryPatchDelta
          &callSilently
          &canonicalizePath
          &changeLogEmailAddress
          &changeLogName
          &chdirReturningRelativePath
+        &decodeGitBinaryChunk
          &decodeGitBinaryPatch
          &determineSVNRoot
          &determineVCSRoot
@@ -57,6 +59,7 @@ BEGIN {
          &gitBranch
          &gitdiff2svndiff
          &isGit
+        &isGitSVN
          &isGitBranchBuild
          &isGitDirectory
          &isSVN
@@ -65,6 +68,7 @@ BEGIN {
          &makeFilePathRelative
          &mergeChangeLogs
          &normalizePath
+        &parseFirstEOL
          &parsePatch
          &pathRelativeToSVNRepositoryRootForPath
          &prepareParsedPatch
@@ -86,6 +90,7 @@ our @EXPORT_OK;
  my $gitBranch;
  my $gitRoot;
  my $isGit;
+my $isGitSVN;
  my $isGitBranchBuild;
  my $isSVN;
  my $svnVersion;
@@ -93,6 +98,7 @@ my $svnVersion;
  # Project time zone for Cupertino, CA, US
  my $changeLogTimeZone = "PST8PDT";
  
+my $chunkRangeRegEx = qr#^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$#; # e.g. @@ -2,6 +2,18 @@
  my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#;
  my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#;
  my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path.
@@ -199,6 +205,18 @@ sub isGit()
      return $isGit;
  }
  
+# Returns a true value if the current directory is in a git-svn checkout; the answer is cached.
+sub isGitSVN()
+{
+    return $isGitSVN if defined $isGitSVN;
+    # There doesn't seem to be an officially documented way to detect a git-svn checkout;
+    # the usual suggestion is to ask git for the svn-remote fetch setting. Because stderr
+    # is folded into $output, an error message alone must not count: require exit status 0.
+    my $output = `git config --get svn-remote.svn.fetch 2>& 1`;
+    $isGitSVN = ($? == 0 && $output ne '');
+    return $isGitSVN;
+}
+
  sub gitBranch()
  {
      unless (defined $gitBranch) {
@@ -392,6 +410,19 @@ sub normalizePath($)
      return $path;
  }
  
+# Returns $fullPath with "Source/" prepended when it starts with one of the top-level
+# directories WebCore/, JavaScriptCore/, WebKit/ or WebKit2/; otherwise returns it unchanged.
+sub adjustPathForRecentRenamings($)
+{
+    my ($fullPath) = @_;
+
+    my $needsSourcePrefix = $fullPath =~ m|^WebCore/|
+        || $fullPath =~ m|^JavaScriptCore/|
+        || $fullPath =~ m|^WebKit/|
+        || $fullPath =~ m|^WebKit2/|;
+    return $needsSourcePrefix ? "Source/$fullPath" : $fullPath;
+}
+
  sub canonicalizePath($)
  {
      my ($file) = @_;
@@ -420,6 +451,40 @@ sub removeEOL($)
      return $line;
  }
  
+# Reads the first line from $fileHandle and determines which end-of-line sequence it uses.
+# Returns "\r\n", "\r" or "\n"; undef if the line contains none of them; and an empty
+# list (undef in scalar context) if no line could be read from $fileHandle.
+sub parseFirstEOL($)
+{
+    my ($fileHandle) = @_;
+
+    my $firstLine;
+    {
+        # Make the input record separator the new-line character, for this read only,
+        # to simplify the regex matching below.
+        local $INPUT_RECORD_SEPARATOR = "\n";
+        $firstLine = <$fileHandle>;
+    }
+    return unless defined($firstLine);
+
+    # Test "\r\n" first so that it is not mistaken for a lone "\r".
+    return "\r\n" if $firstLine =~ /\r\n/;
+    return "\r" if $firstLine =~ /\r/;
+    return "\n" if $firstLine =~ /\n/;
+    return undef;
+}
+
+# Returns the first EOL sequence in $file (see parseFirstEOL), or undef if $file cannot be opened.
+sub firstEOLInFile($)
+{
+    my ($file) = @_;
+    # Three-argument open: $file comes from a patch header and must never be parsed as a mode or pipe.
+    open(my $fileHandle, "<", $file) or return undef;
+    my $eol = parseFirstEOL($fileHandle);
+    close($fileHandle);
+    return $eol;
+}
+
  sub svnStatus($)
  {
      my ($fullPath) = @_;
@@ -511,7 +576,7 @@ sub parseGitDiffHeader($$)
          # The first and second paths can differ in the case of copies
          # and renames.  We use the second file path because it is the
          # destination path.
-        $indexPath = $4;
+        $indexPath = adjustPathForRecentRenamings($4);
          # Use $POSTMATCH to preserve the end-of-line character.
          $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format.
      } else {
@@ -627,7 +692,7 @@ sub parseSvnDiffHeader($$)
  
      my $indexPath;
      if (/$svnDiffStartRegEx/) {
-        $indexPath = $1;
+        $indexPath = adjustPathForRecentRenamings($1);
      } else {
          die("First line of SVN diff does not begin with \"Index \": \"$_\"");
      }
@@ -793,23 +858,30 @@ sub parseDiffHeader($$)
  #   $fileHandle: a file handle advanced to the first line of the next
  #                header block. Leading junk is okay.
  #   $line: the line last read from $fileHandle.
+#   $optionsHashRef: a hash reference representing optional options to use
+#                    when processing a diff.
+#     shouldNotUseIndexPathEOL: whether to use the line endings in the diff
+#                               instead of the line endings in the target file; the
+#                               value 1 if svnConvertedText should use the line
+#                               endings in the diff.
  #
  # Returns ($diffHashRefs, $lastReadLine):
  #   $diffHashRefs: A reference to an array of references to %diffHash hashes.
  #                  See the %diffHash documentation above.
  #   $lastReadLine: the line last read from $fileHandle
-sub parseDiff($$)
+sub parseDiff($$;$)
  {
      # FIXME: Adjust this method so that it dies if the first line does not
      #        match the start of a diff.  This will require a change to
      #        parsePatch() so that parsePatch() skips over leading junk.
-    my ($fileHandle, $line) = @_;
+    my ($fileHandle, $line, $optionsHashRef) = @_;
  
      my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default
  
      my $headerHashRef; # Last header found, as returned by parseDiffHeader().
      my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties().
      my $svnText;
+    my $indexPathEOL;
      while (defined($line)) {
          if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) {
              # Then assume all diffs in the patch are Git-formatted. This
@@ -832,6 +904,11 @@ sub parseDiff($$)
          }
          if ($line !~ $headerStartRegEx) {
              # Then we are in the body of the diff.
+            if ($indexPathEOL && $line !~ /$chunkRangeRegEx/) {
+                # The chunk range is part of the body of the diff, but its line endings shouldn't be
+                # modified or patch(1) will complain. So, we only modify non-chunk range lines.
+                $line =~ s/\r\n|\r|\n/$indexPathEOL/g;
+            }
              $svnText .= $line;
              $line = <$fileHandle>;
              next;
@@ -844,6 +921,9 @@ sub parseDiff($$)
          }
  
          ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line);
+        if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) {
+            $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary};
+        }
  
          $svnText .= $headerHashRef->{svnConvertedText};
      }
@@ -1138,13 +1218,19 @@ sub parseSvnPropertyValue($$)
  # Args:
  #   $fileHandle: A file handle to the patch file that has not yet been
  #                read from.
+#   $optionsHashRef: a hash reference representing optional options to use
+#                    when processing a diff.
+#     shouldNotUseIndexPathEOL: whether to use the line endings in the diff
+#                               instead of the line endings in the target file; the
+#                               value 1 if svnConvertedText should use the line
+#                               endings in the diff.
  #
  # Returns:
  #   @diffHashRefs: an array of diff hash references.
  #                  See the %diffHash documentation above.
-sub parsePatch($)
+sub parsePatch($;$)
  {
-    my ($fileHandle) = @_;
+    my ($fileHandle, $optionsHashRef) = @_;
  
      my $newDiffHashRefs;
      my @diffHashRefs; # return value
@@ -1153,7 +1239,7 @@ sub parsePatch($)
  
      while (defined($line)) { # Otherwise, at EOF.
  
-        ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line);
+        ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef);
  
          push @diffHashRefs, @$newDiffHashRefs;
      }
@@ -1304,12 +1390,7 @@ sub setChangeLogDateAndReviewer($$$)
  # Returns $changeLogHashRef:
  #   $changeLogHashRef: a hash reference representing a change log patch.
  #     patch: a ChangeLog patch equivalent to the given one, but with the
-#            newest ChangeLog entry inserted at the top of the file, if possible.
-#     hasOverlappingLines: the value 1 if the change log entry overlaps
-#                          some lines of another change log entry. This can
-#                          happen when deliberately inserting a new ChangeLog
-#                          entry earlier in the file above an entry with
-#                          the same date and author.                     
+#            newest ChangeLog entry inserted at the top of the file, if possible.              
  sub fixChangeLogPatch($)
  {
      my $patch = shift; # $patch will only contain patch fragments for ChangeLog.
@@ -1403,9 +1484,19 @@ sub fixChangeLogPatch($)
          $lines[$i] = "+$text";
      }
  
-    # Finish moving whatever overlapping lines remain, and update
-    # the initial chunk range.
-    my $chunkRangeRegEx = '^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$'; # e.g. @@ -2,6 +2,18 @@
+    # If @overlappingLines > 0, this is where we make use of the
+    # assumption that the beginning of the source file was not modified.
+    splice(@lines, $chunkStartIndex, 0, @overlappingLines);
+
+    # Update the date start index as it may have changed after shifting
+    # the overlapping lines towards the front.
+    for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) {
+        $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/;
+    }
+    splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry.
+    $deletedLineCount += $dateStartIndex - $chunkStartIndex;
+
+    # Update the initial chunk range.
      if ($lines[$chunkStartIndex - 1] !~ /$chunkRangeRegEx/) {
          # FIXME: Handle errors differently from ChangeLog files that
          # are okay but should not be altered. That way we can find out
@@ -1413,21 +1504,9 @@ sub fixChangeLogPatch($)
          $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format.
          return \%changeLogHashRef;
      }
-    my $skippedFirstLineCount = $1 - 1;
      my $oldSourceLineCount = $2;
      my $oldTargetLineCount = $3;
  
-    if (@overlappingLines != $skippedFirstLineCount) {
-        # This can happen, for example, when deliberately inserting
-        # a new ChangeLog entry earlier in the file.
-        $changeLogHashRef{hasOverlappingLines} = 1;
-        $changeLogHashRef{patch} = $patch;
-        return \%changeLogHashRef;
-    }
-    # If @overlappingLines > 0, this is where we make use of the
-    # assumption that the beginning of the source file was not modified.
-    splice(@lines, $chunkStartIndex, 0, @overlappingLines);
-
      my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount;
      my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount;
      $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@";
@@ -1738,7 +1817,6 @@ sub decodeGitBinaryPatch($$)
      #
      # Each chunk a line which starts from either "literal" or "delta",
      # followed by a number which specifies decoded size of the chunk.
-    # The "delta" type chunks aren't supported by this function yet.
      #
      # Then, content of the chunk comes. To decode the content, we
      # need decode it with base85 first, and then zlib.
@@ -1759,10 +1837,94 @@ sub decodeGitBinaryPatch($$)
      my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath);
      my $reverseBinaryChunkActualSize = length($reverseBinaryChunk);
  
-    die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkExpectedSize != $binaryChunkActualSize);
-    die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
+    die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize);
+    die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
  
      return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk);
  }
  
+# Returns the numeric value of the single byte at offset $location within the string $data.
+sub readByte($$)
+{
+    my ($data, $location) = @_;
+    my $byte = substr($data, $location, 1);
+    return ord($byte);
+}
+
+# The git binary delta format is undocumented, except in code:
+# - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source
+#   of the algorithm in decodeGitBinaryPatchDeltaSize.
+# - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source
+#   of the algorithm in applyGitBinaryPatchDelta.
+sub decodeGitBinaryPatchDeltaSize($)
+{
+    my ($binaryChunk) = @_;
+
+    # Decodes one variable-length size header from the start of $binaryChunk and returns
+    # ($size, $bytesUsed). The size is stored in 7-bit groups, least significant group
+    # first; the highest bit of every byte except the last one is set.
+    # Dies if $binaryChunk ends before the header is terminated, so that callers never
+    # continue with an undefined size and offset.
+    my $size = 0;
+    my $shift = 0;
+    for (my $i = 0; $i < length($binaryChunk);) {
+        my $cmd = readByte($binaryChunk, $i++);
+        $size |= ($cmd & 0x7f) << $shift;
+        $shift += 7;
+        if (!($cmd & 0x80)) {
+            return ($size, $i);
+        }
+    }
+    die "truncated size header in git binary delta";
+}
+
+sub applyGitBinaryPatchDelta($$)
+{
+    my ($binaryChunk, $originalContents) = @_;
+
+    # Applies the decoded git binary delta $binaryChunk to $originalContents and returns
+    # the resulting contents. Dies if the delta is malformed or does not fit the original.
+    # Git delta format consists of two headers indicating source buffer size
+    # and result size, then a series of commands.  Each command is either
+    # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta
+    # command.  Commands are applied sequentially to generate the result.
+    # A copy-from-old-version command encodes an offset and size to copy from in the
+    # bytes that follow it (the low seven bits of the command select which bytes are
+    # present), while a copy-from-delta command is just the number of bytes to copy.
+    # The source size is only skipped over; the result size is kept to verify the output.
+    my (undef, $sourceHeaderLength) = decodeGitBinaryPatchDeltaSize($binaryChunk);
+    $binaryChunk = substr($binaryChunk, $sourceHeaderLength);
+    my ($resultSize, $resultHeaderLength) = decodeGitBinaryPatchDeltaSize($binaryChunk);
+    $binaryChunk = substr($binaryChunk, $resultHeaderLength);
+
+    my $out = "";
+    for (my $i = 0; $i < length($binaryChunk); ) {
+        my $cmd = readByte($binaryChunk, $i++);
+        if ($cmd & 0x80) {
+            # Read the offset and size, then copy $size bytes from $offset in the original.
+            my $offset = 0;
+            my $size = 0;
+            if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); }
+            if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; }
+            if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; }
+            if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; }
+            if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); }
+            if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; }
+            if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; }
+            if ($size == 0) { $size = 0x10000; }
+            die "delta copy range exceeds original contents" if $offset + $size > length($originalContents);
+            $out .= substr($originalContents, $offset, $size);
+        } elsif ($cmd) {
+            # Copy $cmd bytes from the delta data into the output.
+            die "delta literal data is truncated" if $i + $cmd > length($binaryChunk);
+            $out .= substr($binaryChunk, $i, $cmd);
+            $i += $cmd;
+        } else {
+            die "unexpected delta opcode 0";
+        }
+    }
+    die "delta produced " . length($out) . " bytes but declares $resultSize" if length($out) != $resultSize;
+    return $out;
+}
+
  1;