PATCH: [ 1409651 ] fix diffutils handling trailing whitespace

author Perry Rapp <elsapo@users.sourceforge.net>

Fri, 3 Feb 2006 23:45:06 +0000 (23:45 +0000)

committer Perry Rapp <elsapo@users.sourceforge.net>

Fri, 3 Feb 2006 23:45:06 +0000 (23:45 +0000)
author Perry Rapp <elsapo@users.sourceforge.net>
Fri, 3 Feb 2006 23:45:06 +0000 (23:45 +0000)
committer Perry Rapp <elsapo@users.sourceforge.net>
Fri, 3 Feb 2006 23:45:06 +0000 (23:45 +0000)
diff --git a/Src/Changes.txt b/Src/Changes.txt

index c1bad4f..1074c96 100644 (file)
--- a/Src/Changes.txt
+++ b/Src/Changes.txt
@@ -1,4 +1,12 @@
  2006-02-03 Perry
+ PATCH: [ 1409651 ] fix diffutils handling trailing whitespace
+  Src/diffutils/src: IO.C UTIL.C
+ Current selftests status: Failure count: 24 (of 480)
+  Failing -b t002 (u & m platforms, all 4 builds)
+  Failing -bi t002 (u & m platforms, all 4 builds)
+  Failing -b bug1406950 (u & m platforms, all 4 builds)
+
+2006-02-03 Perry
   PATCH: [ 1423060 ] Modify perl selftest to test all 4 builds
   Changes to selftest
   Current selftests status: Failure count: 36 (of 480)
diff --git a/Src/diffutils/src/IO.C b/Src/diffutils/src/IO.C

index 580cb1f..8b468ab 100644 (file)
--- a/Src/diffutils/src/IO.C
+++ b/Src/diffutils/src/IO.C
@@ -213,6 +213,12 @@ slurp (current)
      }
  }
  
+static int
+ISWSPACE (char ch)
+{
+       return ch==' ' || ch=='\t';
+}
+
  /* Split the file into lines, simultaneously computing the equivalence class for
     each line. */
  static void
@@ -270,7 +276,7 @@ find_and_hash_each_line (current)
               {
                 if (ignore_eol_diff && (c=='\r' || c=='\n'))
                   continue;
-               if (! isspace (c))
+               if (! ISWSPACE (c))
                   h = HASH (h, isupper (c) ? tolower (c) : c);
               }
           else if (ignore_space_change_flag)
@@ -278,12 +284,36 @@ find_and_hash_each_line (current)
               {
                 if (ignore_eol_diff && (c=='\r' || c=='\n'))
                   continue;
-               if (isspace (c))
+               if (ISWSPACE (c))
                   {
-                   while (isspace (c = *p++))
-                     if (c == '\n' || (c == '\r' && *p != '\n'))
-                       goto hashing_done;
-                   h = HASH (h, ' ');
+                   /* skip whitespace after whitespace */
+                   while (ISWSPACE (c = *p++))
+                     ;
+                   if (c=='\n')
+                     {
+                       goto hashing_done; /* never hash trailing \n */
+                     }
+                   else if (c == '\r')
+                     {
+                       /*
+                           \r must be hashed if !ignore_eol_diff
+                           Also, we must always advance to end of line
+                           which means we can only stop on \r if not
+                           followed by \n
+                       */
+                       if (ignore_eol_diff)
+                         {
+                           if (*p == '\n') /* continue to LF after CR */
+                             continue;
+                           else
+                             goto hashing_done;
+                         }
+                     }
+                   else
+                     {
+                       /* runs of whitespace not ending line hashed as one space */
+                       h = HASH (h, ' ');
+                     }
                   }
                 /* c is now the first non-space.  */
                 h = HASH (h, isupper (c) ? tolower (c) : c);
@@ -303,7 +333,7 @@ find_and_hash_each_line (current)
               {
                 if (ignore_eol_diff && (c=='\r' || c=='\n'))
                   continue;
-               if (! isspace (c))
+               if (! ISWSPACE (c))
                   h = HASH (h, c);
               }
           else if (ignore_space_change_flag)
@@ -311,14 +341,39 @@ find_and_hash_each_line (current)
               {
                 if (ignore_eol_diff && (c=='\r' || c=='\n'))
                   continue;
-               if (isspace (c))
+               if (ISWSPACE (c))
                   {
-                   while (isspace (c = *p++))
-                     if (c == '\n' || (c == '\r' && *p != '\n'))
-                       goto hashing_done;
-                   h = HASH (h, ' ');
+                   /* skip whitespace after whitespace */
+                   while (ISWSPACE (c = *p++))
+                     ;
+                   if (c=='\n')
+                     {
+                       goto hashing_done; /* never hash trailing \n */
+                     }
+                   else if (c == '\r')
+                     {
+                       /*
+                           \r must be hashed if !ignore_eol_diff
+                           Also, we must always advance to end of line
+                           which means we can only stop on \r if not
+                           followed by \n
+                       */
+                       if (ignore_eol_diff)
+                         {
+                           if (*p == '\n') /* continue to LF after CR */
+                             continue;
+                           else
+                             goto hashing_done;
+                         }
+                     }
+                   else
+                     {
+                       /* runs of whitespace not ending line hashed as one space */
+                       h = HASH (h, ' ');
+                     }
                   }
                 /* c is now the first non-space.  */
+               /* c can be a \r (CR) if !ignore_eol_diff */
                 h = HASH (h, c);
               }
           else
@@ -367,6 +422,7 @@ find_and_hash_each_line (current)
             *bucket = i;
             break;
           }
+       /* "line_cmp" changed to "lines_differ" by diffutils 2.8.1 */
         else if (eqs[i].hash == h
                  && (eqs[i].length == length || varies)
                  && ! line_cmp (eqs[i].line, eqs[i].length, ip, length))
diff --git a/Src/diffutils/src/UTIL.C b/Src/diffutils/src/UTIL.C

index f7213a1..37e09a5 100644 (file)
--- a/Src/diffutils/src/UTIL.C
+++ b/Src/diffutils/src/UTIL.C
@@ -295,6 +295,13 @@ finish_output ()
    outfile = 0;
  }
  \f
+
+static int
+ISWSPACE (char ch)
+{
+       return ch==' ' || ch=='\t';
+}
+
  /* Compare two lines (typically one from each input file)
     according to the command line options.
     Return 1 if the lines differ, like `memcmp'.  */
@@ -347,7 +354,7 @@ line_cmp (s1, len1, s2, len2)
           if (ignore_all_space_flag)
             {
               /* For -w, just skip past any white space.  */
-             while (isspace (c1) && c1 != '\n' && c1 != '\r') 
+             while (ISWSPACE (c1))
                 {
                   if (t1-s1<(int)len1)
                     {
@@ -359,7 +366,7 @@ line_cmp (s1, len1, s2, len2)
                       break;
                     }
                 }
-             while (isspace (c2) && c2 != '\n' && c2 != '\r') 
+             while (ISWSPACE (c2))
                 {
                   if (t2-s2<(int)len2)
                     {
@@ -377,36 +384,46 @@ line_cmp (s1, len1, s2, len2)
               /* For -b, advance past any sequence of white space in line 1
                  and consider it just one Space, or nothing at all
                  if it is at the end of the line.  */
-             if (isspace (c1) && c1 != '\r' && c1 != '\n')
+             if (ISWSPACE (c1))
                 {
                   while (t1-s1<(int)len1)
                     {
                       c1 = *t1++;
-                     if (! isspace (c1))
+                     if (c1 == '\r' || c1 == '\n')
+                       {
+                         /* ignore whitespace but handle \r below (depending on ignore_eol_diff) */
+                         break; 
+                       }
+                     if (! ISWSPACE (c1))
                         {
                           --t1;
                           c1 = ' ';
                           break;
                         }
                     }
-                 if (t1-s1==(int)len1)
+                 if (t1-s1==(int)len1 && c1 != '\r')
                     c1 = 0;
                 }
  
               /* Likewise for line 2.  */
-             if (isspace (c2) && c2 != '\r' && c2 != '\n')
+             if (ISWSPACE (c2))
                 {
                   while (t2-s2<(int)len2)
                     {
                       c2 = *t2++;
-                     if (! isspace (c2))
+                     if (c2 == '\r' || c2 == '\n')
+                       {
+                         /* ignore whitespace but handle \r below (depending on ignore_eol_diff) */
+                         break; 
+                       }
+                     if (! ISWSPACE (c2))
                         {
                           --t2;
                           c2 = ' ';
                           break;
                         }
                     }
-                 if (t2-s2==(int)len2)
+                 if (t2-s2==(int)len2 && c2 != '\r')
                     c2 = 0;
                 }
  
@@ -446,6 +463,14 @@ line_cmp (s1, len1, s2, len2)
                 c2 = (unsigned char)toupper (c2);
             }
  
+         if (ignore_eol_diff)
+           {
+             if (c1 == '\r')
+               c1 = 0;
+             else if (c2 == '\r')
+               c2 = 0;
+           }
+
           if (c1 != c2)
             break;
           }
author	Perry Rapp <elsapo@users.sourceforge.net>
	Fri, 3 Feb 2006 23:45:06 +0000 (23:45 +0000)
committer	Perry Rapp <elsapo@users.sourceforge.net>
	Fri, 3 Feb 2006 23:45:06 +0000 (23:45 +0000)
Src/Changes.txt		patch | blob | history
Src/diffutils/src/IO.C		patch | blob | history
Src/diffutils/src/UTIL.C		patch | blob | history