OSDN Git Service

convert: tighten the safe autocrlf handling
authorTorsten Bögershausen <tboegi@web.de>
Sun, 26 Nov 2017 12:20:52 +0000 (13:20 +0100)
committerJunio C Hamano <gitster@pobox.com>
Mon, 27 Nov 2017 01:17:24 +0000 (10:17 +0900)
When a text file had been commited with CRLF and the file is commited
again, the CRLF are kept if .gitattributs has "text=auto".
This is done by analyzing the content of the blob stored in the index:
If a '\r' is found, Git assumes that the blob was commited with CRLF.

The simple search for a '\r' does not always work as expected:
A file is encoded in UTF-16 with CRLF and commited. Git treats it as binary.
Now the content is converted into UTF-8. At the next commit Git treats the
file as text, the CRLF should be converted into LF, but isn't.

Replace has_cr_in_index() with has_crlf_in_index(). When no '\r' is found,
0 is returned directly, this is the most common case.
If a '\r' is found, the content is analyzed more deeply.

Reported-By: Ashish Negi <ashishnegi33@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
convert.c
t/t0027-auto-crlf.sh

index d714420..d3b39dd 100644 (file)
--- a/convert.c
+++ b/convert.c
@@ -220,18 +220,27 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
        }
 }
 
-static int has_cr_in_index(const struct index_state *istate, const char *path)
+static int has_crlf_in_index(const struct index_state *istate, const char *path)
 {
        unsigned long sz;
        void *data;
-       int has_cr;
+       const char *crp;
+       int has_crlf = 0;
 
        data = read_blob_data_from_index(istate, path, &sz);
        if (!data)
                return 0;
-       has_cr = memchr(data, '\r', sz) != NULL;
+
+       crp = memchr(data, '\r', sz);
+       if (crp) {
+               unsigned int ret_stats;
+               ret_stats = gather_convert_stats(data, sz);
+               if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
+                   (ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
+                       has_crlf = 1;
+       }
        free(data);
-       return has_cr;
+       return has_crlf;
 }
 
 static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
@@ -290,7 +299,7 @@ static int crlf_to_git(const struct index_state *istate,
                 * cherry-pick.
                 */
                if ((checksafe != SAFE_CRLF_RENORMALIZE) &&
-                   has_cr_in_index(istate, path))
+                   has_crlf_in_index(istate, path))
                        convert_crlf_into_lf = 0;
        }
        if ((checksafe == SAFE_CRLF_WARN ||
index deb3ae7..c2c208f 100755 (executable)
@@ -43,19 +43,31 @@ create_gitattributes () {
        } >.gitattributes
 }
 
-create_NNO_files () {
+# Create 2 sets of files:
+# The NNO files are "Not NOrmalized in the repo. We use CRLF_mix_LF and store
+#   it under different names for the different test cases, see ${pfx}
+#   Depending on .gitattributes they are normalized at the next commit (or not)
+# The MIX files have different contents in the repo.
+#   Depending on its contents, the "new safer autocrlf" may kick in.
+create_NNO_MIX_files () {
        for crlf in false true input
        do
                for attr in "" auto text -text
                do
                        for aeol in "" lf crlf
                        do
-                               pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf}
+                               pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} &&
                                cp CRLF_mix_LF ${pfx}_LF.txt &&
                                cp CRLF_mix_LF ${pfx}_CRLF.txt &&
                                cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
                                cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
-                               cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
+                               cp CRLF_mix_LF ${pfx}_CRLF_nul.txt &&
+                               pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} &&
+                               cp LF          ${pfx}_LF.txt &&
+                               cp CRLF        ${pfx}_CRLF.txt &&
+                               cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
+                               cp LF_mix_CR   ${pfx}_LF_mix_CR.txt &&
+                               cp CRLF_nul    ${pfx}_CRLF_nul.txt
                        done
                done
        done
@@ -136,6 +148,49 @@ commit_chk_wrnNNO () {
        '
 }
 
+# Commit a file with mixed line endings on top of different files
+# in the index. Check for warnings
+commit_MIX_chkwrn () {
+       attr=$1 ; shift
+       aeol=$1 ; shift
+       crlf=$1 ; shift
+       lfwarn=$1 ; shift
+       crlfwarn=$1 ; shift
+       lfmixcrlf=$1 ; shift
+       lfmixcr=$1 ; shift
+       crlfnul=$1 ; shift
+       pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf}
+       #Commit file with CLRF_mix_LF on top of existing file
+       create_gitattributes "$attr" $aeol &&
+       for f in LF CRLF CRLF_mix_LF LF_mix_CR CRLF_nul
+       do
+               fname=${pfx}_$f.txt &&
+               cp CRLF_mix_LF $fname &&
+               printf Z >>"$fname" &&
+               git -c core.autocrlf=$crlf add $fname 2>"${pfx}_$f.err"
+       done
+
+       test_expect_success "commit file with mixed EOL crlf=$crlf attr=$attr LF" '
+               check_warning "$lfwarn" ${pfx}_LF.err
+       '
+       test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF" '
+               check_warning "$crlfwarn" ${pfx}_CRLF.err
+       '
+
+       test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_mix_LF" '
+               check_warning "$lfmixcrlf" ${pfx}_CRLF_mix_LF.err
+       '
+
+       test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf LF_mix_cr" '
+               check_warning "$lfmixcr" ${pfx}_LF_mix_CR.err
+       '
+
+       test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_nul" '
+               check_warning "$crlfnul" ${pfx}_CRLF_nul.err
+       '
+}
+
+
 stats_ascii () {
        case "$1" in
        LF)
@@ -323,8 +378,8 @@ test_expect_success 'setup master' '
        printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONE\r\nLINETWO\rLINETHREE"   >CRLF_mix_CR &&
        printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONEQ\r\nLINETWO\r\nLINETHREE" | q_to_nul >CRLF_nul &&
        printf "\$Id: 0000000000000000000000000000000000000000 \$\nLINEONEQ\nLINETWO\nLINETHREE" | q_to_nul >LF_nul &&
-       create_NNO_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
-       git -c core.autocrlf=false add NNO_*.txt &&
+       create_NNO_MIX_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
+       git -c core.autocrlf=false add NNO_*.txt MIX_*.txt &&
        git commit -m "mixed line endings" &&
        test_tick
 '
@@ -385,6 +440,17 @@ test_expect_success 'commit files attr=crlf' '
        commit_check_warn input "crlf" "LF_CRLF" ""        "LF_CRLF" "LF_CRLF" ""
 '
 
+# Commit "CRLFmixLF" on top of these files already in the repo:
+# LF, CRLF, CRLFmixLF LF_mix_CR CRLFNULL
+#                 attr                    LF        CRLF      CRLFmixLF   LF_mix_CR   CRLFNUL
+commit_MIX_chkwrn ""      ""      false   ""        ""        ""          ""          ""
+commit_MIX_chkwrn ""      ""      true    "LF_CRLF" ""        ""          "LF_CRLF"   "LF_CRLF"
+commit_MIX_chkwrn ""      ""      input   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
+
+commit_MIX_chkwrn "auto"  ""      false   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
+commit_MIX_chkwrn "auto"  ""      true    "LF_CRLF" ""        ""          "LF_CRLF"   "LF_CRLF"
+commit_MIX_chkwrn "auto"  ""      input   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
+
 #                 attr                    LF        CRLF      CRLFmixLF   LF_mix_CR   CRLFNUL
 commit_chk_wrnNNO ""      ""      false   ""        ""        ""          ""          ""
 commit_chk_wrnNNO ""      ""      true    LF_CRLF   ""        ""          ""          ""