From d9545c7f465ed103df44cd93caddfdd265757779 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 25 Apr 2016 21:17:28 +0000 Subject: [PATCH] fast-import: implement unpack limit With many incremental imports, small packs become highly inefficient due to the need to readdir scan and load many indices to locate even a single object. Frequent repacking and consolidation may be prohibitively expensive in terms of disk I/O, especially in large repositories where the initial packs were aggressively optimized and marked with .keep files. In those cases, users may be better served with loose objects and relying on "git gc --auto". This changes the default behavior of fast-import for small imports found in test cases, so adjustments to t9300 were necessary. Signed-off-by: Eric Wong Signed-off-by: Junio C Hamano --- Documentation/config.txt | 9 +++++++ Documentation/git-fast-import.txt | 2 ++ fast-import.c | 32 +++++++++++++++++++++++++ t/t9300-fast-import.sh | 2 ++ t/t9302-fast-import-unpack-limit.sh | 48 +++++++++++++++++++++++++++++++++++++ 5 files changed, 93 insertions(+) create mode 100755 t/t9302-fast-import-unpack-limit.sh diff --git a/Documentation/config.txt b/Documentation/config.txt index 2cd6bdd7d..283bf0409 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1153,6 +1153,15 @@ difftool.<tool>.cmd:: difftool.prompt:: Prompt before each invocation of the diff tool. +fastimport.unpackLimit:: + If the number of objects imported by linkgit:git-fast-import[1] + is below this limit, then the objects will be unpacked into + loose object files. However if the number of imported objects + equals or exceeds this limit then the pack will be stored as a + pack. Storing the pack from a fast-import can make the import + operation complete faster, especially on slow filesystems. If + not set, the value of `transfer.unpackLimit` is used instead. + fetch.recurseSubmodules:: This option can be either set to a boolean value or to 'on-demand'. 
Setting it to a boolean changes the behavior of fetch and pull to diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 66910aa2f..644df993f 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -136,6 +136,8 @@ Performance and Compression Tuning Maximum size of each output packfile. The default is unlimited. +fastimport.unpackLimit:: + See linkgit:git-config[1] Performance ----------- diff --git a/fast-import.c b/fast-import.c index 9fc709340..4fb464c1e 100644 --- a/fast-import.c +++ b/fast-import.c @@ -166,6 +166,7 @@ Format of STDIN stream: #include "quote.h" #include "exec_cmd.h" #include "dir.h" +#include "run-command.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<pack_fd, 0, SEEK_SET) < 0) + die_errno("Failed seeking to start of '%s'", p->pack_name); + + unpack.in = p->pack_fd; + unpack.git_cmd = 1; + unpack.stdout_to_stderr = 1; + argv_array_push(&unpack.args, "unpack-objects"); + if (!show_stats) + argv_array_push(&unpack.args, "-q"); + + return run_command(&unpack); +} + static void end_packfile(void) { static int running; @@ -972,6 +991,12 @@ static void end_packfile(void) fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, pack_data->pack_name, object_count, cur_pack_sha1, pack_size); + + if (object_count <= unpack_limit) { + if (!loosen_small_pack(pack_data)) + goto discard_pack; + } + close(pack_data->pack_fd); idx_name = keep_pack(create_index()); @@ -1002,6 +1027,7 @@ static void end_packfile(void) pack_id++; } else { +discard_pack: close(pack_data->pack_fd); unlink_or_warn(pack_data->pack_name); } @@ -3317,6 +3343,7 @@ static void parse_option(const char *option) static void git_pack_config(void) { int indexversion_value; + int limit; unsigned long packsizelimit_value; if (!git_config_get_ulong("pack.depth", &max_depth)) { @@ -3341,6 +3368,11 @@ static void git_pack_config(void) if (!git_config_get_ulong("pack.packsizelimit", &packsizelimit_value)) max_packsize = 
packsizelimit_value; + if (!git_config_get_int("fastimport.unpacklimit", &limit)) + unpack_limit = limit; + else if (!git_config_get_int("transfer.unpacklimit", &limit)) + unpack_limit = limit; + git_config(git_default_config, NULL); } diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 25bb60b28..e6a2b8a4d 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -52,6 +52,7 @@ echo "$@"' ### test_expect_success 'empty stream succeeds' ' + git config fastimport.unpackLimit 0 && git fast-import >input && test_create_repo R && + git --git-dir=R/.git config fastimport.unpackLimit 0 && git --git-dir=R/.git fast-import --big-file-threshold=1 input <<-INPUT_END && + commit refs/heads/master + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <input <<-INPUT_END && + commit refs/heads/master + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data <