2 use lib (split(/:/, $ENV{GITPERLLIB} || "/home/eos/Eos/util/X86LINUX64/share/perl5"));
4 # This tool is copyright (c) 2005, Martin Langhoff.
5 # It is released under the Gnu Public License, version 2.
7 # The basic idea is to walk the output of tla abrowse,
8 # fetch the changesets and apply them.
13 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
14 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
16 Imports a project from one or more Arch repositories. It will follow branches
17 and repositories within the namespaces defined by the <archive/branch>
18 parameters supplied. If it cannot find the remote branch a merge comes from
19 it will just import it as a regular commit. If it can find it, it will mark it
20 as a merge whenever possible.
22 See man (1) git-archimport for more details.
26 - create tag objects instead of ref tags
27 - audit shell-escaping of filenames
28 - hide our private tags somewhere smarter
29 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
30 - sort and apply patches by graphing ancestry relations instead of just
31 relying in dates supplied in the changeset itself.
32 tla ancestry-graph -m could be helpful here...
36 Add print in front of the shell commands invoked via backticks.
40 There are several places where Arch and git terminology are intermixed
41 and potentially confused.
43 The notion of a "branch" in git is approximately equivalent to
44 a "archive/category--branch--version" in Arch. Also, it should be noted
45 that the "--branch" portion of "archive/category--branch--version" is really
46 optional in Arch although not many people (nor tools!) seem to know this.
47 This means that "archive/category--version" is also a valid "branch"
50 We always refer to Arch names by their fully qualified variant (which
51 means the "archive" name is prefixed.
53 For people unfamiliar with Arch, an "archive" is the term for "repository",
54 and can contain multiple, unrelated branches.
62 use File::Temp qw(tempdir);
63 use File::Path qw(mkpath rmtree);
64 use File::Basename qw(basename dirname);
65 use Data::Dumper qw/ Dumper /;
68 $SIG{'PIPE'}="IGNORE";
71 my $git_dir = $ENV{"GIT_DIR"} || ".git";
72 $ENV{"GIT_DIR"} = $git_dir;
73 my $ptag_dir = "$git_dir/archimport/tags";
75 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
79 usage: git archimport # fetch/update GIT from Arch
80 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
81 repository/arch-branch [ repository/arch-branch] ...
86 getopts("fThvat:D:") or usage();
89 @ARGV >= 1 or usage();
91 # values associated with keys:
92 # =1 - Arch version / git 'branch' detected via abrowse on a limit
93 # >1 - Arch version / git 'branch' of an auxiliary branch we've merged
94 my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
97 # maps arch branches to git branch names
98 my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
100 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
101 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
102 $opt_v && print "+ Using $tmp as temporary directory\n";
104 unless (-d $git_dir) { # initial import needs empty directory
105 opendir DIR, '.' or die "Unable to open current directory: $!\n";
106 while (my $entry = readdir DIR) {
107 $entry =~ /^\.\.?$/ or
108 die "Initial import needs an empty current working directory.\n"
113 my $default_archive; # default Arch archive
114 my %reachable = (); # Arch repositories we can access
115 my %unreachable = (); # Arch repositories we can't access :<
116 my @psets = (); # the collection
117 my %psets = (); # the collection, by name
118 my %stats = ( # Track which strategy we used to import:
119 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
120 simple_changeset => 0, import_or_tag => 0
123 my %rptags = (); # my reverse private tags
124 # to map a SHA1 to a commitid
125 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
129 while (my ($limit, $level) = each %arch_branches) {
130 next unless $level == $stage;
132 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
133 or die "Problems with tla abrowse: $!";
135 my %ps = (); # the current one
141 # first record padded w 8 spaces
143 my ($id, $type) = split(m/\s+/, $_, 2);
146 # store the record we just captured
147 if (%ps && !exists $psets{ $ps{id} }) {
148 %last_ps = %ps; # break references
149 push (@psets, \%last_ps);
150 $psets{ $last_ps{id} } = \%last_ps;
153 my $branch = extract_versionname($id);
154 %ps = ( id => $id, branch => $branch );
155 if (%last_ps && ($last_ps{branch} eq $branch)) {
156 $ps{parent_id} = $last_ps{id};
159 $arch_branches{$branch} = 1;
162 # deal with types (should work with baz or tla):
163 if ($type =~ m/\(.*changeset\)/) {
165 } elsif ($type =~ /\(.*import\)/) {
167 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
169 # read which revision we've tagged when we parse the log
172 warn "Unknown type $type";
175 $arch_branches{$branch} = 1;
177 } elsif (s/^\s{10}//) {
178 # 10 leading spaces or more
179 # indicate commit metadata
182 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
185 } elsif ($_ eq 'merges in:') {
187 $lastseen = 'merges';
188 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
190 push (@{$ps{merges}}, $id);
192 # aggressive branch finding:
194 my $branch = extract_versionname($id);
195 my $repo = extract_reponame($branch);
197 if (archive_reachable($repo) &&
198 !defined $arch_branches{$branch}) {
199 $arch_branches{$branch} = $stage + 1;
203 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
208 if (%ps && !exists $psets{ $ps{id} }) {
209 my %temp = %ps; # break references
210 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
211 $temp{parent_id} = $psets[$#psets]{id};
213 push (@psets, \%temp);
214 $psets{ $temp{id} } = \%temp;
217 close ABROWSE or die "$TLA abrowse failed on $limit\n";
219 } # end foreach $root
224 while ($depth <= $opt_D) {
229 ## Order patches by time
230 # FIXME see if we can find a more optimal way to do this by graphing
231 # the ancestry data and walking it, that way we won't have to rely on
232 # client-supplied dates
233 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
235 #print Dumper \@psets;
238 ## TODO cleanup irrelevant patches
239 ## and put an initial import
242 unless (-d $git_dir) { # initial import
243 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
244 print "Starting import from $psets[0]{id}\n";
249 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
251 } else { # progressing an import
253 opendir(DIR, $ptag_dir)
254 || die "can't opendir: $!";
255 while (my $file = readdir(DIR)) {
256 # skip non-interesting-files
257 next unless -f "$ptag_dir/$file";
259 # convert first '--' to '/' from old git-archimport to use
260 # as an archivename/c--b--v private tag
264 print STDERR "converting old tag $oldfile to $file\n";
265 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
267 my $sha = ptag($file);
269 $rptags{$sha} = $file;
275 # extract the Arch repository name (Arch "archive" in Arch-speak)
276 sub extract_reponame {
277 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
278 return (split(/\//, $fq_cvbr))[0];
281 sub extract_versionname {
283 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
287 # convert a fully-qualified revision or version to a unique dirname:
288 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
289 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
291 # the git notion of a branch is closer to
292 # archive/category--branch--version than archive/category--branch, so we
293 # use this to convert to git branch names.
294 # Also, keep archive names but replace '/' with ',' since it won't require
295 # subdirectories, and is safer than swapping '--' which could confuse
296 # reverse-mapping when dealing with bastard branches that
297 # are just archive/category--version (no --branch)
299 my $revision = shift;
300 my $name = extract_versionname($revision);
305 # old versions of git-archimport just use the <category--branch> part:
306 sub old_style_branchname {
308 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
313 *git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
315 # retrieve default archive, since $branch_name_map keys might not include it
316 sub get_default_archive {
317 if (!defined $default_archive) {
318 $default_archive = safe_pipe_capture($TLA,'my-default-archive');
319 chomp $default_archive;
321 return $default_archive;
325 my $revision = shift;
326 my $name = extract_versionname($revision);
328 if (exists $branch_name_map{$name}) {
329 return $branch_name_map{$name};
331 } elsif ($name =~ m#^([^/]*)/(.*)$#
332 && $1 eq get_default_archive()
333 && exists $branch_name_map{$2}) {
334 # the names given in the command-line lacked the archive.
335 return $branch_name_map{$2};
338 return git_default_branchname($revision);
342 sub process_patchset_accurate {
345 # switch to that branch if we're not already in that branch:
346 if (-e "$git_dir/refs/heads/$ps->{branch}") {
347 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
349 # remove any old stuff that got leftover:
350 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
351 rmtree(split(/\0/,$rm)) if $rm;
354 # Apply the import/changeset/merge into the working tree
355 my $dir = sync_to_ps($ps);
356 # read the new log entry:
357 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
358 die "Error in cat-log: $!" if $?;
361 # grab variables we want from the log, new fields get added to $ps:
362 # (author, date, email, summary, message body ...)
363 parselog($ps, \@commitlog);
365 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
366 # this should work when importing continuations
367 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
369 # find where we are supposed to branch from
370 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
371 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
373 # We trust Arch with the fact that this is just a tag,
374 # and it does not affect the state of the tree, so
375 # we just tag and move on. If the user really wants us
376 # to consolidate more branches into one, don't tag because
377 # the tag name would be already taken.
378 tag($ps->{id}, $branchpoint);
379 ptag($ps->{id}, $branchpoint);
380 print " * Tagged $ps->{id} at $branchpoint\n";
382 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
384 # remove any old stuff that got leftover:
385 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
386 rmtree(split(/\0/,$rm)) if $rm;
389 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
391 # allow multiple bases/imports here since Arch supports cherry-picks
392 # from unrelated trees
395 # update the index with all the changes we got
396 system('git-diff-files --name-only -z | '.
397 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
398 system('git-ls-files --others -z | '.
399 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
403 # the native changeset processing strategy. This is very fast, but
404 # does not handle permissions or any renames involving directories
405 sub process_patchset_fast {
408 # create the branch if needed
410 if ($ps->{type} eq 'i' && !$import) {
411 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
414 unless ($import) { # skip for import
415 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
416 # we know about this branch
417 system('git-checkout',$ps->{branch});
419 # new branch! we need to verify a few things
420 die "Branch on a non-tag!" unless $ps->{type} eq 't';
421 my $branchpoint = ptag($ps->{tag});
422 die "Tagging from unknown id unsupported: $ps->{tag}"
425 # find where we are supposed to branch from
426 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
427 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
429 # We trust Arch with the fact that this is just a tag,
430 # and it does not affect the state of the tree, so
431 # we just tag and move on. If the user really wants us
432 # to consolidate more branches into one, don't tag because
433 # the tag name would be already taken.
434 tag($ps->{id}, $branchpoint);
435 ptag($ps->{id}, $branchpoint);
436 print " * Tagged $ps->{id} at $branchpoint\n";
438 system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
445 # Apply the import/changeset/merge into the working tree
447 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
448 apply_import($ps) or die $!;
449 $stats{import_or_tag}++;
451 } elsif ($ps->{type} eq 's') {
453 $stats{simple_changeset}++;
457 # prepare update git's index, based on what arch knows
458 # about the pset, resolve parents, etc
461 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
462 die "Error in cat-archive-log: $!" if $?;
464 parselog($ps,\@commitlog);
466 # imports don't give us good info
467 # on added files. Shame on them
468 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
469 system('git-ls-files --deleted -z | '.
470 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
471 system('git-ls-files --others -z | '.
472 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
475 # TODO: handle removed_directories and renamed_directories:
477 if (my $del = $ps->{removed_files}) {
480 my @slice = splice(@$del, 0, 100);
481 system('git-update-index','--remove','--',@slice) == 0 or
482 die "Error in git-update-index --remove: $! $?\n";
486 if (my $ren = $ps->{renamed_files}) { # renamed
488 die "Odd number of entries in rename!?";
492 my $from = shift @$ren;
493 my $to = shift @$ren;
495 unless (-d dirname($to)) {
496 mkpath(dirname($to)); # will die on err
498 # print "moving $from $to";
499 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
500 system('git-update-index','--remove','--',$from) == 0 or
501 die "Error in git-update-index --remove: $! $?\n";
502 system('git-update-index','--add','--',$to) == 0 or
503 die "Error in git-update-index --add: $! $?\n";
507 if (my $add = $ps->{new_files}) {
509 my @slice = splice(@$add, 0, 100);
510 system('git-update-index','--add','--',@slice) == 0 or
511 die "Error in git-update-index --add: $! $?\n";
515 if (my $mod = $ps->{modified_files}) {
517 my @slice = splice(@$mod, 0, 100);
518 system('git-update-index','--',@slice) == 0 or
519 die "Error in git-update-index: $! $?\n";
522 return 1; # we successfully applied the changeset
526 print "Will import patchsets using the fast strategy\n",
527 "Renamed directories and permission changes will be missed\n";
528 *process_patchset = *process_patchset_fast;
530 print "Using the default (accurate) import strategy.\n",
531 "Things may be a bit slow\n";
532 *process_patchset = *process_patchset_accurate;
535 foreach my $ps (@psets) {
537 $ps->{branch} = git_branchname($ps->{id});
540 # ensure we have a clean state
542 if (my $dirty = `git-diff-files`) {
543 die "Unclean tree when about to process $ps->{id} " .
544 " - did we fail to commit cleanly before?\n$dirty";
549 # skip commits already in repo
551 if (ptag($ps->{id})) {
552 $opt_v && print " * Skipping already imported: $ps->{id}\n";
556 print " * Starting to work on $ps->{id}\n";
558 process_patchset($ps) or next;
560 # warn "errors when running git-update-index! $!";
561 my $tree = `git-write-tree`;
562 die "cannot write tree $!" if $?;
569 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
570 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
576 if ($ps->{type} eq 's') {
577 warn "Could not find the right head for the branch $ps->{branch}";
583 push @par, find_parents($ps);
587 # Commit, tag and clean state
590 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
591 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
592 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
593 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
594 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
595 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
597 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
599 print WRITER $ps->{summary},"\n\n";
601 # only print message if it's not empty, to avoid a spurious blank line;
602 # also append an extra newline, so there's a blank line before the
603 # following "git-archimport-id:" line.
604 print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
606 # make it easy to backtrack and figure out which Arch revision this was:
607 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
610 my $commitid = <READER>; # read
613 waitpid $pid,0; # close;
615 if (length $commitid != 40) {
616 die "Something went wrong with the commit! $! $commitid";
621 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
622 print HEAD $commitid;
624 system('git-update-ref', 'HEAD', "$ps->{branch}");
627 ptag($ps->{id}, $commitid); # private tag
628 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
629 tag($ps->{id}, $commitid);
631 print " * Committed $ps->{id}\n";
632 print " + tree $tree\n";
633 print " + commit $commitid\n";
634 $opt_v && print " + commit date is $ps->{date} \n";
635 $opt_v && print " + parents: ",join(' ',@par),"\n";
639 foreach (sort keys %stats) {
640 print" $_: $stats{$_}\n";
645 # used by the accurate strategy:
648 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
650 $opt_v && print "sync_to_ps($ps->{id}) method: ";
653 if ($ps->{type} eq 't') {
654 $opt_v && print "get (tag)\n";
655 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
656 # can't rely on replay to work correctly on these
658 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
661 my $tree_id = arch_tree_id($tree_dir);
662 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
663 # the common case (hopefully)
664 $opt_v && print "replay\n";
665 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
668 # getting one tree is usually faster than getting two trees
669 # and applying the delta ...
671 $opt_v && print "apply-delta\n";
672 safe_pipe_capture($TLA,'get','--no-pristine',
673 $ps->{id},$tree_dir);
679 $opt_v && print "get (new tree)\n";
680 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
684 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
685 system('rsync','-aI','--delete','--exclude',$git_dir,
686 # '--exclude','.arch-inventory',
687 '--exclude','.arch-ids','--exclude','{arch}',
688 '--exclude','+*','--exclude',',*',
689 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
695 my $bname = git_branchname($ps->{id});
699 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
700 die "Cannot get import: $!" if $?;
701 system('rsync','-aI','--delete', '--exclude',$git_dir,
702 '--exclude','.arch-ids','--exclude','{arch}',
703 "$tmp/import/", './');
704 die "Cannot rsync import:$!" if $?;
706 rmtree("$tmp/import");
707 die "Cannot remove tempdir: $!" if $?;
719 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
720 die "Cannot get changeset: $!" if $?;
723 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
724 # this can be sped up considerably by doing
725 # (find | xargs cat) | patch
726 # but that can get mucked up by patches
727 # with missing trailing newlines or the standard
728 # 'missing newline' flag in the patch - possibly
729 # produced with an old/buggy diff.
730 # slow and safe, we invoke patch once per patchfile
731 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
732 die "Problem applying patches! $!" if $?;
735 # apply changed binary files
736 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
737 foreach my $mod (@modified) {
740 $orig =~ s/\.modified$//; # lazy
741 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
742 #print "rsync -p '$mod' '$orig'";
743 system('rsync','-p',$mod,"./$orig");
744 die "Problem applying binary changes! $!" if $?;
749 system('rsync','-aI','--exclude',$git_dir,
750 '--exclude','.arch-ids',
751 '--exclude', '{arch}',
752 "$tmp/changeset/new-files-archive/",'./');
754 # deleted files are hinted from the commitlog processing
756 rmtree("$tmp/changeset");
761 # notes: *-files/-directories keys cannot have spaces, they're always
762 # pika-escaped. Everything after the first newline
763 # A log entry looks like:
764 # Revision: moodle-org--moodle--1.3.3--patch-15
765 # Archive: arch-eduforge@catalyst.net.nz--2004
766 # Creator: Penny Leach <penny@catalyst.net.nz>
767 # Date: Wed May 25 14:15:34 NZST 2005
768 # Standard-date: 2005-05-25 02:15:34 GMT
769 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
770 # lang/de/.arch-ids/block_html.php.id
771 # New-directories: lang/de/help/questionnaire
772 # lang/de/help/questionnaire/.arch-ids
773 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
774 # db_sears.sql db/db_sears.sql
775 # Removed-files: lang/be/docs/.arch-ids/release.html.id
776 # lang/be/docs/.arch-ids/releaseold.html.id
777 # Modified-files: admin/cron.php admin/delete.php
778 # admin/editor.html backup/lib.php backup/restore.php
779 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
780 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
781 # summary can be multiline with a leading space just like the above fields
784 # Updating yadda tadda tadda madda
789 # headers we want that contain filenames:
794 renamed_directories => 1,
796 removed_directories => 1,
800 while ($_ = shift @$log) {
801 if (/^Continuation-of:\s*(.*)/) {
804 } elsif (/^Summary:\s*(.*)$/ ) {
805 # summary can be multiline as long as it has a leading space.
806 # we squeeze it onto a single line, though.
807 $ps->{summary} = [ $1 ];
809 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
813 # any *-files or *-directories can be read here:
814 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
817 $key =~ tr/-/_/; # too lazy to quote :P
818 if ($want_headers{$key}) {
819 push @{$ps->{$key}}, split(/\s+/, $val);
824 last; # remainder of @$log that didn't get shifted off is message
827 if ($key eq 'summary') {
828 push @{$ps->{$key}}, $1;
829 } else { # files/directories:
830 push @{$ps->{$key}}, split(/\s+/, $1);
838 # drop leading empty lines from the log message
839 while (@$log && $log->[0] eq '') {
842 if (exists $ps->{summary} && @{$ps->{summary}}) {
843 $ps->{summary} = join(' ', @{$ps->{summary}});
846 $ps->{summary} = 'empty commit message';
848 $ps->{summary} = $log->[0] . '...';
850 $ps->{message} = join("\n",@$log);
852 # skip Arch control files, unescape pika-escaped files
853 foreach my $k (keys %want_headers) {
854 next unless (defined $ps->{$k});
856 foreach my $t (@{$ps->{$k}}) {
857 next unless length ($t);
858 next if $t =~ m!\{arch\}/!;
859 next if $t =~ m!\.arch-ids/!;
860 # should we skip this?
861 next if $t =~ m!\.arch-inventory$!;
862 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
863 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
865 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
875 my ($tag, $commit) = @_;
880 my $patchname = $tag;
881 $patchname =~ s/.*--//;
882 $tag = git_branchname ($tag) . '--' . $patchname;
886 open(C,">","$git_dir/refs/tags/$tag")
887 or die "Cannot create tag $tag: $!\n";
889 or die "Cannot write tag $tag: $!\n";
891 or die "Cannot write tag $tag: $!\n";
892 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
894 open(C,"<","$git_dir/refs/tags/$tag")
895 or die "Cannot read tag $tag: $!\n";
898 die "Error reading tag $tag: $!\n" unless length $commit == 40;
900 or die "Cannot read tag $tag: $!\n";
905 # write/read a private tag
906 # reads fail softly if the tag isn't there
908 my ($tag, $commit) = @_;
910 # don't use subdirs for tags yet, it could screw up other porcelains
913 my $tag_file = "$ptag_dir/$tag";
914 my $tag_branch_dir = dirname($tag_file);
915 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
917 if ($commit) { # write
918 open(C,">",$tag_file)
919 or die "Cannot create tag $tag: $!\n";
921 or die "Cannot write tag $tag: $!\n";
923 or die "Cannot write tag $tag: $!\n";
924 $rptags{$commit} = $tag
925 unless $tag =~ m/--base-0$/;
927 # if the tag isn't there, return 0
928 unless ( -s $tag_file) {
931 open(C,"<",$tag_file)
932 or die "Cannot read tag $tag: $!\n";
935 die "Error reading tag $tag: $!\n" unless length $commit == 40;
937 or die "Cannot read tag $tag: $!\n";
938 unless (defined $rptags{$commit}) {
939 $rptags{$commit} = $tag;
947 # Identify what branches are merging into me
948 # and whether we are fully merged
949 # git-merge-base <headsha> <headsha> should tell
950 # me what the base of the merge should be
954 my %branches; # holds an arrayref per branch
955 # the arrayref contains a list of
956 # merged patches between the base
957 # of the merge and the current head
959 my @parents; # parents found for this commit
961 # simple loop to split the merges
963 foreach my $merge (@{$ps->{merges}}) {
964 my $branch = git_branchname($merge);
965 unless (defined $branches{$branch} ){
966 $branches{$branch} = [];
968 push @{$branches{$branch}}, $merge;
972 # foreach branch find a merge base and walk it to the
973 # head where we are, collecting the merged patchsets that
974 # Arch has recorded. Keep that in @have
975 # Compare that with the commits on the other branch
976 # between merge-base and the tip of the branch (@need)
977 # and see if we have a series of consecutive patches
978 # starting from the merge base. The tip of the series
979 # of consecutive patches merged is our new parent for
982 foreach my $branch (keys %branches) {
984 # check that we actually know about the branch
985 next unless -e "$git_dir/refs/heads/$branch";
987 my $mergebase = `git-merge-base $branch $ps->{branch}`;
989 # Don't die here, Arch supports one-way cherry-picking
990 # between branches with no common base (or any relationship
992 warn "Cannot find merge base for $branch and $ps->{branch}";
997 # now walk up to the mergepoint collecting what patches we have
998 my $branchtip = git_rev_parse($ps->{branch});
999 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
1000 my %have; # collected merges this branch has
1001 foreach my $merge (@{$ps->{merges}}) {
1005 foreach my $par (@ancestors) {
1006 $par = commitid2pset($par);
1007 if (defined $par->{merges}) {
1008 foreach my $merge (@{$par->{merges}}) {
1009 $ancestorshave{$merge}=1;
1013 # print "++++ Merges in $ps->{id} are....\n";
1014 # my @have = sort keys %have; print Dumper(\@have);
1016 # merge what we have with what ancestors have
1017 %have = (%have, %ancestorshave);
1019 # see what the remote branch has - these are the merges we
1020 # will want to have in a consecutive series from the mergebase
1021 my $otherbranchtip = git_rev_parse($branch);
1022 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
1024 foreach my $needps (@needraw) { # get the psets
1025 $needps = commitid2pset($needps);
1026 # git-rev-list will also
1027 # list commits merged in via earlier
1028 # merges. we are only interested in commits
1029 # from the branch we're looking at
1030 if ($branch eq $needps->{branch}) {
1031 push @need, $needps->{id};
1035 # print "++++ Merges from $branch we want are....\n";
1036 # print Dumper(\@need);
1039 while (my $needed_commit = pop @need) {
1040 if ($have{$needed_commit}) {
1041 $newparent = $needed_commit;
1043 last; # break out of the while
1047 push @parents, $newparent;
1051 } # end foreach branch
1053 # prune redundant parents
1055 foreach my $p (@parents) {
1058 foreach my $p (@parents) {
1059 next unless exists $psets{$p}{merges};
1060 next unless ref $psets{$p}{merges};
1061 my @merges = @{$psets{$p}{merges}};
1062 foreach my $merge (@merges) {
1063 if ($parents{$merge}) {
1064 delete $parents{$merge};
1070 foreach (keys %parents) {
1071 push @parents, '-p', ptag($_);
1078 my $val = `git-rev-parse $name`;
1079 die "Error: git-rev-parse $name" if $?;
1084 # resolve a SHA1 to a known patchset
1086 my $commitid = shift;
1088 my $name = $rptags{$commitid}
1089 || die "Cannot find reverse tag mapping for $commitid";
1091 my $ps = $psets{$name}
1092 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1097 # an alternative to `command` that allows input to be passed as an array
1098 # to work around shell problems with weird characters in arguments
1099 sub safe_pipe_capture {
1101 if (my $pid = open my $child, '-|') {
1102 @output = (<$child>);
1103 close $child or die join(' ',@_).": $! $?";
1105 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1107 return wantarray ? @output : join('',@output);
1110 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1113 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1117 sub archive_reachable {
1118 my $archive = shift;
1119 return 1 if $reachable{$archive};
1120 return 0 if $unreachable{$archive};
1122 if (system "$TLA whereis-archive $archive >/dev/null") {
1123 if ($opt_a && (system($TLA,'register-archive',
1124 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1125 $reachable{$archive} = 1;
1128 print STDERR "Archive is unreachable: $archive\n";
1129 $unreachable{$archive} = 1;
1132 $reachable{$archive} = 1;