The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
#
# mergelog-tool, DAPM 15-Feb-2009
#
# Process metadata records stored in a text file that concern merges
# between bleed and maint perl

use 5.010;

use warnings;
use strict;

use Getopt::Std;

my $SHA_LEN = 10; # how many characters in the shortened SHA-1 hash

my %STATUS = (
    'M' => 'Fully merged',
    'P' => 'Partally merged, the rest rejected',
    'R' => 'Fully rejected',
    'A' => 'part of branch merged in single Aggregate merge',
    'd' => 'Defer until a later release',
    'm' => 'Partally merged, the rest pending',
    '!' => 'Reviewed but awaiting action',
    '.' => 'Unreviewed',
);


my %OPTS;

sub usage { die <<EOF; }
usage: $0 [-ch] [-f record_file] -c|-m|-u
    -c         just check syntax of record file
    -f file    name of record file
    -h         help
    -M         Directory in which to create mailboxes
    -m         generate mailboxes (in current directory by default)
    -u         update log file with latest commits (previous renamed .bak)
EOF

getopts('cf:hM:mu', \%OPTS) or usage;
usage if $OPTS{h};

my $action = join '', map $OPTS{$_} ? $_ : '', qw(c m u);
die "The action must be one of -c, -m, -u\n" unless $action =~ /^[cmu]$/;

my $record_filename = $OPTS{f} // "Porting/mergelog";

{
    my ($records, $index) = read_merge_record_file($record_filename);
    exit 0 if $action eq 'c';

    if ($action eq 'm') {
	generate_mailboxes($records, $index, ($OPTS{M} // '.'));
	exit 0;
    }

    if ($action eq 'u') {
	update_record_file($records, $index, $record_filename);
    }
}



exit 0;


# given a file name, read in the merge record file and return
# an array of records and an index of commit ids that link to records in
# that array.
#
# Comment records and line continutions are captured in such a way
# that it should be possible to exactly regenerate the original file
# (apart from minor whitespace differences).

sub read_merge_record_file {
    my ($file) = @_;

    open my $fh, '<', $file
	or die "$0: Can't open '$file': $!\n";

    my %index;
    my @records;
    while (<$fh>) {
	if ($. == 1 or /^\S/) {
	    # new entry
	    if (/^#/) {
		# comment
		push @records, [ '#', undef, undef, $' ];
		next;
	    }
	    my ($flag, $commit, $date, $rest) = split ' ', $_, 4;
	    defined $rest
		or die "$0: malformed line at $file:$.:\n$_";
	    $flag =~ /^[AMPRdm!\.]$/
		or die "$0: unrecognised flag '$flag' at $file:$.\n";
	    $commit =~ /^[0-9a-f]{$SHA_LEN}$/
		or die "$0: badly-formed commit '$commit' at $file:$.\n";
	    $index{$commit} and
		die "Duplicate commit '$commit' at $file:$.\n";
	    push @records, [ $flag, $commit, $date, $rest ];
	    $index{$commit} = $records[-1];
	}
	else {
	    # continuation line
	    if ( (($records[-1][0] // '') eq '#') and /\S/) {
		die
		"$0: illegal continuation line after comment at $file:$.:\n$_";
	    }
	    $records[-1][-1] .= $_;
	}
    }
    chomp $_->[3] for @records;
    return \@records, \%index;
}

# given a ref to a record array, and a file handle, write the records out
#
sub write_merge_record_file {
    my ($records, $fh) = @_;
    for my $record (@$records) {
	if ($record->[0] eq '#') {
	    print $fh @$record[0,3], "\n";;
	}
	else {
	    printf $fh "%s  %s %s %s\n", @$record;
	}
    }
}


# get a list of commit records based on the passed format and args.
# Format should start with %H. $fieldcount is the expected number
# of fields per record.
#
# Returns both a hash and a list
 
sub get_commits {
    my ($format, $args, $fieldcount) = @_;

    # XXX make this depend on current branch rather than hard-coding???
    my $range = "perl-5.10.0..origin/blead";


    # Initially I just used \x00 as a record separator, but at least one
    # diff had a null char in it! (5254b38e) So add some extra text too
    my $SEP = 'RjqenKHPaNJq';

    open my $log, "git log $args --pretty=format:'%x00$SEP$format' $range|"
	or die "$0: failed to execute 'git log': $!\n";


    my %commits;
    my @commits;
    {
	local $/ = "\x00$SEP";
	while (<$log>) {
	    chomp;
	    next unless length;	# skip first null record
	    my $rec = [ split /\x01/, $_];
	    die "$0: unexpected commit field count: ", scalar(@$rec), "\n"
		if @$rec != $fieldcount;
	    my $short = substr($rec->[0], 0, $SHA_LEN);
	    if (exists $commits{$short}) {
		die <<EOF;
$0: Internal error: duplicate short commit found: '$short'.
This means that the shortened SHA-1 hashes in the log file are no longer
long enough. The log file will need altering and this script modified
before you can proceed.
EOF
	    }
	    $commits{$short} = $rec;
	    push @commits, $rec;
	}
    }

    close $log;
    return \%commits, [ reverse @commits ];
}


# Create three mailboxes  in the given directory, containing accepted
# rejected and pending changes

sub generate_mailboxes {
    my ($records, $index, $dir) = @_;

    die "$0: No such directory: '$dir'\n" unless -d $dir;

    my %mboxes;
    for (qw(accepted rejected pending)) {
	my $f = "$dir/p5c_$_";
	my $fh;
	open $fh, '>', $f or die "$0: failed to create '$f': $!\n";
	$mboxes{$_} = $fh;
    }

    my ($commits) = get_commits(
	'%H%x01%an%x01%ae%x01%aD%x01%ce%x01%cD%x01%s%x01%b%x01%P%x01',
	 '--stat -p -M', 10);

    my %counts;
    my $status;
    my $linesep = '=' x 70;

    for my $record (@$records) {
	next if $record->[0] eq '#';

	$status = "Status: RO\n"; # email is read and old

	my $fh;
	if ($record->[0] =~ /^[AMP]$/) {
	    $fh = $mboxes{accepted};
	    $counts{accepted}++;
	}
	elsif ($record->[0] =~ /^[dR]$/) {
	    $fh = $mboxes{rejected};
	    $counts{rejected}++;
	}
	elsif ($record->[0] =~ /^[m!\.]$/) {
	    $fh = $mboxes{pending};
	    $counts{pending}++;
	    $status = '' if $record->[0] eq '.'; # mark email as new
	}
	else {
	    die "$0: Unexpected flag type '$record->[0]'\n";
	}

	# $commit arrays contain:
	#    0 commit SHA1
	#    1 Author Name
	#    2 Author Email
	#    3 Author Date (RFC822)
	#    4 Committer Email
	#    5 Committer Date (RFC822)
	#    6 Subject
	#    7 Body
	#    8 parents
	#    9 File list and diff (--stat -p)

	my $shortsha1 = $record->[1];
	my $c =  $commits->{$shortsha1};
	die "$0: Unknown commit '$shortsha1'\n" unless $c;


	
	my $subj = "$record->[0] $shortsha1 "
	    # a slight subterfuge here to avoid three X's in this src
	    . (($record->[3] =~ /[X]XX/) ? 'X'.'XX ' : '') . ($c->[6] // '');

	my $cdate = $c->[5];
	# convert RFC822 date into mbox 'From ' header format
	#             Fri, 20 Feb 2009 14:45:36 +0100
	#             Wed Jan  9 19:47:43 2008
	$cdate =~ s/ [+\-]\d{4}$//;
	$cdate =~ s{^(\w\w\w),(\s+\d+) (\w\w\w) (\d{4}) ([\d:]{8})$}
		    {$1 $3$2 $5 $4}
	    or die "$0: Can't convert RFC822 date: '$cdate'\n";

	my @parents = map substr($_,0,$SHA_LEN), split ' ', $c->[8];
	my $merged = @parents > 1 ? "MERGED: @parents\n" : '';

	my $files_and_diff = $c->[9];
	$files_and_diff =~ s/^---/\n---\n/;
	$files_and_diff =~ s/^( \d+ files changed,)/\n$1/m;
	$files_and_diff =~ s/^diff /$linesep\n\ndiff /m;

	# truncate long bodies

	if (length($files_and_diff) > 100_000) {
	    substr($files_and_diff, 100_000) =
				    "\n\n***TRUNCATED at 100Kbytes\n";
	}

	my $body = <<EOF;
From: $c->[1] <$c->[2]>
Date: $c->[3]
Subject: $subj
Message-Id: <fake:$c->[0]>
$status
Commit: $c->[0]
Author: $c->[1] <$c->[2]>
Date:   $c->[3]
${merged}Status: [$record->[0]] ($STATUS{$record->[0]})
Notes:  $record->[3]
$linesep

$c->[6]
$c->[7]
$files_and_diff
EOF

	$body =~ s/^From />From /gm; # mbox 'From ' escaping
	$body = "From $c->[4]  $cdate\n$body";

	print $fh $body;
    }
    for (values %mboxes) {
	close $_ or die "$0: close: $!\n";
    }
    for (qw(accepted rejected pending)) {
	printf "%4d %s mailbox entries\n", $counts{$_}, $_;
    }
}


sub update_record_file {
    my ($records, $index, $record_filename) = @_;

    my ($commit_hash, $commits) =
	get_commits('%H%x01%P%x01%ct%x01%s', '', 4);

    # confirm that commits is a superset of records
    for (keys %$index) {
	$commit_hash->{$_}
	    or die "$0: Entry '$_' in log file is not a recognised commit\n";
    }

    # convert git log output to log file format

    for my $c (@$commits) {
	my ($sha1, $parents, $date, $subject) = @$c;
	$sha1 = substr($sha1, 0, $SHA_LEN);
	my ($yy,$mm,$dd) = (gmtime($date))[5,4,3];
	$date = sprintf "%04d/%02d/%02d", $yy+1900, $mm+1, $dd;
	chomp $subject;
	$subject = substr($subject, 0, 50);
	my @parents = split ' ', $parents;
	if (@parents > 1) {
	    $subject .= "\n\t\t\t\tMERGE: "
		. join ' ', map substr($_,0,$SHA_LEN), @parents;
	}
	@$c = ();
	push @$c, '.', $sha1, $date, $subject;
    }


    # merge log file and new commits
    
    my @out;
    COMMIT: for my $c (@$commits) {
	while (1) {
	    my $r = $records->[0];
	    last unless $r;
	    if ($r->[0] eq '#') {
		push @out, $r;
		shift @$records;
		next;
	    }

	    if ($r->[1] eq $c->[1]) {
		push @out, $r;
		shift @$records;
		next COMMIT;
	    }
	    last;
	}
	push @out, $c;
    }
    @$records and die
	"$0: Internal error: unexpected log records left after merge\n";


    my $new = "$record_filename.new";
    my $bak = "$record_filename.bak";

    die "$0: $new already exists\n" if -e $new;
    open my $out, '>', $new
	or die "$0: Can't create '$new': $!\n";
    write_merge_record_file(\@out,$out);
    close $out
	or die "$0: close($new): $!\n";
    -s $new < -s $record_filename
	and die "$0: new file '$new' is smaller than existing file\n";
    rename $record_filename, $bak
	or die "$0: rename $record_filename -> $bak: $!\n";
    rename $new, $record_filename
	or die "$0: rename $new -> $record_filename: $!\n";
}