#!/usr/bin/perl
#
# mergelog-tool, DAPM 15-Feb-2009
#
# Process metadata records stored in a text file that concern merges
# between bleed and maint perl
use 5.010;
use warnings;
use strict;
use Getopt::Std;
my $SHA_LEN = 10; # how many characters in the shortened SHA-1 hash
my %STATUS = (
'M' => 'Fully merged',
'P' => 'Partally merged, the rest rejected',
'R' => 'Fully rejected',
'A' => 'part of branch merged in single Aggregate merge',
'd' => 'Defer until a later release',
'm' => 'Partally merged, the rest pending',
'!' => 'Reviewed but awaiting action',
'.' => 'Unreviewed',
);
my %OPTS;
sub usage { die <<EOF; }
usage: $0 [-ch] [-f record_file] -c|-m|-u
-c just check syntax of record file
-f file name of record file
-h help
-M Directory in which to create mailboxes
-m generate mailboxes (in current directory by default)
-u update log file with latest commits (previous renamed .bak)
EOF
getopts('cf:hM:mu', \%OPTS) or usage;
usage if $OPTS{h};
my $action = join '', map $OPTS{$_} ? $_ : '', qw(c m u);
die "The action must be one of -c, -m, -u\n" unless $action =~ /^[cmu]$/;
my $record_filename = $OPTS{f} // "Porting/mergelog";
{
my ($records, $index) = read_merge_record_file($record_filename);
exit 0 if $action eq 'c';
if ($action eq 'm') {
generate_mailboxes($records, $index, ($OPTS{M} // '.'));
exit 0;
}
if ($action eq 'u') {
update_record_file($records, $index, $record_filename);
}
}
exit 0;
# given a file name, read in the merge record file and return
# an array of records and an index of commit ids that link to records in
# that array.
#
# Comment records and line continutions are captured in such a way
# that it should be possible to exactly regenerate the original file
# (apart from minor whitespace differences).
sub read_merge_record_file {
my ($file) = @_;
open my $fh, '<', $file
or die "$0: Can't open '$file': $!\n";
my %index;
my @records;
while (<$fh>) {
if ($. == 1 or /^\S/) {
# new entry
if (/^#/) {
# comment
push @records, [ '#', undef, undef, $' ];
next;
}
my ($flag, $commit, $date, $rest) = split ' ', $_, 4;
defined $rest
or die "$0: malformed line at $file:$.:\n$_";
$flag =~ /^[AMPRdm!\.]$/
or die "$0: unrecognised flag '$flag' at $file:$.\n";
$commit =~ /^[0-9a-f]{$SHA_LEN}$/
or die "$0: badly-formed commit '$commit' at $file:$.\n";
$index{$commit} and
die "Duplicate commit '$commit' at $file:$.\n";
push @records, [ $flag, $commit, $date, $rest ];
$index{$commit} = $records[-1];
}
else {
# continuation line
if ( (($records[-1][0] // '') eq '#') and /\S/) {
die
"$0: illegal continuation line after comment at $file:$.:\n$_";
}
$records[-1][-1] .= $_;
}
}
chomp $_->[3] for @records;
return \@records, \%index;
}
# given a ref to a record array, and a file handle, write the records out
#
sub write_merge_record_file {
my ($records, $fh) = @_;
for my $record (@$records) {
if ($record->[0] eq '#') {
print $fh @$record[0,3], "\n";;
}
else {
printf $fh "%s %s %s %s\n", @$record;
}
}
}
# get a list of commit records based on the passed format and args.
# Format should start with %H. $fieldcount is the expected number
# of fields per record.
#
# Returns both a hash and a list
sub get_commits {
my ($format, $args, $fieldcount) = @_;
# XXX make this depend on current branch rather than hard-coding???
my $range = "perl-5.10.0..origin/blead";
# Initially I just used \x00 as a record separator, but at least one
# diff had a null char in it! (5254b38e) So add some extra text too
my $SEP = 'RjqenKHPaNJq';
open my $log, "git log $args --pretty=format:'%x00$SEP$format' $range|"
or die "$0: failed to execute 'git log': $!\n";
my %commits;
my @commits;
{
local $/ = "\x00$SEP";
while (<$log>) {
chomp;
next unless length; # skip first null record
my $rec = [ split /\x01/, $_];
die "$0: unexpected commit field count: ", scalar(@$rec), "\n"
if @$rec != $fieldcount;
my $short = substr($rec->[0], 0, $SHA_LEN);
if (exists $commits{$short}) {
die <<EOF;
$0: Internal error: duplicate short commit found: '$short'.
This means that the shortened SHA-1 hashes in the log file are no longer
long enough. The log file will need altering and this script modified
before you can proceed.
EOF
}
$commits{$short} = $rec;
push @commits, $rec;
}
}
close $log;
return \%commits, [ reverse @commits ];
}
# Create three mailboxes in the given directory, containing accepted
# rejected and pending changes
sub generate_mailboxes {
my ($records, $index, $dir) = @_;
die "$0: No such directory: '$dir'\n" unless -d $dir;
my %mboxes;
for (qw(accepted rejected pending)) {
my $f = "$dir/p5c_$_";
my $fh;
open $fh, '>', $f or die "$0: failed to create '$f': $!\n";
$mboxes{$_} = $fh;
}
my ($commits) = get_commits(
'%H%x01%an%x01%ae%x01%aD%x01%ce%x01%cD%x01%s%x01%b%x01%P%x01',
'--stat -p -M', 10);
my %counts;
my $status;
my $linesep = '=' x 70;
for my $record (@$records) {
next if $record->[0] eq '#';
$status = "Status: RO\n"; # email is read and old
my $fh;
if ($record->[0] =~ /^[AMP]$/) {
$fh = $mboxes{accepted};
$counts{accepted}++;
}
elsif ($record->[0] =~ /^[dR]$/) {
$fh = $mboxes{rejected};
$counts{rejected}++;
}
elsif ($record->[0] =~ /^[m!\.]$/) {
$fh = $mboxes{pending};
$counts{pending}++;
$status = '' if $record->[0] eq '.'; # mark email as new
}
else {
die "$0: Unexpected flag type '$record->[0]'\n";
}
# $commit arrays contain:
# 0 commit SHA1
# 1 Author Name
# 2 Author Email
# 3 Author Date (RFC822)
# 4 Committer Email
# 5 Committer Date (RFC822)
# 6 Subject
# 7 Body
# 8 parents
# 9 File list and diff (--stat -p)
my $shortsha1 = $record->[1];
my $c = $commits->{$shortsha1};
die "$0: Unknown commit '$shortsha1'\n" unless $c;
my $subj = "$record->[0] $shortsha1 "
# a slight subterfuge here to avoid three X's in this src
. (($record->[3] =~ /[X]XX/) ? 'X'.'XX ' : '') . ($c->[6] // '');
my $cdate = $c->[5];
# convert RFC822 date into mbox 'From ' header format
# Fri, 20 Feb 2009 14:45:36 +0100
# Wed Jan 9 19:47:43 2008
$cdate =~ s/ [+\-]\d{4}$//;
$cdate =~ s{^(\w\w\w),(\s+\d+) (\w\w\w) (\d{4}) ([\d:]{8})$}
{$1 $3$2 $5 $4}
or die "$0: Can't convert RFC822 date: '$cdate'\n";
my @parents = map substr($_,0,$SHA_LEN), split ' ', $c->[8];
my $merged = @parents > 1 ? "MERGED: @parents\n" : '';
my $files_and_diff = $c->[9];
$files_and_diff =~ s/^---/\n---\n/;
$files_and_diff =~ s/^( \d+ files changed,)/\n$1/m;
$files_and_diff =~ s/^diff /$linesep\n\ndiff /m;
# truncate long bodies
if (length($files_and_diff) > 100_000) {
substr($files_and_diff, 100_000) =
"\n\n***TRUNCATED at 100Kbytes\n";
}
my $body = <<EOF;
From: $c->[1] <$c->[2]>
Date: $c->[3]
Subject: $subj
Message-Id: <fake:$c->[0]>
$status
Commit: $c->[0]
Author: $c->[1] <$c->[2]>
Date: $c->[3]
${merged}Status: [$record->[0]] ($STATUS{$record->[0]})
Notes: $record->[3]
$linesep
$c->[6]
$c->[7]
$files_and_diff
EOF
$body =~ s/^From />From /gm; # mbox 'From ' escaping
$body = "From $c->[4] $cdate\n$body";
print $fh $body;
}
for (values %mboxes) {
close $_ or die "$0: close: $!\n";
}
for (qw(accepted rejected pending)) {
printf "%4d %s mailbox entries\n", $counts{$_}, $_;
}
}
sub update_record_file {
my ($records, $index, $record_filename) = @_;
my ($commit_hash, $commits) =
get_commits('%H%x01%P%x01%ct%x01%s', '', 4);
# confirm that commits is a superset of records
for (keys %$index) {
$commit_hash->{$_}
or die "$0: Entry '$_' in log file is not a recognised commit\n";
}
# convert git log output to log file format
for my $c (@$commits) {
my ($sha1, $parents, $date, $subject) = @$c;
$sha1 = substr($sha1, 0, $SHA_LEN);
my ($yy,$mm,$dd) = (gmtime($date))[5,4,3];
$date = sprintf "%04d/%02d/%02d", $yy+1900, $mm+1, $dd;
chomp $subject;
$subject = substr($subject, 0, 50);
my @parents = split ' ', $parents;
if (@parents > 1) {
$subject .= "\n\t\t\t\tMERGE: "
. join ' ', map substr($_,0,$SHA_LEN), @parents;
}
@$c = ();
push @$c, '.', $sha1, $date, $subject;
}
# merge log file and new commits
my @out;
COMMIT: for my $c (@$commits) {
while (1) {
my $r = $records->[0];
last unless $r;
if ($r->[0] eq '#') {
push @out, $r;
shift @$records;
next;
}
if ($r->[1] eq $c->[1]) {
push @out, $r;
shift @$records;
next COMMIT;
}
last;
}
push @out, $c;
}
@$records and die
"$0: Internal error: unexpected log records left after merge\n";
my $new = "$record_filename.new";
my $bak = "$record_filename.bak";
die "$0: $new already exists\n" if -e $new;
open my $out, '>', $new
or die "$0: Can't create '$new': $!\n";
write_merge_record_file(\@out,$out);
close $out
or die "$0: close($new): $!\n";
-s $new < -s $record_filename
and die "$0: new file '$new' is smaller than existing file\n";
rename $record_filename, $bak
or die "$0: rename $record_filename -> $bak: $!\n";
rename $new, $record_filename
or die "$0: rename $new -> $record_filename: $!\n";
}