The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
# 
#	patchls - patch listing utility
#
# Input is one or more patchfiles, output is a list of files to be patched.
#
# Copyright (c) 1997 Tim Bunce. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# With thanks to Tom Horsley for the seed code.


use Getopt::Std;
use Text::Wrap qw(wrap $columns);
use Text::Tabs qw(expand unexpand);
use strict;

our $VERSION = 2.11;

sub usage {
die qq{
  patchls [options] patchfile [ ... ]

    -h     no filename headers (like grep), only the listing.
    -l     no listing (like grep), only the filename headers.
    -i     Invert: for each patched file list which patch files patch it.
    -c     Categorise the patch and sort by category (perl specific).
    -m     print formatted Meta-information (Subject,From,Msg-ID etc).
    -p N   strip N levels of directory Prefix (like patch), else automatic.
    -v     more verbose (-d for noisy debugging).
    -n     give a count of the number of patches applied to a file if >1.
    -f F   only list patches which patch files matching regexp F
           (F has \$ appended unless it contains a /).
    -e     Expect patched files to Exist (relative to current directory)
           Will print warnings for files which don't. Also affects -4 option.
    -      Read patch from STDIN
  other options for special uses:
    -I     just gather and display summary Information about the patches.
    -4     write to stdout the PerForce commands to prepare for patching.
    -5     like -4 but add "|| exit 1" after each command
    -M T   Like -m but only output listed meta tags (eg -M 'Title From')
    -W N   set wrap width to N (defaults to 70, use 0 for no wrap)
    -X     list patchfiles that may clash (i.e. patch the same file)

  patchls version $VERSION by Tim Bunce
}
}

$::opt_p = undef;	# undef != 0
$::opt_d = 0;
$::opt_v = 0;
$::opt_m = 0;
$::opt_n = 0;
$::opt_i = 0;
$::opt_h = 0;
$::opt_l = 0;
$::opt_c = 0;
$::opt_f = '';
$::opt_e = 0;

# special purpose options
$::opt_I = 0;
$::opt_4 = 0;	# output PerForce commands to prepare for patching
$::opt_5 = 0;
$::opt_M = '';	# like -m but only output these meta items (-M Title)
$::opt_W = 70;	# set wrap width columns (see Text::Wrap module)
$::opt_C = 0;	# 'Chip' mode (handle from/tags/article/bug files) undocumented
$::opt_X = 0;	# list patchfiles that patch the same file

usage unless (nelems @ARGV);

getopts("dmnihlvecC45Xp:f:IM:W:") or usage;

$columns = $::opt_W || 9999999;

$::opt_m = 1 if $::opt_M;
$::opt_4 = 1 if $::opt_5;
$::opt_i = 1 if $::opt_X;

# see get_meta_info()
my @show_meta = @( split(' ', $::opt_M || 'Title From Msg-ID Files') );
my %show_meta = %( map { ($_,1) } < @show_meta );

my %cat_title = %(
    'BUILD'	=> 'BUILD PROCESS',
    'CORE'	=> 'CORE LANGUAGE',
    'DOC'	=> 'DOCUMENTATION',
    'LIB'	=> 'LIBRARY',
    'PORT1'	=> 'PORTABILITY - WIN32',
    'PORT2'	=> 'PORTABILITY - GENERAL',
    'TEST'	=> 'TESTS',
    'UTIL'	=> 'UTILITIES',
    'OTHER'	=> 'OTHER CHANGES',
    'EXT'	=> 'EXTENSIONS',
    'UNKNOWN'	=> 'UNKNOWN - NO FILES PATCHED',
);


sub get_meta_info {
    my $ls = shift;
    local($_) = shift;
    if (m/^From:\s+(.*\S)/i) {;
	my $from = $1;	# temporary measure for Chip Salzenberg
	$from =~ s/chip\@(atlantic\.net|perlsupport\.com)/chip\@pobox.com/;
	$from =~ s/\(Tim Bunce\) \(Tim Bunce\)/(Tim Bunce)/;
	$ls->{From}->{$from} = 1
    }
    if (m/^Subject:\s+(?:Re: )?(.*\S)/i) {
	my $title = $1;
	$title =~ s/\[(PATCH|PERL)[\w\. ]*\]:?//g;
	$title =~ s/\b(PATCH|PERL)[\w\.]*://g;
	$title =~ s/\bRe:\s+/ /g;
	$title =~ s/\s+/ /g;
	$title =~ s/^\s*(.*?)\s*$/$1/g;
	$ls->{Title}->{$title} = 1;
    }
    $ls->{'Msg-ID'}->{$1}=1 if m/^Message-Id:\s+(.*\S)/i;
    $ls->{Date}->{$1}=1     if m/^Date:\s+(.*\S)/i;
    $ls->{$1}->{$2}=1       if $::opt_M && m/^([-\w]+):\s+(.*\S)/;
}


# Style 1:
#	*** perl-5.004/embed.h  Sat May 10 03:39:32 1997
#	--- perl-5.004.fixed/embed.h    Thu May 29 19:48:46 1997
#	***************
#	*** 308,313 ****
#	--- 308,314 ----
#
# Style 2:
#	--- perl5.004001/mg.c   Sun Jun 08 12:26:24 1997
#	+++ perl5.004-bc/mg.c   Sun Jun 08 11:56:08 1997
#	@@ .. @@
# or for deletions
#	--- perl5.004001/mg.c   Sun Jun 08 12:26:24 1997
#	+++ /dev/null   Sun Jun 08 11:56:08 1997
#	@@ ... @@
# or (rcs, note the different date format)
#	--- 1.18	1997/05/23 19:22:04
#	+++ ./pod/perlembed.pod	1997/06/03 21:41:38
#
# Variation:
#	Index: embed.h

my %ls;

my $in;
my $ls;
my $prevline = '';
my $prevtype = '';
my (%removed, %added);
my $prologue = 1;	# assume prologue till patch or /^exit\b/ seen


foreach my $argv (< @ARGV) {
    $in = $argv;
    if (-d $in) {
	warn "Ignored directory $in\n";
	next;
    }
    if ($in eq "-") {
      *F = $^STDIN;
    } elsif (not open F, "<", "$in") {
	warn "Unable to open $in: $!\n";
	next;
    }
    print "Reading $in...\n" if $::opt_v and (nelems @ARGV) +> 1;
    $ls = %ls{$in} ||= \%( is_in => 1, in => $in );
    my $type;
    while ( ~< *F) {
	unless (m/^([-+*]{3}) / || m/^(Index):/) {
	    # not an interesting patch line
	    # but possibly meta-information or prologue
	    if ($prologue) {
		%added{$1}   = 1    if m/^touch\s+(\S+)/;
		%removed{$1} = 1    if m/^rm\s+(?:-f)?\s*(\S+)/;
		$prologue = 0       if m/^exit\b/;
	    }
	    get_meta_info($ls, $_) if $::opt_m;
	    next;
	}
	$type = $1;
	next if m/^--- [0-9,]+ ----$/ || m/^\*\*\* [0-9,]+ \*\*\*\*$/;
	$prologue = 0;

	print "Last: $prevline","This: {$_}Got:  $type\n\n" if $::opt_d;

	# Some patches have Index lines but not diff headers
	# Patch copes with this, so must we. It's also handy for
	# documenting manual changes by simply adding Index: lines
	# to the file which describes the problem being fixed.
	if (m/^Index:\s+(.*)/) {
	    my $f;
	    foreach $f (split(m/ /, $1)) { add_patched_file($ls, $f) }
	    next;
	}

	if (	($type eq '---' and $prevtype eq '***')	# Style 1
	    or	($type eq '+++' and $prevtype eq '---')	# Style 2
	) {
	    if (m/^[-+*]{3} (\S+)\s*(.*?\d\d:\d\d:\d\d)?/) {	# double check
		if ($1 eq "/dev/null") {
		    $prevline =~ m/^[-+*]{3} (\S+)\s*/;
		    add_deleted_file($ls, $1);
		}
		else {
		    add_patched_file($ls, $1);
		}
	    }
	    else {
		warn "$in $.: parse error (prev $prevtype, type $type)\n$prevline$_";
	    }
	}
    }
    continue {
	$prevline = $_;
	$prevtype = $type || '';
	$type = '';
    }

    # special mode for patch sets from Chip
    if ($in =~ m:[\\/]patch$:) {
	my $is_chip;
	my $chip;
	my $dir; ($dir = $in) =~ s:[\\/]patch$::;
	if (!$ls->{From} && (open(CHIP,"$dir/article") || open(CHIP,"$dir/bug"))) {
	    get_meta_info($ls, $_) while ( ~< *CHIP);
	    $is_chip = 1;
	}
	if (open CHIP, "<","$dir/from") {
	    chop($chip = ~< *CHIP);
	    $ls->{From} = \%( $chip => 1 );
	    $is_chip = 1;
	}
	if (open CHIP, "<","$dir/tag") {
	    chop($chip = ~< *CHIP);
	    $ls->{Title} = \%( $chip => 1 );
	    $is_chip = 1;
	}
	$ls->{From} = \%( "Chip Salzenberg" => 1 ) if $is_chip && !$ls->{From};
    }

    # if we don't have a title for -m then use the file name
    $ls->{Title}->{"Untitled: $in"}=1 if $::opt_m
	and !$ls->{Title} and $ls->{out};

    $ls->{category} = $::opt_c
	? categorize_files(\@(keys %{ $ls->{out} }), $::opt_v) : '';
}
print scalar(nelems @ARGV)." files read.\n" if $::opt_v and (nelems @ARGV) +> 1;


# --- Firstly we filter and sort as needed ---

my @ls  = @( values %ls );

if ($::opt_f) {		# filter out patches based on -f <regexp>
    $::opt_f .= '$' unless $::opt_f =~ m:/:;
    @ls = @( grep {
	my $match = 0;
	if ($_->{is_in}) {
	    my @out = @( keys %{ $_->{out} } );
	    $match=1 if grep { m/$::opt_f/o } < @out;
	}
	else {
	    $match=1 if $_->{in} =~ m/$::opt_f/o;
	}
	$match;
    } < @ls );
}

@ls  = sort {
    $a->{category} cmp $b->{category} || $a->{in} cmp $b->{in}
} < @ls;


# --- Handle special modes ---

if ($::opt_4) {
    my $tail = ($::opt_5) ? "|| exit 1" : "";
    print map { "p4 delete $_$tail\n" } sort keys %removed if %removed;
    print map { "p4 add    $_$tail\n" } sort keys %added   if %added;
    my @patches = @( sort grep { $_->{is_in} } < @ls );
    my @no_outs = @( grep { keys %{$_->{out}} == 0 } < @patches );
    warn "Warning: Some files contain no patches:",
	join("\n\t", '', map { $_->{in} } < @no_outs), "\n" if (nelems @no_outs);

    my %patched = %( map { ($_, 1) } map { keys %{$_->{out}} } < @patches );
    delete %patched{[keys %added]};
    my @patched = @( sort keys %patched );
    foreach(< @patched) {
	next if %removed{$_};
	my $edit = ($::opt_e && !-f $_) ? "add " : "edit";
	print "p4 $edit   $_$tail\n";
    }
    exit 0 unless $::opt_C;
}


if ($::opt_I) {
    my $n_patches = 0;
    my($in,$out);
    my %all_out;
    my @no_outs;
    foreach $in (< @ls) {
	next unless $in->{is_in};
	++$n_patches;
	my @outs = @( keys %{$in->{out}} );
	push @no_outs, $in unless (nelems @outs);
	%all_out{[< @outs]} = ($in->{in}) x nelems @outs;
    }
    my @all_out = @( sort keys %all_out );
    my @missing = @( grep { ! -f $_ } < @all_out );
    print "$n_patches patch files patch ".nelems @all_out." files (".nelems @missing." missing)\n";
    print (nelems @no_outs)." patch files don't contain patches.\n" if (nelems @no_outs);
    print "(use -v to list patches which patch 'missing' files)\n"
	    if ((nelems @missing) || nelems @no_outs) && !$::opt_v;
    if ($::opt_v and nelems @no_outs) {
	print "Patch files which don't contain patches:\n";
	foreach $out (< @no_outs) {
	    printf "  \%-20s\n", $out->{in};
	}
    }
    if ($::opt_v and nelems @missing) {
	print "Missing files:\n";
	foreach $out (< @missing) {
	    printf "  \%-20s\t", $out	unless $::opt_h;
	    print %all_out{$out}	unless $::opt_l;
	    print "\n";
	}
    }
    print "Added files:   ".join(" ",sort keys %added  )."\n" if %added;
    print "Removed files: ".join(" ",sort keys %removed)."\n" if %removed;
    exit 0+nelems @missing;
}

unless ($::opt_c and $::opt_m) {
    foreach $ls (< @ls) {
	next unless ($::opt_i) ? $ls->{is_out} : $ls->{is_in};
	next if $::opt_X and keys %{$ls->{out}} +<= 1;
	list_files_by_patch($ls);
    }
}
else {
    my $c = '';
    foreach $ls (< @ls) {
	next unless ($::opt_i) ? $ls->{is_out} : $ls->{is_in};
	print "\n  ------  %cat_title{$ls->{category}}  ------\n"
	    if $ls->{category} ne $c;
	$c = $ls->{category};
	unless ($::opt_i) {
	    list_files_by_patch($ls);
	}
	else {
	    my $out = $ls->{in};
	    print "\n$out patched by:\n";
	    # find all the patches which patch $out and list them
	    my @p = @( grep { $_->{out}->{$out} } values %ls );
	    foreach $ls (< @p) {
		list_files_by_patch($ls, '');
	    }
	}
    }
    print "\n";
}

exit 0;


# ---


sub add_patched_file {
    my $ls = shift;
	my $raw_name = shift;
    my $action = shift || 1;	# 1==patched, 2==deleted

    my $out = trim_name($raw_name);
    print "add_patched_file '$out' ($raw_name, $action)\n" if $::opt_d;

    $ls->{out}->{$out} = $action;

    warn "$out patched but not present\n" if $::opt_e && !-f $out;

    # do the -i inverse as well, even if we're not doing -i
    my $i = %ls{$out} ||= \%(
	is_out   => 1,
	in       => $out,
	category => $::opt_c ? < categorize_files(\@( $out ), $::opt_v) : '',
    );
    $i->{out}->{$in} = 1;
}

sub add_deleted_file {
    my $ls = shift;
	my $raw_name = shift;
    my $out = trim_name($raw_name);
    print "add_deleted_file '$out' ($raw_name)\n" if $::opt_d;
	%removed{$out} = 1;
    #add_patched_file(@_[0,1], 2);
}


sub trim_name {		# reduce/tidy file paths from diff lines
    my $name = shift;
    $name =~ s:\\:/:g;	# adjust windows paths
    $name =~ s://:/:g;	# simplify (and make win \\share into absolute path)
    if ($name eq "/dev/null") {
	# do nothing (XXX but we need a way to record deletions)
    }
    elsif (defined $::opt_p) {
	# strip on -p levels of directory prefix
	my $dc = $::opt_p;
	$name =~ s:^[^/]+/(.+)$:$1: while $dc-- +> 0;
    }
    else {	# try to strip off leading path to perl directory
	# if absolute path, strip down to any *perl* directory first
	$name =~ s:^/.*?perl.*?/::i;
	$name =~ s:.*(perl|maint)[-_]?5?[._]?[-_a-z0-9.+]*/::i;
	$name =~ s:^\./::;
    }
    return $name;
}


sub list_files_by_patch {
    my($ls, $name) = < @_;
    $name = $ls->{in} unless defined $name;
    my @meta;
    if ($::opt_m) {
	my $meta;
	foreach $meta (< @show_meta) {
	    next unless $ls->{$meta};
	    my @list = @( sort keys %{$ls->{$meta}} );
	    push @meta, sprintf "\%7s:  ", $meta;
	    if ($meta eq 'Title') {
		@list = @( map { "\"$_\""; } < @list );
		push @list, "#$1" if $::opt_C && $ls->{in} =~ m:\b(\w\d+)/patch$:;
	    }
	    elsif ($meta eq 'From') {
		# fix-up bizzare addresses from japan and ibm :-)
		foreach(< @list) {
		    s:\W+=?iso.*?<: <:;
		    s/\d\d-\w\w\w-\d{4}\s+\d\d:\S+\s*//;
		}
	    }
	    elsif ($meta eq 'Msg-ID') {
		my %from; # limit long threads to one msg-id per site
		@list = @( map {
		    %from{(m/@(.*?)>/ ? $1 : $_)}++ ? () : ($_);
		} < @list );
	    }
	    push @meta, < my_wrap("","          ", join(", ",< @list)."\n");
	}
	$name = "\n$name" if (nelems @meta) and $name;
    }
    # don't print the header unless the file contains something interesting
    return if !nelems @meta and !$ls->{out} and !$::opt_v;
    if ($::opt_l) {	# -l = no listing, just names
	print "$ls->{in}";
	my $n = keys %{ $ls->{out} };
	print " ($n patches)" if $::opt_n and $n+>1;
	print "\n";
	return;
    }

    # a twisty maze of little options
    my $cat = ($ls->{category} and !$::opt_m) ? "\t$ls->{category}" : "";
    print "$name$cat: "	unless ($::opt_h and !$::opt_v) or !"$name$cat";
    my $sep = "\n";
    $sep = "" if (nelems @show_meta)==1 && $::opt_c && $::opt_h;
    print join('', $sep, < @meta) if (nelems @meta);

    return if $::opt_m && !%show_meta{Files};
    my @v = @( sort PATORDER keys %{ $ls->{out} } );
    my $n = (nelems @v);
    my $v = "{join ' ', <@v}";
    print $::opt_m ? "  Files:  ".my_wrap("","          ",$v) : $v;
    print " ($n patches)" if $::opt_n and $n+>1;
    print "\n";
}


sub my_wrap {
	my $txt = try { expand( <wrap(< @_)) };	# die's on long lines!
    return $txt unless $@;
	return expand("{join ' ', <@_}");
}



sub categorize_files {
    my($files, $verb) = < @_;
    my(%c, $refine);

    foreach (< @$files) {	# assign a score to a file path
	# the order of some of the tests is important
	%c{TEST} += 5,next   if m:^t/:;
	%c{DOC}  += 5,next   if m:^pod/:;
	%c{UTIL} += 10,next  if m:^(utils)/:;
	%c{PORT1}+= 15,next  if m:^win32:;
	%c{PORT2} += 15,next
	    if m:^(cygwin|os2|plan9|qnx|vms)/:
	    or m:^(hints|Porting|ext/DynaLoader)/:
	    or m:^README\.:;
	%c{EXT}  += 10,next
	    if m:^(ext|lib/ExtUtils)/:;
	%c{LIB}  += 10,next
	    if m:^(lib)/:;
	%c{'CORE'} += 15,next
	    if m:^[^/]+[\._]([chH]|sym|pl)$:;
	%c{BUILD} += 10,next
	    if m:^[A-Z]+$: or m:^[^/]+\.SH$:
	    or m:^(install|configure|configpm):i;
	print "Couldn't categorise $_\n" if $::opt_v;
	%c{OTHER} += 1;
    }
    if (keys %c +> 1) {	# sort to find category with highest score
      refine:
	++$refine;
	my @c = @( sort { %c{$b} <+> %c{$a} || $a cmp $b } keys %c );
	my @v = @( map  { %c{$_} } < @c );
	if ((nelems @v) +> 1 and $refine +<= 1 and "{join ' ', <@v}" =~ m/^(\d) \1/
		and @c[0] =~ m/^(DOC|TESTS|OTHER)/) { # rare
	    print "Tie, promoting @c[1] over @c[0]\n" if $::opt_d;
	    ++%c{@c[1]};
	    goto refine;
	}
	print "  ".nelems @$files." patches: ", join(", ", map { "$_: %c{$_}" } < @c),".\n"
	    if $verb;
	return @c[0] || 'OTHER';
    }
    else {
	my($c, $v) = < %c;
	$c ||= 'UNKNOWN'; $v ||= 0;
	print "  ".nelems @$files." patches: $c: $v\n" if $verb;
	return $c;
    }
}


sub PATORDER {		# PATORDER sort by Chip Salzenberg
    my ($i, $j);

    $i = ($a =~ m#^[A-Z]+$#);
    $j = ($b =~ m#^[A-Z]+$#);
    return $j - $i if $i != $j;

    $i = ($a =~ m#configure|hint#i) || ($a =~ m#[S_]H$#);
    $j = ($b =~ m#configure|hint#i) || ($b =~ m#[S_]H$#);
    return $j - $i if $i != $j;

    $i = ($a =~ m#\.pod$#);
    $j = ($b =~ m#\.pod$#);
    return $j - $i if $i != $j;

    $i = ($a =~ m#include/#);
    $j = ($b =~ m#include/#);
    return $j - $i if $i != $j;

    if ((($i = $a) =~ s#/+[^/]*$##)
	&& (($j = $b) =~ s#/+[^/]*$##)) {
	    return $i cmp $j if $i ne $j;
    }

    $i = ($a =~ m#\.h$#);
    $j = ($b =~ m#\.h$#);
    return $j - $i if $i != $j;

    return $a cmp $b;
}