The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package SVN::Web::DiffParser;
# $Id: Parser.pm,v 1.3 2006/04/13 01:47:37 fil Exp $

use 5.00404;
use strict;
use vars qw( $VERSION );

use Carp;
use IO::File;

$VERSION = '0.53';
$VERSION = eval $VERSION;  # see L<perlmodstyle>

####################################################
sub new
{
    my( $package, @args ) = @_;

    my $self = bless { changes=>[], 
                       source=>'' }, $package;

    my $parms;
    if( 1==@args ) {
        if( 'HASH' eq ref $args[0] ) {
            $parms = $args[0];
        }
        else {
            my $diff = $args[0];
            if( ref $diff or $diff !~ /\n/ ) {
                $parms = { File => $diff };
            }
            else {
                $parms = { Diff => $diff };
            }
        }
    }
    else {
        $parms = { @args };
    }

    $self->__init( $parms );
    return $self;
}

sub __init 
{
    my( $self, $parms ) = @_;

    $self->{verbose}  = 1 if $parms->{Verbose};
    $self->{simplify} = $parms->{Simplify};
    $self->{strip}    = $parms->{Strip};

    if( $parms->{ File } ) {
        $self->parse_file( $parms->{File} );
    }
    elsif( $parms->{ Diff } ) {
        $self->parse( $parms->{Diff} );
    }
    return $self;
}

####################################################
sub source
{
    my( $self ) = @_;
    return $self->{source};
}

####################################################
sub changes
{
    my( $self, $file ) = @_;
    my $ret = $self->{changes};
    if( $file ) {
        $ret = [];
        foreach my $ch ( @{ $self->{changes} } ) {
            next unless $ch->filename1 eq $file or
                        $ch->filename2 eq $file;
            push @$ret, $ch;
        }
    }

    return @$ret if wantarray;
    return 0+@$ret;
}


####################################################
sub files
{
    my( $self ) = @_;
    my %ret;
    foreach my $ch ( $self->changes ) {
        $ret{$ch->filename1} = $ch->filename2;
    }
    return %ret;
}


####################################################
sub simplify
{
    my( $self ) = @_;

    my @keep;
    my $prev;
    foreach my $ch ( $self->changes ) {
        if( $ch->type eq '' ) {                 # skip no-change
            undef( $prev );
            next;
        }
    
        if( $prev ) {
            my $size = $prev->size;
            ## Combine ADD/REMOVE lines
            if( $prev->type ne $ch->type and # ADD->REMOVE or REMOVE->ADD
                $prev->filename1 eq $ch->filename1 and
                $prev->filename2 eq $ch->filename2 and
                           $size == $ch->size ) {  #close

                if( $prev->type eq 'REMOVE' and
                        $prev->line2 == $ch->line2 and
                        ($prev->line1+$size) == $ch->line1 ) {
                    $prev->{type} = 'MODIFY';
                    $prev->{lines} = $ch->{lines};
                    undef( $prev );
                    next;
                }
                elsif( $prev->type eq 'ADD' and
                        ($prev->line2+$size) == $ch->line2 and
                        $prev->line1 == $ch->line1 ) {
                    $prev->{type} = 'MODIFY';
                    undef( $prev );
                    next;
                }
                # same size, same file, but not at the same spot
            }
        }
        push @keep, $ch;
        $prev = $ch;
    }
    $self->{changes} = \@keep;
}


####################################################
sub parse_file
{
    my( $self, $file ) = @_;

    my $fh;
    if( ref $file ) {               # assume it's a file handle
        $self->{source} = 'user filehandle';
        $fh = $file;
    }
    else {
        $self->{source} = $file;
        $fh = IO::File->new;
        $fh->open( $file ) or croak "Unable to open $file: $!";
    }

    $self->{changes}=[];
    $self->{state}={ OK=>1 };

    while( <$fh> ) {
        $self->{state}{context} = "line $. of $self->{source}";
        $self->_parse_line( $_ );
    }
    my $ok = $self->{state}{OK};
    delete $self->{state};
    $self->simplify if $self->{simplify};
    return $ok;
}


####################################################
sub parse
{
    my( $self, $text ) = @_;
    $self->{source} = "user string";
    $self->{changes}=[];
    $self->{state}={ OK=>1 };

    my $l=1;
    while( $text =~ /(.+?\n)/g ) {
        $self->{state}{context} = "line $l of string";
        $self->_parse_line( $1 );
        $l++;
    }
    my $ok = $self->{state}{OK};
    delete $self->{state};
    $self->simplify if $self->{simplify};
    return $ok;

}

####################################################
sub _parse_line
{
    my( $self, $line ) = @_;
    $self->{verbose} and warn "Parsing $line";

    my $state = $self->{state};

    if( $state->{unified} ) {
        $self->_unified_line( $line );        
        return if $state->{unified};
    }
    elsif( $state->{standard} ) {
        $self->_standard_line( $line );        
        return if $state->{standard};
    }

    my $file = '(?:-r\d(?:\.\d+)+)|(?:[^-].+)';

    if( $line =~ /^diff\s+($file)\s+($file)\s*$/ ) {
        my @match = ( $1, $2 );
        $self->{verbose} and warn "Diff $1 $2";
        $state->{filename1} = $self->_filename( $match[0] );
        $state->{filename2} = $self->_filename( $match[1] );
    } 
    elsif( $line =~ /^(\d+)(?:,\d+)?[acd](\d+)(?:,\d+)?$/  ) {
        $state->{standard} = 1;
        push @{ $self->{changes} }, bless {
                            at1 => $1, line1 => $1,
                            at2 => $2, line2 => $2,
                            filename1 => $state->{filename1},
                            filename2 => $state->{filename2},
                            timestamp1 => '',
                            timestamp2 => ''
                        }, 'SVN::Web::DiffParser::Change';        
        $self->{verbose} and warn "Standard diff line1=$1 line2=$2";
    }
    elsif( $line =~ /^--- (.+?)\t(.+)$/ or 
            $line =~ /^--- ([^\s]+)\s+(.+)$/) {
        $state->{unified} = 1;
        my $stamp = $2;
        my $name = $self->_filename( $1 );
        $self->{verbose} and warn "Unified diff";
        push @{ $self->{changes} }, bless {
                            type=>'',
                            filename1=>$name,
                            timestamp1=>$stamp,
                        }, 'SVN::Web::DiffParser::Change';
    }
    elsif( $line =~ /^\*\*\* (.+?)\t(.+)$/ or 
            $line =~ /^\*\*\* ([^\s]+)\s+(.+)$/) {
        die "Context diff not yet supported at $state->{context}";
    }
}

####################################################
sub _filename
{
    my( $self, $file ) = @_;
    return $file unless $self->{strip};
    my $n = $self->{strip};
    $file =~ s(^[^/]+/)() while $n--;
    return $file;
}

####################################################
sub _standard_line
{
    my( $self, $line ) = @_;

    my %types = ( ' '=>'', '>'=>'ADD', '<'=>'REMOVE' );

    my $change = $self->{changes}[-1];
 
    if( $line =~ /^([<>])(.+)$/ ) {
        my( $mod, $text ) = ( $1, $2 );
        $mod = $types{$mod};
        $self->_new_line( $mod, $text );
        return;
    }
    if( $line =~ /^---$/ ) {            # pivot
        $self->{verbose} and warn "Pivot";
        return;
    }
    delete $self->{state}{standard};    # let _parse_file deal with it
}

####################################################
sub _unified_line
{
    my( $self, $line ) = @_;
 
    my %types = ( ' '=>'', '+'=>'ADD', '-'=>'REMOVE' );

    my $change = $self->{changes}[-1];
    if( $line =~ /^\+\+\+ (.+?)\t(.+)$/ or 
            $line =~ /^\+\+\+ ([^\s]+)\s+(.+)$/) {
        $change->{timestamp2} = $2;
        $change->{filename2} = $self->_filename( $1 );
        $change->{lines} = [];
        return;
    }
    die "Missing +++ line before $line" 
	unless exists $change->{filename2} and defined $change->{filename2};
    if( $line =~ /^\@\@ -(\d+),(\d+) [+](\d+),(\d+) \@\@$/ ) {
        my @match = ($1, $2, $3, $4);
        if( @{ $change->{lines} } ) {
            $change = $self->_new_chunk;
        }
        @{ $change }{ qw( line1 size1 line2 size2 ) } = @match;
        $change->{at1} = $change->{line1};
        $change->{at2} = $change->{line2};
        return;
    }

    # Files that have been newly added and only contain one line have
    # a hunk marker that looks like "@@ -0,0 +1 @@". Handle that.
    # This code should be refactored with the code above, since only
    # the regexp and the assignment to @match are different.
    if( $line =~ /^\@\@ -(\d+),(\d+) [+](\d+) \@\@$/ ) {
        my @match = ($1, $2, $3, 1);
        if( @{ $change->{lines} } ) {
            $change = $self->_new_chunk;
        }
        @{ $change }{ qw( line1 size1 line2 size2 ) } = @match;
        $change->{at1} = $change->{line1};
        $change->{at2} = $change->{line2};
        return;
    }

    die "Missing \@\@ line before $line at $self->{state}{context}\n"
	unless exists $change->{line1} and defined $change->{line1};

    if( $line =~ /^([-+ ])(.*)$/) {
        my( $mod, $text ) = ( $1, $2 );
        $mod = $types{$mod};
        $self->_new_line( $mod, $text );
        return;
    }
    # Anything else is the end of the diff, so fall through to the
    # diff detection bit
    $self->{state}{unified} = 0;
}

sub _new_type
{
    my( $self, $mod ) = @_;
    my $change = $self->{changes}[-1];

    push @{ $self->{changes} }, bless { 
                                    filename1 => $change->{filename1},
                                    filename2 => $change->{filename2},
                                    line1 => $change->{at1},
                                    line2 => $change->{at2},
                                    at1 => $change->{at1},
                                    at2 => $change->{at2},
                                    type => $mod,
                                    lines => []
                                }, 'SVN::Web::DiffParser::Change';
    return $self->{changes}[-1];
}

sub _new_chunk
{
    my( $self ) = @_;
    my $change = $self->{changes}[-1];
    push @{ $self->{changes} }, bless {
                                    type => '',
                                    filename1 => $change->{filename1},
                                    filename2 => $change->{filename2},
                                    lines => []
                                }, 'SVN::Web::DiffParser::Change';
    return $self->{changes}[-1];
}

sub _new_line
{
    my( $self, $mod, $text ) = @_;

    $self->{verbose} and warn "_new_line";
    my $change = $self->{changes}[-1];
    if( defined $change->{type} ) {
        if( $change->{type} ne $mod ) {
            $self->{verbose} and warn "_new_type";
            $change = $self->_new_type( $mod );
        }
    }
    else {
        $change->{type} = $mod;
    }

    $change->{at1}++ unless $mod eq 'ADD';    # - or ' ', advance in file1
    $change->{at2}++ unless $mod eq 'REMOVE'; # + or ' ', advance in file2
    push @{ $change->{lines} }, $text;
}

######################################################################
package SVN::Web::DiffParser::Change;

use strict;

sub filename1 { $_[0]->{filename1} }
sub filename2 { $_[0]->{filename2} }
sub line1 { $_[0]->{line1} }
sub line2 { $_[0]->{line2} }
sub size  { 0+@{$_[0]->{lines}} }

sub type  
{ 
    my( $self ) = @_;

    return $self->{type} if $self->{type} eq 'ADD' or
                            $self->{type} eq 'REMOVE' or
                            $self->{type} eq 'MODIFY';
    return '';
}
    
sub text
{
    my( $self, $n ) = @_;
    return @{ $self->{lines} } if 1==@_;

    return $self->{lines}[$n];
}
    


1;
__END__

=head1 NAME

SVN::Web::DiffParser - Parse patch files containing unified and standard diffs

=head1 NOTE

This is Text::Diff::Parser, plus some local bug fixes that were exposed
by use with SVN::Web.  For more details about Text::Diff::Parser please
see CPAN.

=head1 SYNOPSIS

    use SVN::Web::DiffParser;

    # create the object
    my $parser = SVN::Web::DiffParser->new();

    # With options
    $parser = SVN::Web::DiffParser->new( Simplify=>1, # simplify the diff
                                       Strip=>2 );  # strip 2 directories

    # Create object.  Parse $file
    $parser = SVN::Web::DiffParser->new( $file );
    $parser = SVN::Web::DiffParser->new( File=>$file );

    # Create object.  Parse text
    my $parser = SVN::Web::DiffParser->new( $text );
    $parser = SVN::Web::DiffParser->new( Diff=>$text );

    # parse a file
    $parser->parse_file( $filename );

    # parse a string
    $parser->parse( $text );
    
    # Remove no-change lines.  Combine line substitutions
    $parser->simplify;

    # Find results
    foreach my $change ( $parser->changes ) {
        print "File1: ", $change->filename1;
        print "Line1: ", $change->line1;
        print "File2: ", $change->filename2;
        print "Line2: ", $change->line2;
        print "Type: ", $change->type;
        my $size = $change->size;
        foreach my $line ( 0..($size-1) ) {
            print "Line: ", $change->line( $size );
        }
    }

    # In scalar context, returns the number of changes
    my $n = $parser->changes;
    print "There are $n changes", 

    # Get the changes to a given file
    my @changes = $parser->changes( 'Makefile.PL' );

    # Get list of files changed by the diff
    my @files = $parser->files;


=head1 DESCRIPTION

C<SVN::Web::DiffParser> parses diff files and patches.  It allows you to
access the changes to a file in a standardized way, even if multiple patch
formats are used.

A diff may be viewed a series of operations on a file, either adding,
removing or modifying lines of one file (the C<from-file>) to produce
another file (the C<to-file>).  Diffs are generally produced either by hand
with diff, or by your version control system (C<cvs diff>, C<svn diff>,
...).  Some diff formats, notably unified diffs, also contain null
operations, that is lines that

C<SVN::Web::DiffParser> currently parses unified diff format and standard diff
format.

Unified diffs look like the following.

    --- Filename1 2006-04-12 18:47:22.000000000 -0400
    +++ Filename2 2006-04-12 19:21:16.000000000 -0400
    @@ -1,4 +1,6 @@
     ONE
     TWO
    -THREE
    +honk
     FOUR
    +honk
    +honk

Standard diffs look like the following.

    diff something something.4
    3c3
    < THREE
    ---
    > honk
    4a5,6
    > honk
    > honk

The diff line isn't in fact part of the format but is necessary to find
which files the chunks deal with.  It is output by C<cvs diff> and C<svn
diff> so that isn't a problem.

=head1 METHODS

=head2 new

    $parser = SVN::Web::DiffParser->new;
    $parser = SVN::Web::DiffParser->new( $file );
    $parser = SVN::Web::DiffParser->new( $handle );
    $parser = SVN::Web::DiffParser->new( %params );
    $parser = SVN::Web::DiffParser->new( \%params );

Object constructor.  



=over 4

=item Diff

String that contains a diff.  This diff will be parse before C<new> returns.

=item File

File name or file handle that is parsed before C<new> returns.

=item Simplify

Simplifying a patch involves dropping all null-operations and converting and
remove operation followed by an add operation (or an add followed by a
remove) of the same size on the same lines into a modify operation.

=item Strip

Strip N leading directories from all filenames.  Less then useful for
standard diffs produced by C<cvs diff>, because they don't contain directory
information.

=item Verbose

If true, print copious details of what is going on.

=back





=head2 parse_file

    $parser->parse_file( $file );
    $parser->parse_file( $handle );

Read and parse the file or file handle specified.  Will C<die> if it fails, 
returns true on success.  Contents of the file may then be accessed with
C<changes> and C<files>.

=head2 parse

    $parser->parse( $string );

Parses the diff present in $string.  Will C<die> if it fails, returns true
on success.  Contents of the file may then be accessed with C<changes> and
C<files>.

=head2 files

    %files = $parser->files;

Fetch a list of all the files that were referenced in the patch.  The keys
are original files (C<from-file>) and the values are the modified files
(C<to-file>).

=head2 changes

    @changes = $parser->changes;
    $n = $parser->changes;
    @changes = $parser->changes( $file );
    $n = $parser->changes( $file );

Return all the operations (array context) or the number of operations in the
patch file.  If C<$file> is specified, only returns changes to that file
(C<from-file> or C<to-file>).

Elements of the returned array are change objects, as described in 
C<CHANGE METHODS> below.




=head1 CHANGE METHODS

The C<changes> method returns an array of objects that describe each
operation.  You may use the following methods to find out details of the
operation.

=head2 type

Returns the type of operation, either C<'ADD'>, C<'REMOVE'>, C<'MODIFY'> or
C<''> (null operation).

=head2 filename1

Filename of the C<from-file>.

=head2 filename2

Filename of the C<to-file>.

=head2 line1

Line in the C<from-file> the operation starts at.

=head2 line2

Line in the C<to-file> the operation starts at.

=head2 size

Number of lines affected by this operation.

=head2 text

    @lines = $ch->text;
    $line  = $ch->text( $N );

Fetch the text of the line C<$N> if present or all lines of affected by this
operation.  For C<''> (null) and C<'REMOVE'> operations, these are the lines
present before the operation was done (C<'from-file'>.  For C<'ADD'> and
C<'MODIFY'> operations, these are the lines present after the operation was
done (C<'to-file'>.


=head1 BUGS

I'm not 100% sure of standard diff handling.

Missing support for context diffs.

=head1 SEE ALSO

L<Text::Diff>, L<Arch>, L<diff>.

=head1 AUTHOR

Philip Gwyn, E<lt>gwyn-at-cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by Philip Gwyn

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.


=cut


$Log: Parser.pm,v $
Revision 1.3  2006/04/13 01:47:37  fil
Tweak

Revision 1.2  2006/04/13 01:43:27  fil
Tweak for move coverage
Add coverage stanza to Makefile.PL
Added BUGS and SEE ALSO section