The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/local/bin/nosuch/perl
#### Text above here was automatically generated during the configuration.
#### Modifications to this file will be lost during configuration.


#!/usr/local/bin/perl

# $Id: diffre.prl.root,v 1.1 2004/03/24 19:25:24 nodine Exp $
# Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
# Distributed under terms of the GNU General Public License (GPL).

# Description: This program is like the Unix diff program except that
# the first file can contain regular expressions that allow parts of the
# line to vary but still count as a match.
# 
# Usage: ${main::TOOL_NAME} [options] re-file file2
# 
# Options:
#   -h          Help
#   -r regexp   Elide differences involving the regexp (no regular 
#               expressions in re-file) (conflicts with -t)
#   -t          Text-only match (no regular expressions in re-file) 
#               (conflicts with -r)
#   -D symbol   Generate "#ifdef symbol" output
#   -V          Print version info

$0 =~ m|([^\/]+)$|;
$main::TOOL_NAME = $1;

main();

#use strict;
use integer;
if ($[ > 5.008) { eval "use bytes"; }
use Getopt::Std;

sub main {
    my($options) = "dhr:tVD:C";
    Usage() unless getopts($options);
    if (defined $opt_r && $opt_t) {
	print STDERR "Cannot have both -r and -t\n";
	Usage('Options');
    }

    $| = 1;			# Output right away (no output buffering)

    Usage('\\$Id') if $main::opt_V;
    Usage('Description') if $main::opt_h;
    Usage() unless defined $ARGV[1];
    
    # Open the files
    open (FILE_RE,"<$ARGV[0]") or die "Cannot open $ARGV[0]";
    open (FILE_2,"<$ARGV[1]") or die "Cannot open $ARGV[1]";

    # Read the files
    @main::FILE_RE = <FILE_RE>;
    @main::FILE_2 = <FILE_2>;
    # Add sentinels to the files
    push @main::FILE_RE, "\000";
    push @main::FILE_2, "\000";

    # Close the files
    close FILE_RE;
    close FILE_2;

    CompareFiles();
}

# This algorithm is loosely based on Dijkstra's single-source shortest-paths
# algorithm.  See Thomas H. Cormen, Charles E. Leiserson, Ronald
# L. Rivest, _Introduction_to_Algorithms_, first edition, pp. 527-532
# for a description of the algorithm.  The graph upon which the
# algorithm is applied is that induced by comparing every line of file
# 1 with every line of file 2; if they are the same, we create a node
# in the graph with a label that is the combination of the two line
# numbers.  The distance between node A=(a1,a2) and node B=(b1,b2)
# is the minumum number of line changes that need to occur in going
# from line a1 of file 1 to line b1 of file 1 and line a2 of file 2
# to line b2 of file 2 (or infinite if b1 <= a1 or b2 <= a2).  This
# distance is delta(A,B) = b1-a1+b2-a2-2 if there are no
# intervening nodes.  The goal is to find the shortest path from
# (-1,-1) to the vertex that represents the end of both files.

# For efficiency's sake, we do not actually generate all the nodes,
# but rather produce them as Dijkstra's algorithm would need them.
# For further efficiency's sake, we do not even generate all the successors
# every time we add a node to the list of those whose distances are
# known.  We go through the list generating successors at progressively
# larger distances as they become greater than the minimum distance.
# Since the delta function has an inverse triangle inequality, i.e.,
# delta(A,C) > delta(A,B) + delta(B,C), it means that any successor B
# of any node A means that we do not need to check edges from A to any
# point C for which c1 >= b1 and c2 >= b2.  As a special case, if B
# is (a1+1, b1+1), no further checks from A are necessary, so it is
# removed from the active list.

# Unlike Dijkstra's algorithm, by the time we compute a distance for
# a node, we know that it is the exact minimum distance.

# The following global variables are used:
#   %PRED	          Key=node name, value=predecessor on min. path
#   %DIST                 Key=node name, value=distance from start node
#   %SUCC	          Key=node name, value=hash ref. with key=successor 
#                         node name and value=est min. dist.
#   %ACTIVE               Key=node name, value=1; has an entry for every
#                         node from which we actively generate successors
#   %COORDS               key=node name, value=array ref. with node's
#                         coordinates.

# This is the subroutine that actually compares the files.
# Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST, %main::PRED,
#               %main::SUCC
# Sets globals: %main::DIST, %main::ACTIVE
#BEGIN_CORE -- Do not delete this line; expectit2 depends upon it
sub CompareFiles {
    my($goalNode) = @main::FILE_2 . ',' . @main::FILE_RE;
    if ($main::opt_r) {
	# Preprocess RE file for regexp
	foreach (@main::FILE_RE) {
	    my $A = $_;
	    # Quote regexp characters
	    $A =~ s/([.*?+^\$\\$@\[\]\(\)\{\}])|$opt_r/
		$1 ne '' ? "\\$1" : $opt_r/geo;
	    push @main::OPTR_RE,$A;
	}
    }
    my($startNode) = '-1,-1';
    if ($main::opt_t) {
	eval (q|sub Compare { 
#print STDERR "($_[0],$_[1]): \'${\substr($main::FILE_2[$_[0]],0,-1)}\' cmp \'${\substr($main::FILE_RE[$_[1]],0,-1)}\'\n";
	    return $main::FILE_2[$_[0]] eq $main::FILE_RE[$_[1]];
	}|);
    }
    elsif ($main::opt_r) {
	eval (q|sub Compare {
	    return $main::FILE_2[$_[0]] =~ /^$main::OPTR_RE[$_[1]]$/;
	}|);
    }
    else {
	eval(q|sub Compare {
	    return $main::FILE_2[$_[0]] =~ /^$main::FILE_RE[$_[1]]$/;
	}|);
    }
    $main::COORDS{$startNode} = [-1, -1];
    $main::ACTIVE{$startNode} = 1;
    $main::DIST{$startNode} = 0;

    my $genDist = 0;

    my %done;
    # We continue until the last node we generated was the goal node
#print STDERR "$goalNode\n";
    while (! defined $main::DIST{$goalNode}) {
	$genDist = GenerateSuccessors($genDist);
    }

    pop @main::FILE_2;
    pop @main::FILE_RE;
    # Generate the report by recursively working backwards from the goal node
    if ($main::opt_d) {
	GenerateDot();
    }
    else {
	OutputDifferences($goalNode);
	print "$main::count comparisons\n" if $main::opt_C;
    }
}

# This subroutine generates the successors for the nodes in 
# %main::ACTIVE starting with the specified distance.
# It returns the distance of any newly generated nodes.
# Arguments: $genDist
# Returns: Updated $genDist
# Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST, 
#               %main::DONE_COMPARISON, %main::ACTIVE
# Sets globals: %main::DIST, %main::PRED, %main::SUCC, %main::DONE_COMPARISON,
#               %main::ACTIVE, %main::COORDS
sub GenerateSuccessors {
    my($genDist) = @_;
#print STDERR "GenerateSuccessors(",join(',',@_),")\n";

    my $found_node;
    my $max_line_2 = @main::FILE_2;
    for ($found_node = 0; ! $found_node; $genDist++) {
	my $i;
	my @nodes = keys %main::ACTIVE;
	die "Internal error: No active nodes!" unless @nodes;
	for ($i=0; $i < @nodes; $i++) {
#print STDERR "$i: [",join(';',@nodes),"]\n";
	    my $node = $nodes[$i];
	    my $baseDist = $main::DIST{$node};
#print STDERR "[$node] at $genDist = ${\scalar($genDist-$baseDist)} from $baseDist\n";
	    my $node_lines = $COORDS{$node};
	    my ($min_line_2,$min_line_re) =
		($node_lines->[0]+1, $node_lines->[1]+1);
	    my $netDist = $genDist - $baseDist;
	    my $active = 0;
	    my $max_line_re = $min_line_re + $netDist;
	    $max_line_re = @main::FILE_RE if @main::FILE_RE < $max_line_re;
	    my $line_re = $min_line_re;
	    my $line_2 = $min_line_2 + $netDist;
	    if ($line_2 > $max_line_2) {
#print STDERR "    Off end of file 2: ($line_2, $line_re)";
		my $diff = $line_2 - $max_line_2;
		$line_re += $diff;
		$line_2 -= $diff;
#print STDERR ": warp to ($line_2, $line_re)\n";
	    }
	    my @succ = sort {$b <=> $a} keys %{$main::SUCC{$node}};
	    while ($line_re <= $max_line_re) {
#print STDERR "  Check ($line_2, $line_re)\n";
		# Check to see that none of the node's successors blocks this
		# comparison.
		my $succ = $succ[0];
		if (defined $succ && $COORDS{$succ}[1] <= $line_re) {
		    # It blocks.
		    my $diff = $line_2 - $COORDS{$succ}[0] + 1;
		    $line_re += $diff;
		    $line_2 -= $diff;
#print STDERR "    Blocked by $succ: warp to ($line_2, $line_re)\n";
		    shift @succ;
		    next;
		}
		my $newNode = $line_2 . ',' . $line_re;
		my $compare = $DONE_COMPARISON{$newNode};
		if (! defined $compare) {
		    $compare = $DONE_COMPARISON{$newNode} =
			Compare($line_2,$line_re);
		}
		$main::count++;
		$active = 1;
		if ($compare) {
		    # The lines agree; we've found a new node
#print STDERR "  Match ($line_2,$line_re) = $genDist\n";
		    my $oldDist = $main::DIST{$newNode};
		    if (! defined $oldDist || $oldDist > $genDist) {
			$main::DIST{$newNode} = $genDist;
			$main::COORDS{$newNode} = [$line_2,$line_re];
			$main::ACTIVE{$newNode} = 1;
			$main::PRED{$newNode} = $node;
			push @nodes, $newNode;
			$found_node = 1;
		    }
		    $main::SUCC{$node}{$newNode} = $genDist;
		}
		$line_re++, $line_2--;
	    }
#print STDERR "  $node is no longer active\n" unless $active;	    
	    delete $main::ACTIVE{$node} unless $active;
	}
    }
    return $genDist;
}

# This subroutine outputs a difference report.  The arguments are the
# delta line numbers for the two files where the next match occurs.
# Arguments: $node
# Uses globals: @main::FILE_RE, @main::FILE_2, %main::PRED
sub OutputDifferences {
    my($goalNode) = @_;
    my(@path,$node);

    push(@path, $goalNode);
    for ($node = $goalNode; $main::PRED{$node} ne "";
	 $node = $main::PRED{$node}) {
	push(@path, $main::PRED{$node});
    }

    my($i);
    for ($i = $#path-1; $i >= 0; $i--) {
	my($last_2,$last_re) = @{$COORDS{$path[$i+1]}};
	my($this_2,$this_re) = @{$COORDS{$path[$i]}};
	my($diff_2,$diff_re) = ($this_2-$last_2-1, $this_re-$last_re-1);
	if ($diff_re != 0 || $diff_2 != 0) {
	    my($base_re) = $diff_re == 0 ? $last_re + 1 :
		($diff_re == 1 ? ($last_re + 2) :
		 ($last_re + 2) . ',' . ($last_re + $diff_re + 1));
	    my($base_2) = $diff_2 == 0 ? $last_2 + 1 :
		($diff_2 == 1 ? ($last_2 + 2) :
		 ($last_2 + 2) . ',' . ($last_2 + $diff_2 + 1));
	    my $pfx;
	    if (defined $opt_D) {
		print "#ifndef $opt_D\n" if $diff_re > 0;
	    }
	    else {
		print $base_re, ($diff_re == 0 ? 'a' : ($diff_2 == 0 ? 'd'
							: 'c')),
		    "$base_2\n";
		$pfx = "< ";
	    }
	    print map("$pfx$_",@main::FILE_RE[$last_re+1 .. ($last_re+$diff_re)])
		if ($diff_re > 0);
	    if (defined $opt_D) {
		print $diff_re > 0 ? "#else /* $opt_D */\n" : 
		    "#ifdef $opt_D\n" if $diff_2 > 0;
	    }
	    else {
		print "---\n" if $diff_re > 0 && $diff_2 > 0;
		$pfx = "> ";
	    }
	    print map("$pfx$_",@main::FILE_2[$last_2+1 .. ($last_2+$diff_2)])
		if ($diff_2 > 0);
	    if (defined $opt_D) {
		print "#endif /* $opt_D */\n";
		print $main::FILE_2[$this_2];
	    }
	}
	elsif (defined $opt_D) {
	    print $main::FILE_2[$this_2];
	}
    }
}

# This subroutine outputs a dot file for debugging purposes.
sub GenerateDot {
    print "digraph g {\n";
    my($node);
    # Find out how many predecessors each node has
    my(%preds);
    foreach $node (keys %main::SUCC) {
	my($nextNode);
	foreach $nextNode (keys %{$main::SUCC{$node}}) {
	    $preds{$nextNode}++;
	}
    }
    # Elide strings of nodes with the same total distance
    foreach $node (keys %main::SUCC) {
	my($nextNode);
	my(@succs);
	@succs = keys %{$main::SUCC{$node}};
	if ($preds{$node} == 1 && @succs == 1 &&
	    $main::DIST{$main::PRED{$node}} == 
	    $main::SUCC{$node}{$succs[0]}) {
	    $dist = $main::SUCC{$node}{$succs[0]};
	    delete $main::SUCC{$main::PRED{$node}}{$node};
	    delete $main::SUCC{$node}{$succs[0]};
	    $main::SUCC{$main::PRED{$node}}{$succs[0]} = $dist;
	    $main::PRED{$succs[0]} = $main::PRED{$node};
	}
    }
    # Find the shortest path
    my(%path);
    for ($node = $goalNode; $main::PRED{$node} ne "";
	 $node = $main::PRED{$node}) {
	$path{$main::PRED{$node}} = $node;
    }
    # Output the graph
    foreach $node (sort lex keys %main::SUCC) {
	my($nextNode);
	my($l1a,$l2a) = split(/,/, $node);
	foreach $nextNode (sort lex keys %{$main::SUCC{$node}}) {
	    my($dist) = $main::SUCC{$node}{$nextNode};
	    my($atts);
	    $atts = ', weight = "10"'
		if $path{$node} eq $nextNode;
	    my($l1b,$l2b) = split(/,/, $nextNode);
	    $atts .= ', style = "dotted"'
		if $l1b - $l1a > 1 && $main::DIST{$node} ==
		$main::SUCC{$node}{$nextNode};
	    print qq/ "$node" -> "$nextNode" [label = "$dist"$atts];\n/
	    }
    }
    print "}\n";
}
# Does lexicographic sort of node names
sub lex {
    my ($a1,$a2) = split(/,/, $a);
    my ($b1,$b2) = split(/,/, $b);
    return $a1 <=> $b1 || $a2 <=> $b2;
}

#END_CORE -- Do not delete this line; expectit2 depends upon it

# This subroutine extracts and prints usage information
sub Usage {
    my ($what,$end) = @_;
    $what = "Usage" if ! $what;
    if (open(ME,$0) == 1) {
	while (<ME>) {
	    $print = 1 if /^\# $what/o;
	    $print = 0 if ! /^\#/o || ($end && /^\# $end/o);
	    if ($print) {
		my $line = substr($_, 2);
		$line =~ s/(\$\{[^\}]+\})/eval($1)/ge;
		print $line;
	    }
	}
	close(ME);
    }
    else {
	print STDERR "Usage not available.\n";
    }
    exit (1);
}

__END__
# $Log: diffre.prl.root,v $
# Revision 1.1  2004/03/24 19:25:24  nodine
# Rename tool from rediff to diffre.
#
# Revision 1.1  2004/03/19 21:54:50  nodine
# Create gen_gress, run_gress, and rediff using the safe perl from config
# time and just copy to the bin directory.
#
# Revision 1.1  2004/02/03 20:45:09  nodine
# Initial release.
#
# Revision 1.1  2003/02/07 21:28:22  nodine
# Initial release.
#
# Revision 1.1  2001/11/05 22:17:42  nodine
# Initial release.
#
# Revision 1.1  2001/07/25 15:24:35  nodine
# Initial release.  These tools are for the regression set methodology.
#
# Revision 1.1  2001/02/10 16:22:55  nodine
# Initial release.
#
# Revision 1.1  2000/11/03 20:22:40  nodine
# Initial release.
#
# Revision 1.1  2000/08/31 18:11:48  nodine
# Initial release.
#
# Revision 1.1  2000/05/31 20:31:14  nodine
# Initial release.
#
# Revision 1.4  1997/11/06 21:42:17  nodine
# * Added comma before "weight" attribute in dot output.
# * Fixed processing that occurs when one EOF is reached before the other.
#
# Revision 1.3  1997/11/03  18:15:07  nodine
# * Added -t option and hidden -D and -C options.
# * Fixed a bunch of bugs.
# * Did some speed optimizations.
#
# Revision 1.2  1997/10/27  20:22:54  nodine
# Changed to find the minimal set of differences by using Dijkstra's shortest
# path algorithm.
#
# Revision 1.1  1997/10/24  21:22:43  nodine
# "Initial version"
#