#!/usr/local/bin/nosuch/perl
#### Text above here was automatically generated during the configuration.
#### Modifications to this file will be lost during configuration.
#!/usr/local/bin/perl
# $Id: diffre.prl.root,v 1.1 2004/03/24 19:25:24 nodine Exp $
# Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
# Distributed under terms of the GNU General Public License (GPL).
# Description: This program is like the Unix diff program except that
# the first file can contain regular expressions that allow parts of the
# line to vary but still count as a match.
#
# Usage: ${main::TOOL_NAME} [options] re-file file2
#
# Options:
# -h Help
# -r regexp Elide differences involving the regexp (no regular
# expressions in re-file) (conflicts with -t)
# -t Text-only match (no regular expressions in re-file)
# (conflicts with -r)
# -D symbol Generate "#ifdef symbol" output
# -V Print version info
$0 =~ m|([^\/]+)$|;
$main::TOOL_NAME = $1;
main();
#use strict;
use integer;
if ($[ > 5.008) { eval "use bytes"; }
use Getopt::Std;
sub main {
my($options) = "dhr:tVD:C";
Usage() unless getopts($options);
if (defined $opt_r && $opt_t) {
print STDERR "Cannot have both -r and -t\n";
Usage('Options');
}
$| = 1; # Output right away (no output buffering)
Usage('\\$Id') if $main::opt_V;
Usage('Description') if $main::opt_h;
Usage() unless defined $ARGV[1];
# Open the files
open (FILE_RE,"<$ARGV[0]") or die "Cannot open $ARGV[0]";
open (FILE_2,"<$ARGV[1]") or die "Cannot open $ARGV[1]";
# Read the files
@main::FILE_RE = <FILE_RE>;
@main::FILE_2 = <FILE_2>;
# Add sentinels to the files
push @main::FILE_RE, "\000";
push @main::FILE_2, "\000";
# Close the files
close FILE_RE;
close FILE_2;
CompareFiles();
}
# This algorithm is loosely based on Dijkstra's single-source shortest-paths
# algorithm. See Thomas H. Cormen, Charles E. Leiserson, Ronald
# L. Rivest, _Introduction_to_Algorithms_, first edition, pp. 527-532
# for a description of the algorithm. The graph upon which the
# algorithm is applied is that induced by comparing every line of file
# 1 with every line of file 2; if they are the same, we create a node
# in the graph with a label that is the combination of the two line
# numbers. The distance between node A=(a1,a2) and node B=(b1,b2)
# is the minumum number of line changes that need to occur in going
# from line a1 of file 1 to line b1 of file 1 and line a2 of file 2
# to line b2 of file 2 (or infinite if b1 <= a1 or b2 <= a2). This
# distance is delta(A,B) = b1-a1+b2-a2-2 if there are no
# intervening nodes. The goal is to find the shortest path from
# (-1,-1) to the vertex that represents the end of both files.
# For efficiency's sake, we do not actually generate all the nodes,
# but rather produce them as Dijkstra's algorithm would need them.
# For further efficiency's sake, we do not even generate all the successors
# every time we add a node to the list of those whose distances are
# known. We go through the list generating successors at progressively
# larger distances as they become greater than the minimum distance.
# Since the delta function has an inverse triangle inequality, i.e.,
# delta(A,C) > delta(A,B) + delta(B,C), it means that any successor B
# of any node A means that we do not need to check edges from A to any
# point C for which c1 >= b1 and c2 >= b2. As a special case, if B
# is (a1+1, b1+1), no further checks from A are necessary, so it is
# removed from the active list.
# Unlike Dijkstra's algorithm, by the time we compute a distance for
# a node, we know that it is the exact minimum distance.
# The following global variables are used:
# %PRED Key=node name, value=predecessor on min. path
# %DIST Key=node name, value=distance from start node
# %SUCC Key=node name, value=hash ref. with key=successor
# node name and value=est min. dist.
# %ACTIVE Key=node name, value=1; has an entry for every
# node from which we actively generate successors
# %COORDS key=node name, value=array ref. with node's
# coordinates.
# This is the subroutine that actually compares the files.
# Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST, %main::PRED,
# %main::SUCC
# Sets globals: %main::DIST, %main::ACTIVE
#BEGIN_CORE -- Do not delete this line; expectit2 depends upon it
sub CompareFiles {
my($goalNode) = @main::FILE_2 . ',' . @main::FILE_RE;
if ($main::opt_r) {
# Preprocess RE file for regexp
foreach (@main::FILE_RE) {
my $A = $_;
# Quote regexp characters
$A =~ s/([.*?+^\$\\$@\[\]\(\)\{\}])|$opt_r/
$1 ne '' ? "\\$1" : $opt_r/geo;
push @main::OPTR_RE,$A;
}
}
my($startNode) = '-1,-1';
if ($main::opt_t) {
eval (q|sub Compare {
#print STDERR "($_[0],$_[1]): \'${\substr($main::FILE_2[$_[0]],0,-1)}\' cmp \'${\substr($main::FILE_RE[$_[1]],0,-1)}\'\n";
return $main::FILE_2[$_[0]] eq $main::FILE_RE[$_[1]];
}|);
}
elsif ($main::opt_r) {
eval (q|sub Compare {
return $main::FILE_2[$_[0]] =~ /^$main::OPTR_RE[$_[1]]$/;
}|);
}
else {
eval(q|sub Compare {
return $main::FILE_2[$_[0]] =~ /^$main::FILE_RE[$_[1]]$/;
}|);
}
$main::COORDS{$startNode} = [-1, -1];
$main::ACTIVE{$startNode} = 1;
$main::DIST{$startNode} = 0;
my $genDist = 0;
my %done;
# We continue until the last node we generated was the goal node
#print STDERR "$goalNode\n";
while (! defined $main::DIST{$goalNode}) {
$genDist = GenerateSuccessors($genDist);
}
pop @main::FILE_2;
pop @main::FILE_RE;
# Generate the report by recursively working backwards from the goal node
if ($main::opt_d) {
GenerateDot();
}
else {
OutputDifferences($goalNode);
print "$main::count comparisons\n" if $main::opt_C;
}
}
# This subroutine generates the successors for the nodes in
# %main::ACTIVE starting with the specified distance.
# It returns the distance of any newly generated nodes.
# Arguments: $genDist
# Returns: Updated $genDist
# Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST,
# %main::DONE_COMPARISON, %main::ACTIVE
# Sets globals: %main::DIST, %main::PRED, %main::SUCC, %main::DONE_COMPARISON,
# %main::ACTIVE, %main::COORDS
sub GenerateSuccessors {
my($genDist) = @_;
#print STDERR "GenerateSuccessors(",join(',',@_),")\n";
my $found_node;
my $max_line_2 = @main::FILE_2;
for ($found_node = 0; ! $found_node; $genDist++) {
my $i;
my @nodes = keys %main::ACTIVE;
die "Internal error: No active nodes!" unless @nodes;
for ($i=0; $i < @nodes; $i++) {
#print STDERR "$i: [",join(';',@nodes),"]\n";
my $node = $nodes[$i];
my $baseDist = $main::DIST{$node};
#print STDERR "[$node] at $genDist = ${\scalar($genDist-$baseDist)} from $baseDist\n";
my $node_lines = $COORDS{$node};
my ($min_line_2,$min_line_re) =
($node_lines->[0]+1, $node_lines->[1]+1);
my $netDist = $genDist - $baseDist;
my $active = 0;
my $max_line_re = $min_line_re + $netDist;
$max_line_re = @main::FILE_RE if @main::FILE_RE < $max_line_re;
my $line_re = $min_line_re;
my $line_2 = $min_line_2 + $netDist;
if ($line_2 > $max_line_2) {
#print STDERR " Off end of file 2: ($line_2, $line_re)";
my $diff = $line_2 - $max_line_2;
$line_re += $diff;
$line_2 -= $diff;
#print STDERR ": warp to ($line_2, $line_re)\n";
}
my @succ = sort {$b <=> $a} keys %{$main::SUCC{$node}};
while ($line_re <= $max_line_re) {
#print STDERR " Check ($line_2, $line_re)\n";
# Check to see that none of the node's successors blocks this
# comparison.
my $succ = $succ[0];
if (defined $succ && $COORDS{$succ}[1] <= $line_re) {
# It blocks.
my $diff = $line_2 - $COORDS{$succ}[0] + 1;
$line_re += $diff;
$line_2 -= $diff;
#print STDERR " Blocked by $succ: warp to ($line_2, $line_re)\n";
shift @succ;
next;
}
my $newNode = $line_2 . ',' . $line_re;
my $compare = $DONE_COMPARISON{$newNode};
if (! defined $compare) {
$compare = $DONE_COMPARISON{$newNode} =
Compare($line_2,$line_re);
}
$main::count++;
$active = 1;
if ($compare) {
# The lines agree; we've found a new node
#print STDERR " Match ($line_2,$line_re) = $genDist\n";
my $oldDist = $main::DIST{$newNode};
if (! defined $oldDist || $oldDist > $genDist) {
$main::DIST{$newNode} = $genDist;
$main::COORDS{$newNode} = [$line_2,$line_re];
$main::ACTIVE{$newNode} = 1;
$main::PRED{$newNode} = $node;
push @nodes, $newNode;
$found_node = 1;
}
$main::SUCC{$node}{$newNode} = $genDist;
}
$line_re++, $line_2--;
}
#print STDERR " $node is no longer active\n" unless $active;
delete $main::ACTIVE{$node} unless $active;
}
}
return $genDist;
}
# This subroutine outputs a difference report. The arguments are the
# delta line numbers for the two files where the next match occurs.
# Arguments: $node
# Uses globals: @main::FILE_RE, @main::FILE_2, %main::PRED
sub OutputDifferences {
my($goalNode) = @_;
my(@path,$node);
push(@path, $goalNode);
for ($node = $goalNode; $main::PRED{$node} ne "";
$node = $main::PRED{$node}) {
push(@path, $main::PRED{$node});
}
my($i);
for ($i = $#path-1; $i >= 0; $i--) {
my($last_2,$last_re) = @{$COORDS{$path[$i+1]}};
my($this_2,$this_re) = @{$COORDS{$path[$i]}};
my($diff_2,$diff_re) = ($this_2-$last_2-1, $this_re-$last_re-1);
if ($diff_re != 0 || $diff_2 != 0) {
my($base_re) = $diff_re == 0 ? $last_re + 1 :
($diff_re == 1 ? ($last_re + 2) :
($last_re + 2) . ',' . ($last_re + $diff_re + 1));
my($base_2) = $diff_2 == 0 ? $last_2 + 1 :
($diff_2 == 1 ? ($last_2 + 2) :
($last_2 + 2) . ',' . ($last_2 + $diff_2 + 1));
my $pfx;
if (defined $opt_D) {
print "#ifndef $opt_D\n" if $diff_re > 0;
}
else {
print $base_re, ($diff_re == 0 ? 'a' : ($diff_2 == 0 ? 'd'
: 'c')),
"$base_2\n";
$pfx = "< ";
}
print map("$pfx$_",@main::FILE_RE[$last_re+1 .. ($last_re+$diff_re)])
if ($diff_re > 0);
if (defined $opt_D) {
print $diff_re > 0 ? "#else /* $opt_D */\n" :
"#ifdef $opt_D\n" if $diff_2 > 0;
}
else {
print "---\n" if $diff_re > 0 && $diff_2 > 0;
$pfx = "> ";
}
print map("$pfx$_",@main::FILE_2[$last_2+1 .. ($last_2+$diff_2)])
if ($diff_2 > 0);
if (defined $opt_D) {
print "#endif /* $opt_D */\n";
print $main::FILE_2[$this_2];
}
}
elsif (defined $opt_D) {
print $main::FILE_2[$this_2];
}
}
}
# This subroutine outputs a dot file for debugging purposes.
sub GenerateDot {
print "digraph g {\n";
my($node);
# Find out how many predecessors each node has
my(%preds);
foreach $node (keys %main::SUCC) {
my($nextNode);
foreach $nextNode (keys %{$main::SUCC{$node}}) {
$preds{$nextNode}++;
}
}
# Elide strings of nodes with the same total distance
foreach $node (keys %main::SUCC) {
my($nextNode);
my(@succs);
@succs = keys %{$main::SUCC{$node}};
if ($preds{$node} == 1 && @succs == 1 &&
$main::DIST{$main::PRED{$node}} ==
$main::SUCC{$node}{$succs[0]}) {
$dist = $main::SUCC{$node}{$succs[0]};
delete $main::SUCC{$main::PRED{$node}}{$node};
delete $main::SUCC{$node}{$succs[0]};
$main::SUCC{$main::PRED{$node}}{$succs[0]} = $dist;
$main::PRED{$succs[0]} = $main::PRED{$node};
}
}
# Find the shortest path
my(%path);
for ($node = $goalNode; $main::PRED{$node} ne "";
$node = $main::PRED{$node}) {
$path{$main::PRED{$node}} = $node;
}
# Output the graph
foreach $node (sort lex keys %main::SUCC) {
my($nextNode);
my($l1a,$l2a) = split(/,/, $node);
foreach $nextNode (sort lex keys %{$main::SUCC{$node}}) {
my($dist) = $main::SUCC{$node}{$nextNode};
my($atts);
$atts = ', weight = "10"'
if $path{$node} eq $nextNode;
my($l1b,$l2b) = split(/,/, $nextNode);
$atts .= ', style = "dotted"'
if $l1b - $l1a > 1 && $main::DIST{$node} ==
$main::SUCC{$node}{$nextNode};
print qq/ "$node" -> "$nextNode" [label = "$dist"$atts];\n/
}
}
print "}\n";
}
# Does lexicographic sort of node names
sub lex {
my ($a1,$a2) = split(/,/, $a);
my ($b1,$b2) = split(/,/, $b);
return $a1 <=> $b1 || $a2 <=> $b2;
}
#END_CORE -- Do not delete this line; expectit2 depends upon it
# This subroutine extracts and prints usage information
sub Usage {
my ($what,$end) = @_;
$what = "Usage" if ! $what;
if (open(ME,$0) == 1) {
while (<ME>) {
$print = 1 if /^\# $what/o;
$print = 0 if ! /^\#/o || ($end && /^\# $end/o);
if ($print) {
my $line = substr($_, 2);
$line =~ s/(\$\{[^\}]+\})/eval($1)/ge;
print $line;
}
}
close(ME);
}
else {
print STDERR "Usage not available.\n";
}
exit (1);
}
__END__
# $Log: diffre.prl.root,v $
# Revision 1.1 2004/03/24 19:25:24 nodine
# Rename tool from rediff to diffre.
#
# Revision 1.1 2004/03/19 21:54:50 nodine
# Create gen_gress, run_gress, and rediff using the safe perl from config
# time and just copy to the bin directory.
#
# Revision 1.1 2004/02/03 20:45:09 nodine
# Initial release.
#
# Revision 1.1 2003/02/07 21:28:22 nodine
# Initial release.
#
# Revision 1.1 2001/11/05 22:17:42 nodine
# Initial release.
#
# Revision 1.1 2001/07/25 15:24:35 nodine
# Initial release. These tools are for the regression set methodology.
#
# Revision 1.1 2001/02/10 16:22:55 nodine
# Initial release.
#
# Revision 1.1 2000/11/03 20:22:40 nodine
# Initial release.
#
# Revision 1.1 2000/08/31 18:11:48 nodine
# Initial release.
#
# Revision 1.1 2000/05/31 20:31:14 nodine
# Initial release.
#
# Revision 1.4 1997/11/06 21:42:17 nodine
# * Added comma before "weight" attribute in dot output.
# * Fixed processing that occurs when one EOF is reached before the other.
#
# Revision 1.3 1997/11/03 18:15:07 nodine
# * Added -t option and hidden -D and -C options.
# * Fixed a bunch of bugs.
# * Did some speed optimizations.
#
# Revision 1.2 1997/10/27 20:22:54 nodine
# Changed to find the minimal set of differences by using Dijkstra's shortest
# path algorithm.
#
# Revision 1.1 1997/10/24 21:22:43 nodine
# "Initial version"
#