#!/usr/local/bin/perl
=pod
=begin reST
=begin Id
$Id: htmldiff.prl 767 2006-01-28 03:29:15Z marknodine $
Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
Distributed under terms of the GNU General Public License (GPL).
=end Id
=begin Description
Description: This tool takes two HTML files and creates an HTML file
that has markings of text insertions and deletions.
=end Description
=begin Usage
Usage: htmldiff [options] file1 file2
Options:
-b string String to insert for backward link with -l.
Default: "${opt_b}"
-d string Deletion marking string, where "$_" specifies the
deleted words.
Default: "${opt_d}"
-f string String to insert for forward link with -l.
Default: "${opt_f}"
-h Prints help
-i string Insertion marking string, where "$_" specifies the
inserted words.
Default: "${opt_i}"
-k Create key describing difference format
-l Create links between difference regions
-p string Program to run for diff (program must support -D option).
Default: ${opt_p}
-V Prints version information
=end Usage
=end reST
=cut
BEGIN {
$opt_b = '<font color=orange><</font>';
$opt_f = '<font color=orange>></font>';
$opt_i = '<U><FONT COLOR=#800080>$_</FONT></U>';
$opt_d = '<S><FONT COLOR=#800000>$_</FONT></S>';
$opt_p = 'diff';
}
# Get options and process them
use Getopt::Std;
Usage() unless getopts("b:d:f:hi:klp:V");
Usage('Id') if $opt_V;
Usage('Description') if $opt_h;
die "Wrong number of arguments" unless @ARGV == 2;
$opt_i =~ /(.*)\$_(.*)/;
($INSERT_PFX,$INSERT_SFX) = ($1, $2);
$opt_d =~ /(.*)\$_(.*)/;
($DELETE_PFX,$DELETE_SFX) = ($1, $2);
#sub INSERTION { "$INSERT_PFX$_[0]$INSERT_SFX" }
#sub DELETION { OptLString() . "$DELETE_PFX$_[0]$DELETE_SFX" }
($ctx_both, $ctx_1, $ctx_2) = (0 .. 2);
$args = join(' ',@ARGV);
$sym = "A__";
open (DIFF, "$opt_p -D $sym $args|") || die "Cannot run diff";
$context = $ctx_both;
$DIFF = 0;
if ($opt_k) {
print "Key: ${DELETE_PFX}Deleted text.$DELETE_SFX\n";
print "${INSERT_PFX}Inserted text.$INSERT_SFX\n";
print "Use <A HREF='#'>$opt_b</A> and <A HREF='#__diff${\scalar($DIFF+1)}'>$opt_f</A> to jump to previous/next difference region.\n"
if $opt_l;
print "<hr />\n";
}
print "<A HREF='#__diff${\scalar($DIFF+1)}' NAME='__diff$DIFF'>$opt_f</A> "
if $opt_l;
while (<DIFF>) {
if (/^#ifndef $sym\b/o) {
$context = $ctx_1; next;
}
if (/^#ifdef $sym\b/o) {
$context = $ctx_2; next;
}
if (/^#else /o) {
$context = 3 - $context; next;
}
if (/^#endif /o) {
$context = $ctx_both;
# Process the diff
# Get the word lists for each section
$diff1 = join('', @diff1);
$diff2 = join('', @diff2);
if ($diff1 eq '') {
my $s = MarkNonHTML($diff2, $INSERT_PFX, $INSERT_SFX);
$s =~ s/($INSERT_PFX)/OptLString() . $1/eo;
print $s;
}
elsif ($diff2 eq '') {
my $s = MarkNonHTML($diff1, $DELETE_PFX, $DELETE_SFX);
$s =~ s/($DELETE_PFX)/OptLString() . $1/eo;
print $s;
}
else {
@words1 = GetWordList($diff1);
@words2 = GetWordList($diff2);
# Use diff to find out which words were added/deleted/changed
open (O1, "> /tmp/1.$$") || die "Cannot write to /tmp/1.$$";
print O1 join("\n",@words1),"\n";
close O1;
open (O2, "> /tmp/2.$$") || die "Cannot write to /tmp/2.$$";
print O2 join("\n",@words2),"\n";
close O2;
open (DIFF2, "diff /tmp/1.$$ /tmp/2.$$|");
# Record for each difference section the type of difference
# (a, c, or d), the beginning and ending words in diff2, and
# any non-HTML words deleted from diff1.
my(@deletes, @diffs);
while (<DIFF2>) {
if (/^[0-9]+(?:,[0-9]+)?([acd][0-9]+(?:,[0-9]+)?)/) {
# A new difference section
push(@diffs, $1);
push(@deletes, []);
}
elsif (/^< ([^<].*)/) {
push(@{$deletes[$#deletes]}, $1);
}
}
close DIFF2;
unlink "/tmp/1.$$";
unlink "/tmp/2.$$";
# Create the merged version. We copy words from diff2 to
# $merged until we hit a difference section; for each
# difference section, we put the struck out section in and
# then mark as added any non-HTML words from diff2 for c
# and a type difference sections.
my($merged);
my($i);
my($word) = 1;
for ($i = 0; $i < @diffs; $i++) {
my($diff) = $diffs[$i];
my($deletes) = $deletes[$i];
my($type,$start,$end) =
$diff =~ /([acd])([0-9]+)(?:,([0-9]+))?/;
$start++ if $type eq 'd';
$end = $start unless $end;
# Copy over stuff
while ($word < $start) {
if ($diff2 =~ /^(\s+)/) {
}
elsif ($diff2 =~ /^(<.*?>|[^<\s]+)/) {
$word++;
}
else { next; }
$merged .= $1;
$diff2 = "$'";
}
# Mark any deletions
my $new;
$new = ' ' .
MarkNonHTML((join(' ',@$deletes)), $DELETE_PFX,
$DELETE_SFX) . ' '
if @$deletes;
if ($type =~ /[ac]/) {
# Mark any additions. Copy the whole string to $add.
my($add);
while ($word <= $end) {
if ($diff2 =~ /^(\s+)/) {
}
elsif ($diff2 =~ /^(<.*?>|[^<\s]+)/) {
$word++;
}
else { next; }
$add .= $1;
$diff2 = "$'";
}
# Now mark the non-HTML words.
$new .= MarkNonHTML($add, $INSERT_PFX, $INSERT_SFX);
}
$merged .= (($new =~ /$DELETE_PFX|$INSERT_PFX/o) ?
OptLString() : "") . $new;
}
# Copy the rest of the diff2 text
$merged .= $diff2;
print $merged;
}
undef @diff1;
undef @diff2;
next;
}
if ($context == $ctx_both) {
print;
}
elsif ($context == $ctx_1) {
push(@diff1, $_);
}
else {
push(@diff2, $_);
}
}
print "<A HREF='#__diff${\scalar($DIFF++)}' NAME='__diff$DIFF'>$opt_b</A>\n"
if $opt_l;
print "($DIFF differences)\n" if $opt_k;
# Returns a list of words. A word is either a set of non-space
# characters separated by spaces or a string enclosed between '<' and '>'.
# Inputs: string
# Outputs: array of words
sub GetWordList {
$_ = $_[0];
my(@out);
my($last_pos) = -1;
while ($last_pos != pos) {
$last_pos = pos;
next if (/\G(\s+)/gc);
if (/\G(<.*?>)/gc || /\G([^<\s]+)/gc) {
push (@out, $1);
}
}
return @out;
}
# Marks all non-HTML words by putting the prefix and suffix around
# them.
# Inputs: string, prefix, suffix
# Returns: string
sub MarkNonHTML {
my($s, $pfx, $sfx) = @_;
$s =~ s/(>|\A)(?!\s+(?:<|\Z))([^<]+)/$1$pfx$2$sfx/g;
return $s;
}
# Returns a string that links from one difference section to the next
# Inputs: None
# Returns: string
# Uses globals: $opt_l, $DIFF
sub OptLString {
return $opt_l ?
"<A HREF='#__diff${\scalar($DIFF++)}' NAME='__diff$DIFF'>$opt_b</A> <A HREF='#__diff${\scalar($DIFF+1)}'>$opt_f</A> " :
"";
}
# This subroutine extracts and prints usage information
sub Usage {
my ($what) = @_;
$what = "Usage" if ! $what;
my $mark = $what eq 'Description' ? "($what|Usage)" : $what;
if (open(ME,$0) == 1) {
while (<ME>) {
if ((/^=begin $mark/ .. /^=end $mark/) &&
! /^=(begin|end) $mark/) {
s/(\$\{[^\}]+\})/eval($1)/ge;
print;
}
}
close(ME);
}
else {
print STDERR "Usage not available.\n";
}
exit (1);
}