#!/usr/bin/perl
# HTML manual href link munging.
# Copyright 2005, 2006, 2007, 2009 Kevin Ryde
#
# This file is part of Chart.
#
# Chart is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# Chart is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Usage: ./mung-html.pl filename.html ...
#
# This program modifies the given "filename.html" etc input files, changing
# links like href="../foo.html#Node", as generated by makeinfo, into links
# to the GNU web site, or wherever.
#
use strict;
use warnings;
use Image::ExifTool;
use Perl6::Slurp ('slurp');
use FindBin;
my $progname = $FindBin::Script;
my $option_verbose = 0;
my $everything_good = 1;
#------------------------------------------------------------------------------
# href external links
my $external_count = 0;
my @external_gnu = (['elisp', 'emacs/manual/html_node/elisp'],
['emacs', 'emacs/manual/html_node/emacs'],
['bashref', 'bash/manual/html_node'],
'gettext',
'libc',
['texinfo', 'texinfo/manual/texinfo/html_node'],
# ['info_stnd', 'texinfo/manual/info_stnd'],
);
my @external_gnu_onepage = (
);
sub mung_external_links {
my ($body) = @_;
# no_split style:
# foo.html#Top
# foo.html#Node_Name
#
$body =~ s{href="(([a-z_]+)[.]html#([^"]+))}
{'href="' . mung_one_link ($1,$2,$3)}eg;
$body =~ s{href="([.][.]/([a-z_]+)/index[.]html#(Top))}
{'href="' . mung_one_link ($1,$2,$3)}eg;
# split style:
# ../foo/index.html#Top
# ../foo/Node_Name.html#Node_Name
#
$body =~ s{href="([.][.]/([a-z_]+)/([^".]+)[.]html#\3)}
{'href="' . mung_one_link ($1,$2,$3)}eg;
return $body;
}
sub mung_one_link {
my ($url, $manual, $node) = @_;
if ($manual eq 'index' && $node eq 'Top') {
# top of this manual, no change
return $url;
}
$external_count++;
if ($option_verbose) {
print "$url\n";
}
# @external_gnu
# $manual is at www.gnu.org.
# $subdir is the subdirectory of http://www.gnu.org/software/ to find
# it, defaulting to
# http://www.gnu.org/software/PACKAGE/manual/index.html
# http://www.gnu.org/software/PACKAGE/manual/html_node/Foo.html
#
foreach my $elem (@external_gnu) {
my ($elem_manual, $subdir);
if (ref $elem) {
($elem_manual, $subdir) = @$elem;
} else {
$elem_manual = $elem;
$subdir = "$manual/manual/html_node";
}
if ($elem_manual ne $manual) { next; }
if ($node eq 'Top') {
$url = "http://www.gnu.org/software/$subdir/index.html";
} else {
$url = "http://www.gnu.org/software/$subdir/$node.html";
}
if ($option_verbose) {
print " -> $url\n";
}
return $url;
}
# @external_gnu_onepage
# $manual is at www.gnu.org, but only in single_page html form
# http://www.gnu.org/software/$package/$manual.html
#
foreach my $elem (@external_gnu_onepage) {
my ($elem_manual, $subdir) = @$elem;
if ($elem_manual ne $manual) { next; }
if ($node eq 'Top') {
$url = "http://www.gnu.org/software/$subdir/$manual.html";
} else {
$url = "http://www.gnu.org/software/$subdir/manual.html#$node";
}
if ($option_verbose) {
print " -> $url\n";
}
return $url;
}
#
if ($option_verbose) {
print " unchanged\n";
}
return $url;
}
# # no online ses manual yet ...
# (external_any "ses" "http://home.comcast.net/~jyavner/ses/")
#
#------------------------------------------------------------------------------
# png sizes
my $image_count = 0;
sub mung_img_sizes {
my ($body) = @_;
$body =~ s{<img (src="([^"]+)")}{
my $filename = $2;
$image_count++;
my $info = Image::ExifTool::ImageInfo($filename);
if (my $error = $info->{'Error'}) {
print "$progname: cannot get image info for '$filename': $error\n";
}
"<img width=$info->{'ImageWidth'} height=$info->{'ImageHeight'} $1"
}egi;
return $body;
}
#------------------------------------------------------------------------------
# copyright at end
sub mung_copyright {
my ($body, $filename) = @_;
my $see_copying = '';
if ($filename ~~ m{/Copying\.html$}) {
my $link = ($body =~ /href="#Copying"/
? '#Copying'
: 'Copying.html');
my $see_copying = "\n(see <a href=\"$link\">Copying</a>)";
}
$body =~ s{</body>}{<hr width="100%">
<p>
Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Kevin Ryde
<p>
Chart is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 3, or (at your option) any later version$see_copying.
</p>
</body>};
return $body;
}
#------------------------------------------------------------------------------
# footer link back to home page
sub mung_footer_link {
my ($body) = @_;
$body =~ s{</body>}{<p>
(Back to <a href="../../index.html">Chart home page</a>.)
</p>
</body>};
return $body;
}
#------------------------------------------------------------------------------
sub mung_file {
my ($filename) = @_;
my $body = slurp ($filename);
$body = mung_external_links ($body);
$body = mung_img_sizes ($body);
$body = mung_copyright ($body, $filename);
# not sure if this is any good ...
# $body = mung_footer_link ($body);
open my $out, '>', $filename or die;
print $out $body or die;
close $out or die;
}
foreach my $filename (@ARGV) {
mung_file ($filename);
}
if (! $everything_good) {
exit 1;
}
print "munged $external_count external links, $image_count image sizes\n";
exit 0;