The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

# HTML manual href link munging.

# Copyright 2005, 2006, 2007, 2009 Kevin Ryde
#
# This file is part of Chart.
#
# Chart is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# Chart is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


# Usage: ./mung-html.pl filename.html ...
#
# This program modifies the given "filename.html" etc input files, changing
# links like href="../foo.html#Node", as generated by makeinfo, into links
# to the GNU web site, or wherever.
#

use strict;
use warnings;
use Image::ExifTool;
use Perl6::Slurp ('slurp');

use FindBin;
my $progname = $FindBin::Script;

my $option_verbose = 0;
my $everything_good = 1;


#------------------------------------------------------------------------------
# href external links

my $external_count = 0;

my @external_gnu = (['elisp',   'emacs/manual/html_node/elisp'],
                    ['emacs',   'emacs/manual/html_node/emacs'],
                    ['bashref', 'bash/manual/html_node'],
                    'gettext',
                    'libc',
                    ['texinfo', 'texinfo/manual/texinfo/html_node'],
                    # ['info_stnd', 'texinfo/manual/info_stnd'],
                   );

my @external_gnu_onepage = (
                           );

sub mung_external_links {
  my ($body) = @_;

  # no_split style:
  #     foo.html#Top
  #     foo.html#Node_Name
  #
  $body =~ s{href="(([a-z_]+)[.]html#([^"]+))}
            {'href="' . mung_one_link ($1,$2,$3)}eg;
  $body =~ s{href="([.][.]/([a-z_]+)/index[.]html#(Top))}
            {'href="' . mung_one_link ($1,$2,$3)}eg;

  # split style:
  #    ../foo/index.html#Top
  #    ../foo/Node_Name.html#Node_Name
  #
  $body =~ s{href="([.][.]/([a-z_]+)/([^".]+)[.]html#\3)}
            {'href="' . mung_one_link ($1,$2,$3)}eg;

  return $body;
}

sub mung_one_link {
  my ($url, $manual, $node) = @_;
  if ($manual eq 'index' && $node eq 'Top') {
    # top of this manual, no change
    return $url;
  }
  $external_count++;

  if ($option_verbose) {
    print "$url\n";
  }

  # @external_gnu
  # $manual is at www.gnu.org.
  # $subdir is the subdirectory of http://www.gnu.org/software/ to find
  # it, defaulting to
  #     http://www.gnu.org/software/PACKAGE/manual/index.html
  #     http://www.gnu.org/software/PACKAGE/manual/html_node/Foo.html
  #
  foreach my $elem (@external_gnu) {
    my ($elem_manual, $subdir);
    if (ref $elem) {
      ($elem_manual, $subdir) = @$elem;
    } else {
      $elem_manual = $elem;
      $subdir = "$manual/manual/html_node";
    }
    if ($elem_manual ne $manual) { next; }

    if ($node eq 'Top') {
      $url = "http://www.gnu.org/software/$subdir/index.html";
    } else {
      $url = "http://www.gnu.org/software/$subdir/$node.html";
    }
    if ($option_verbose) {
      print "  -> $url\n";
    }
    return $url;
  }

  # @external_gnu_onepage
  # $manual is at www.gnu.org, but only in single_page html form
  #   http://www.gnu.org/software/$package/$manual.html
  #
  foreach my $elem (@external_gnu_onepage) {
    my ($elem_manual, $subdir) = @$elem;
    if ($elem_manual ne $manual) { next; }

    if ($node eq 'Top') {
      $url = "http://www.gnu.org/software/$subdir/$manual.html";
    } else {
      $url = "http://www.gnu.org/software/$subdir/manual.html#$node";
    }
    if ($option_verbose) {
      print "  -> $url\n";
    }
    return $url;
  }

  #

  if ($option_verbose) {
    print "  unchanged\n";
  }
  return $url;
}

# # no online ses manual yet ...
# (external_any "ses" "http://home.comcast.net/~jyavner/ses/")
# 


#------------------------------------------------------------------------------
# png sizes

my $image_count = 0;

sub mung_img_sizes {
  my ($body) = @_;
  $body =~ s{<img (src="([^"]+)")}{
    my $filename = $2;
    $image_count++;
    my $info = Image::ExifTool::ImageInfo($filename);
    if (my $error = $info->{'Error'}) {
      print "$progname: cannot get image info for '$filename': $error\n";
    }
    "<img width=$info->{'ImageWidth'} height=$info->{'ImageHeight'} $1"
  }egi;
  return $body;
}


#------------------------------------------------------------------------------
# copyright at end

sub mung_copyright {
  my ($body, $filename) = @_;

  my $see_copying = '';
  if ($filename ~~ m{/Copying\.html$}) {
    my $link = ($body =~ /href="#Copying"/
                ? '#Copying'
                : 'Copying.html');
    my $see_copying = "\n(see <a href=\"$link\">Copying</a>)";
  }

  $body =~ s{</body>}{<hr width="100%">
<p>
Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Kevin Ryde

<p>
Chart is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 3, or (at your option) any later version$see_copying.
</p>

</body>};

  return $body;
}


#------------------------------------------------------------------------------
# footer link back to home page

sub mung_footer_link {
  my ($body) = @_;

  $body =~ s{</body>}{<p>
(Back to <a href="../../index.html">Chart home page</a>.)
</p>

</body>};

  return $body;
}


#------------------------------------------------------------------------------

sub mung_file {
  my ($filename) = @_;
  my $body = slurp ($filename);
  $body = mung_external_links ($body);
  $body = mung_img_sizes ($body);
  $body = mung_copyright ($body, $filename);

  # not sure if this is any good ...
  # $body = mung_footer_link ($body);

  open my $out, '>', $filename or die;
  print $out $body or die;
  close $out or die;
}

foreach my $filename (@ARGV) {
  mung_file ($filename);
}

if (! $everything_good) {
  exit 1;
}
print "munged $external_count external links, $image_count image sizes\n";
exit 0;