#!/usr/bin/perl
#
# Format XHTML generated by pydoc (via tidy) for websites
#
# Usage: tidy ... pydoc.html | fix-python-xhtml OUTPUT-FILE
#
# (C) Copyright 2002 Dave Beckett - http://www.dajobe.org/
# University of Bristol
#
use strict;
use File::Basename;
# Filter: reads tidy'd pydoc XHTML on STDIN, cleans it up line by line and
# writes the result to OUTPUT-FILE (or to STDOUT when OUTPUT-FILE is "-").
#
# Per input line it:
#   - replaces the <title> with "$main_title - $doc_title"
#   - drops <link> and <meta> elements
#   - replaces the first "#"-commented <tt> paragraph run with a marker comment
#   - unlinks references to other Python modules and to example/local URLs
#   - normalises anchor ids/names and fixes up fragment links
#   - replaces <body ...> with a plain <body> plus a centred <h1>
#   - emits a copyright footer just before </body>
my $progname=basename $0;

my $main_title="Redland RDF Application Framework";
my $doc_title="Python API Reference";

die "USAGE: $progname OUTPUT-FILE\n" if @ARGV < 1;

my($file)=@ARGV;

# NOTE(review): these two are not referenced anywhere in the visible code;
# kept in case something later in the file depends on them.
my($pod_rdf_rel)=($file =~ m%^pod/RDF/%) ? '../' : '';
my($pod_rdf_redland_rel)=($file =~ m%^pod/RDF/Redland/%) ? '' : 'Redland/';

# State of the licence-heading skip:
#   0 = heading not seen yet, 1 = inside the heading, 2 = heading done.
my $skip_heading=0;

# Year for the footer; constant for the whole run, so computed once.
my $year=1900+(localtime)[5];

# Three-argument open so the file name is never interpreted as a mode.
# "-" is still honoured as STDOUT, as it was with the old two-argument form.
my $out;
if($file eq '-') {
    $out=\*STDOUT;
} else {
    open($out, '>', $file) or die "$progname: Cannot create $file - $!\n";
}

print {$out} qq{<?xml version="1.0" encoding="iso-8859-1"?>\n};

while(<STDIN>) {
    s%<title>(.*?)</title>%<title>$main_title - $doc_title</title>%;

    # Drop <link ...> and <meta ...> elements.  The alternation is grouped so
    # that only lines *starting* with one of these tags are skipped; ordinary
    # text that merely contains "meta" is kept.
    next if m%^\s*<(?:link|meta)\b%i;

    # skip the #-commented heading
    if(!$skip_heading && m%^<p><tt>\#%) {
        $skip_heading=1;
        next;
    } elsif ($skip_heading == 1) {
        if(m%\#</tt></p>%) {
            $skip_heading=2;
            print {$out} "<!-- LICENSE HEADING -->\n";
        }
        next;
    }

    # Links to other Python modules, exceptions
    s%<a href="(Redland|string|sys|exceptions|__builtin__).html[^"]*">(.+?)</a>%$2%g; #"

    # The horrors of guessing urls in text
    s%<a href="http://example.(org|com|net)[^"]*">([^<]+)</a>%$2%g; # "
    s%<a href="http://(?:localhost/r.rdf|foo|bar)[^"]*">([^<]+)</a>%$2%g; # "

    # Never leave these in web pages
    s%<a href="file:[^"]+">/[^<]+</a>%%g; # "

    # Turn links into RDF.html into same-page fragment links (every
    # occurrence on the line, literal dot).
    s%RDF\.html\#%#%g;

    # Bad IDs, who writes this stuff?
    # First give every named anchor a matching id, with embedded markup removed...
    s%<a name="([^"]+)">%my $id=$1; $id =~ s/\<.*?\>//g; qq{<a id="$id" name="$id">}%ge;
    # ...then clean the id: no markup, no leading '-', spaces become '-'.
    s%<a id="([^"]+)" name="[^"]+">%my $id=$1; $id =~ s/\<.*?\>//g; $id =~ s/^-//; $id =~ s/ /-/g; qq{<a id="$id" name="$id">}%ge;
    # Fragment links get the same leading '-' removed so they still match
    # the cleaned ids above.
    s%<a href="#-([^"]+)">%<a href="#$1">%g;

    # All the __new__ links are broken
    s%<a href="\#\w+\-__new__">__new__</a>%__new__%g;

    # This is broken too, dunno why
    s%<a href="\#\-debug">debug</a>%debug%g;

    # Replace the generated <body ...> tag with a plain one plus page heading.
    if(m%^<body.*>%) {
        print {$out} qq{<body>\n\n<h1 style="text-align:center">$main_title - $doc_title</h1>\n\n};
        next;
    }

    # Copyright footer goes immediately before the closing </body> line.
    if(m%^</body>%) {
        print {$out} <<"EOT";
<hr />
<p>(C) Copyright 2000-$year <a href="http://www.dajobe.org/">Dave Beckett</a>, Copyright 2000-2005 <a href="http://www.bristol.ac.uk/">University of Bristol</a></p>
EOT
    }

    print {$out} $_;
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "$progname: Cannot write $file - $!\n";