#!/usr/bin/perl
# ABSTRACT: Generate a wxwidgets POD file from a wxWidgets HTML documentation source
# Dist::Zilla needs a package name
package parse_wxwidgets_docs;
use 5.006;
use strict;
use warnings;
use File::Temp ();
use File::Spec ();
use HTML::Parse ();
use HTML::FormatText ();
use Getopt::Long ();
my ( $pod, $verbose, $help ) = ( 'wxwidgets.pod', 0, 0 );
my $result = Getopt::Long::GetOptions(
"pod=s" => \$pod,
"verbose" => \$verbose,
"help" => \$help,
);
# User Asked for help?
if ($help) {
_help();
exit;
}
# Read the wxwidget HTML documentation directory name
my $wx_dir = shift @ARGV;
unless ($wx_dir) {
# User did not specify a wxwidgets directory
# Let us download it from sourceforge and unzip it
# The name of the wxwidget ZIP filename to download from the sourceforge website
my $WX_WIGDETS_HTML_ZIP = 'wxWidgets-2.8.12-HTML.zip';
# Download and unzip the wxWidgets HTML documentation zip file
my $dir = _download_unzipped_wxwidgets_html_doc($WX_WIGDETS_HTML_ZIP);
# and specify the wx documentation base directory
$wx_dir = File::Spec->join( $dir, 'docs', 'mshtml', 'wx' );
} else {
# User specified a wxwidget directory
# Let us validate it
unless ( -d $wx_dir ) {
warn "WxWidgets HTML documentation directory is not valid\n";
exit;
}
}
# Read WX Classes list HTML class reference file
my @wxclasses = _read_wx_classes_list($wx_dir);
TRACE( "Found " . scalar @wxclasses . " Wx Classes to parse" );
# Write the final POD while processing all html files
_write_pod( $pod, $wx_dir, @wxclasses );
# and we're done
exit;
#
# TRACing print function that is enabled when --verbose is specified
#
sub TRACE {
print( $_[0] . "\n" ) if $verbose;
}
#
# The command-line help
#
sub _help {
print <<"HELP";
This is $0, an HTML to POD wxwidgets documentation generator.
Usage:
$0 [options] [wx-widgets-html-directory]
The optional 'wx-widgets-html-directory' points to the wxwidgets
HTML documentation directory. If this is omitted, the script will
automatically try to download the HTML documentation from the
wxWidgets sourceforge website.
If you have installed wx2.8-doc on Debian, you can use the
following path:
/usr/share/doc/wx2.8-doc/wx-manual.html
Options can be one of the following:
--pod=filename
wxwidgets POD file name. Defaults to wxwidgets.pod
--verbose
Enabling verbose logging to standard output
--help
Prints out usage help.
HELP
}
#
# Download if not found and then extract wxwidgets HTML documentation zip file
# Returns the extracted directory or otherwise dies
#
sub _download_unzipped_wxwidgets_html_doc {
my ($WX_WIGDETS_HTML_ZIP) = @_;
# Let us try to download it if the zip file is not found in the current
# directory
unless ( -e $WX_WIGDETS_HTML_ZIP ) {
my $url = "http://garr.dl.sourceforge.net/project/wxwindows/2.8.12/$WX_WIGDETS_HTML_ZIP";
TRACE("Downloading $url. Please wait...");
require LWP::UserAgent;
require HTTP::Request;
my $ua = LWP::UserAgent->new;
my $req = HTTP::Request->new( GET => $url );
my $res = $ua->request($req);
if ( not $res->is_success ) {
die $res->status_line, "\n";
}
# Write download file to disk
TRACE("Writing $WX_WIGDETS_HTML_ZIP...");
if ( open FILE, '>:raw', $WX_WIGDETS_HTML_ZIP ) {
print FILE $res->content;
close FILE;
} else {
die "Could not open $WX_WIGDETS_HTML_ZIP for writing\n";
}
}
# Unzip the html zip file
my $dir = File::Temp::tempdir;
require Archive::Extract;
my $zip = Archive::Extract->new( archive => $WX_WIGDETS_HTML_ZIP );
die "$WX_WIGDETS_HTML_ZIP is not a zip file\n" unless ( $zip->is_zip );
$zip->extract( to => $dir ) or die $zip->error;
return $dir;
}
#
# Read WX classes list index from docs/mshtml/wx/wx_classref.html
#
sub _read_wx_classes_list {
my $dir = shift;
my $wx_classref = File::Spec->join( $dir, 'wx_classref.html' );
# Stores a list of WX classes filenames
my @wxclasses = ();
#Step 1: Read Wx classes list from wx_classref.html
if ( open( my $fh, $wx_classref ) ) {
TRACE("Opened $wx_classref");
my $begin;
while ( my $line = <$fh> ) {
if ( $line =~ /<H2>Alphabetical class reference<\/H2>/ ) {
$begin = 1;
} elsif ( $begin
&& $line =~ /<A HREF="(.+?)#.+?"><B>(.+)?<\/B><\/A><BR>/ )
{
my ( $file, $class ) = ( $1, $2 );
$class =~ s/wx(.+?)/Wx::$1/;
push @wxclasses, { file => $file, class => $class };
}
}
} else {
die "Could not open $wx_classref in $dir\n";
}
return @wxclasses;
}
#
# Process wxClassName HTML file
#
sub _process_class {
my ( $class, $file ) = @_;
my $oldclass;
my $pod_text = '';
if ( open my $html_file, '<', $file ) {
my $desc = '';
my $name;
while ( my $line = <$html_file> ) {
if ( $line =~ /<H3>(.+?)<\/H3>/ ) {
$name = $1;
$name =~ s/wx(.+?)/Wx::$1/;
if ( $name =~ /^Wx::(.+?)::(.+?)$/ ) {
my $method = $2;
if ( $method eq "wx$1" ) {
# Convert C++ constructor to ::new
$name = $class . '::new';
} elsif ( $method =~ /^~.+/ ) {
# Convert C++ destructor to ::DESTROY
$name = $class . '::DESTROY';
} elsif ( $method =~ /^operator.+/ ) {
# Ignore operators
$name = undef;
}
}
$desc = '';
} elsif ( $line =~ /^\s*$/ ) {
if ($name) {
if ( !$oldclass || $class ne $oldclass ) {
# print out new class header
$pod_text .= "=head1 $class\n\n";
$oldclass = $class;
}
# print out method description
$desc = HTML::FormatText->new->format( HTML::Parse::parse_html($desc) );
$pod_text .= "=head2 $name\n\n$desc\n";
$name = undef;
}
} else {
$desc .= $line;
}
}
close $html_file;
}
return $pod_text;
}
#
# Writes wxwidgets.pod... :)
#
sub _write_pod {
my ( $pod, $wx_dir, @wxclasses ) = @_;
TRACE("Writing $pod");
if ( open( my $pod, '>', $pod ) ) {
binmode($pod);
my $oldclass;
foreach my $wxclass (@wxclasses) {
my $file = File::Spec->join( $wx_dir, $wxclass->{file} );
print $pod _process_class( $wxclass->{class}, $file );
}
print $pod copyright_pod();
close $pod;
} else {
die "Could not write $pod\n";
}
}
#
# Copyright and license POD section
#
sub copyright_pod {
# this is done to avoid Dist::Zilla PodWeaver from croaking
return '=' . <<'END';
head1 COPYRIGHT AND LICENSE
Copyright 2010 C<< <ahmad.zawawi at gmail.com> >>
This document was generated by C<parse_wxwidgets_docs> which is found at
L<http://svn.perlide.org/padre/trunk/tools/parse_wxwidgets_docs>. The original
wxWidgets HTML documentation
L<http://garr.dl.sourceforge.net/project/wxwindows/Documents/2.8.10/wxWidgets-2.8.10-HTML.zip>
is copyrighted as the following:
Copyright (c) 1992-2006 by Julian Smart, Robert Roebling, Vadim Zeitlin
and other members of the wxWidgets team. Portions (c) 1996 Artificial
Intelligence Applications Institute.
The original wxWidgets HTML documentation is licensed under:
wxWindows Library License version 3.1, http://docs.wxwidgets.org/2.8.10/wx_wxlicense.html
GNU Library General Public License version 2, http://docs.wxwidgets.org/2.8.10/wx_gnulicense.html
This document is part of free software; you can redistribute it and/or modify it
under the same terms as Perl 5 itself.
END
}
__END__
=head1 DESCRIPTION
This is a simple script to parse WxWidgets HTML documentation into something useful
that we can make use of in Padre help system :)
=head1 SYNOPSIS
# Generates myfullpodname from wx2.8-doc HTML documentation
parse_wxwidgets_docs /usr/share/doc/wx2.8-doc/wx-manual.html/ --pod=myfullpodname
# Generates wxwidgets.pod after downloading from wxwidgets website.
# Defaults to 2.8 at the moment
parse_wxwidgets_docs
=head1 USAGE
parse_wxwidgets_docs [options] [wx-widgets-html-directory]
The optional 'wx-widgets-html-directory' points to the wxwidgets
HTML documentation directory. If this is omitted, the script will
automatically try to download the HTML documentation from the
wxWidgets sourceforge website.
=head1 OPTIONS
=over 4
=item --pod=filename
wxwidgets POD file name. Defaults to wxwidgets.pod
=item --verbose
Enabling verbose logging to standard output
=item --help
Prints out this screen.
=back