The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

# ABSTRACT: Generate a wxwidgets POD file from a wxWidgets HTML documentation source

# Dist::Zilla needs a package name
package parse_wxwidgets_docs;

use 5.006;
use strict;
use warnings;

use File::Temp       ();
use File::Spec       ();
use HTML::Parse      ();
use HTML::FormatText ();
use Getopt::Long     ();

my ( $pod, $verbose, $help ) = ( 'wxwidgets.pod', 0, 0 );
my $result = Getopt::Long::GetOptions(
	"pod=s"   => \$pod,
	"verbose" => \$verbose,
	"help"    => \$help,
);

# User Asked for help?
if ($help) {
	_help();
	exit;
}

# Read the wxwidget HTML documentation directory name
my $wx_dir = shift @ARGV;

unless ($wx_dir) {

	# User did not specify a wxwidgets directory
	# Let us download it from sourceforge and unzip it

	# The name of the wxwidget ZIP filename to download from the sourceforge website
	my $WX_WIGDETS_HTML_ZIP = 'wxWidgets-2.8.12-HTML.zip';

	# Download and unzip the wxWidgets HTML documentation zip file
	my $dir = _download_unzipped_wxwidgets_html_doc($WX_WIGDETS_HTML_ZIP);

	# and specify the wx documentation base directory
	$wx_dir = File::Spec->join( $dir, 'docs', 'mshtml', 'wx' );
} else {

	# User specified a wxwidget directory
	# Let us validate it
	unless ( -d $wx_dir ) {
		warn "WxWidgets HTML documentation directory is not valid\n";
		exit;
	}
}

# Read WX Classes list HTML class reference file
my @wxclasses = _read_wx_classes_list($wx_dir);
TRACE( "Found " . scalar @wxclasses . " Wx Classes to parse" );

# Write the final POD while processing all html files
_write_pod( $pod, $wx_dir, @wxclasses );

# and we're done
exit;

#
# TRACing print function that is enabled when --verbose is specified
#
sub TRACE {
	print( $_[0] . "\n" ) if $verbose;
}

#
# The command-line help
#
sub _help {
	print <<"HELP";

This is $0, an HTML to POD wxwidgets documentation generator.

Usage:

	$0 [options] [wx-widgets-html-directory]

The optional 'wx-widgets-html-directory' points to the wxwidgets
HTML documentation directory. If this is omitted, the script will
automatically try to download the HTML documentation from the
wxWidgets sourceforge website.

If you have installed wx2.8-doc on Debian, you can use the
following path:
/usr/share/doc/wx2.8-doc/wx-manual.html

Options can be one of the following:
--pod=filename
	wxwidgets POD file name. Defaults to wxwidgets.pod

--verbose
	Enabling verbose logging to standard output

--help
	Prints out usage help.

HELP

}

#
# Download if not found and then extract wxwidgets HTML documentation zip file
# Returns the extracted directory or otherwise dies
#
sub _download_unzipped_wxwidgets_html_doc {
	my ($WX_WIGDETS_HTML_ZIP) = @_;

	# Let us try to download it if the zip file is not found in the current
	# directory
	unless ( -e $WX_WIGDETS_HTML_ZIP ) {
		my $url = "http://garr.dl.sourceforge.net/project/wxwindows/2.8.12/$WX_WIGDETS_HTML_ZIP";
		TRACE("Downloading $url. Please wait...");
		require LWP::UserAgent;
		require HTTP::Request;
		my $ua  = LWP::UserAgent->new;
		my $req = HTTP::Request->new( GET => $url );
		my $res = $ua->request($req);
		if ( not $res->is_success ) {
			die $res->status_line, "\n";
		}

		# Write download file to disk
		TRACE("Writing $WX_WIGDETS_HTML_ZIP...");
		if ( open FILE, '>:raw', $WX_WIGDETS_HTML_ZIP ) {
			print FILE $res->content;
			close FILE;
		} else {
			die "Could not open $WX_WIGDETS_HTML_ZIP for writing\n";
		}
	}

	# Unzip the html zip file
	my $dir = File::Temp::tempdir;
	require Archive::Extract;
	my $zip = Archive::Extract->new( archive => $WX_WIGDETS_HTML_ZIP );
	die "$WX_WIGDETS_HTML_ZIP is not a zip file\n" unless ( $zip->is_zip );
	$zip->extract( to => $dir ) or die $zip->error;

	return $dir;
}

#
# Read WX classes list index from docs/mshtml/wx/wx_classref.html
#
sub _read_wx_classes_list {
	my $dir = shift;

	my $wx_classref = File::Spec->join( $dir, 'wx_classref.html' );

	# Stores a list of WX classes filenames
	my @wxclasses = ();

	#Step 1: Read Wx classes list from wx_classref.html
	if ( open( my $fh, $wx_classref ) ) {
		TRACE("Opened $wx_classref");
		my $begin;
		while ( my $line = <$fh> ) {
			if ( $line =~ /<H2>Alphabetical class reference<\/H2>/ ) {
				$begin = 1;
			} elsif ( $begin
				&& $line =~ /<A HREF="(.+?)#.+?"><B>(.+)?<\/B><\/A><BR>/ )
			{
				my ( $file, $class ) = ( $1, $2 );
				$class =~ s/wx(.+?)/Wx::$1/;
				push @wxclasses, { file => $file, class => $class };
			}
		}
	} else {
		die "Could not open $wx_classref in $dir\n";
	}

	return @wxclasses;
}

#
# Process wxClassName HTML file
#
sub _process_class {
	my ( $class, $file ) = @_;

	my $oldclass;
	my $pod_text = '';
	if ( open my $html_file, '<', $file ) {
		my $desc = '';
		my $name;
		while ( my $line = <$html_file> ) {
			if ( $line =~ /<H3>(.+?)<\/H3>/ ) {
				$name = $1;
				$name =~ s/wx(.+?)/Wx::$1/;
				if ( $name =~ /^Wx::(.+?)::(.+?)$/ ) {
					my $method = $2;
					if ( $method eq "wx$1" ) {

						# Convert C++ constructor to ::new
						$name = $class . '::new';
					} elsif ( $method =~ /^~.+/ ) {

						# Convert C++ destructor to ::DESTROY
						$name = $class . '::DESTROY';
					} elsif ( $method =~ /^operator.+/ ) {

						# Ignore operators
						$name = undef;
					}
				}
				$desc = '';
			} elsif ( $line =~ /^\s*$/ ) {
				if ($name) {
					if ( !$oldclass || $class ne $oldclass ) {

						# print out new class header
						$pod_text .= "=head1 $class\n\n";
						$oldclass = $class;
					}

					# print out method description
					$desc = HTML::FormatText->new->format( HTML::Parse::parse_html($desc) );
					$pod_text .= "=head2 $name\n\n$desc\n";

					$name = undef;
				}
			} else {
				$desc .= $line;
			}
		}
		close $html_file;
	}

	return $pod_text;
}

#
# Writes wxwidgets.pod... :)
#
sub _write_pod {
	my ( $pod, $wx_dir, @wxclasses ) = @_;
	TRACE("Writing $pod");
	if ( open( my $pod, '>', $pod ) ) {
		binmode($pod);
		my $oldclass;
		foreach my $wxclass (@wxclasses) {
			my $file = File::Spec->join( $wx_dir, $wxclass->{file} );
			print $pod _process_class( $wxclass->{class}, $file );
		}
		print $pod copyright_pod();
		close $pod;
	} else {
		die "Could not write $pod\n";
	}
}

#
# Copyright and license POD section
#
sub copyright_pod {
	# this is done to avoid Dist::Zilla PodWeaver from croaking
	return '=' . <<'END';
head1 COPYRIGHT AND LICENSE

Copyright 2010 C<< <ahmad.zawawi at gmail.com> >>

This document was generated by C<parse_wxwidgets_docs> which is found at
L<http://svn.perlide.org/padre/trunk/tools/parse_wxwidgets_docs>. The original
wxWidgets HTML documentation
L<http://garr.dl.sourceforge.net/project/wxwindows/Documents/2.8.10/wxWidgets-2.8.10-HTML.zip>
is copyrighted as the following:

    Copyright (c) 1992-2006 by Julian Smart, Robert Roebling, Vadim Zeitlin
    and other members of the wxWidgets team. Portions (c) 1996 Artificial
    Intelligence Applications Institute.

    The original wxWidgets HTML documentation is licensed under:
      wxWindows Library License version 3.1, http://docs.wxwidgets.org/2.8.10/wx_wxlicense.html
      GNU Library General Public License version 2, http://docs.wxwidgets.org/2.8.10/wx_gnulicense.html

This document is part of free software; you can redistribute it and/or modify it
under the same terms as Perl 5 itself.

END
}

__END__

=head1 DESCRIPTION

This is a simple script to parse WxWidgets HTML documentation into something useful
that we can make use of in Padre help system :)

=head1 SYNOPSIS

    # Generates myfullpodname from wx2.8-doc HTML documentation
    parse_wxwidgets_docs /usr/share/doc/wx2.8-doc/wx-manual.html/ --pod=myfullpodname

    # Generates wxwidgets.pod after downloading from wxwidgets website.
    # Defaults to 2.8 at the moment
    parse_wxwidgets_docs

=head1 USAGE

    parse_wxwidgets_docs [options] [wx-widgets-html-directory]

The optional 'wx-widgets-html-directory' points to the wxwidgets
HTML documentation directory. If this is omitted, the script will
automatically try to download the HTML documentation from the
wxWidgets sourceforge website.

=head1 OPTIONS

=over 4

=item --pod=filename

	wxwidgets POD file name. Defaults to wxwidgets.pod

=item --verbose

	Enabling verbose logging to standard output

=item --help

	Prints out this screen.

=back