# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::Scraper::Wikipedia::ISO3166::Database::Download;

use parent 'WWW::Scraper::Wikipedia::ISO3166::Database';
use feature 'say';
use strict;
use warnings;
use warnings  qw(FATAL utf8);    # Fatalize encoding glitches.

use HTTP::Tiny;

use Moo;

use Types::Standard qw/Str/;

# The 2-letter code of the country whose subcountry page is to be downloaded.
# Read-write string attribute; defaults to 'AU' when not supplied to new().

has code2 => (
	is       => 'rw',
	isa      => Str,
	required => 0,
	default  => sub { 'AU' },
);

# The URL of the ISO_3166-1 page. Read-write string attribute.
# The download methods in this class derive the ISO_3166-2 URLs from it by replacing '-1' with '-2'.

has url => (
	is       => 'rw',
	isa      => Str,
	required => 0,
	default  => sub { 'http://en.wikipedia.org/wiki/ISO_3166-1' },
);

our $VERSION = '2.00';

# -----------------------------------------------

# Download $url with HTTP::Tiny and save the response content in $data_file.
#
# Parameters:
#	$url:       The URL to fetch.
#	$data_file: The name of the file which receives the response content.
#
# Returns 0 for success and 1 if the HTTP request failed. A failure is logged at level 'error'.
#
# Dies if $data_file can't be opened, written or closed, so that a partially-written
# file is never reported as a successful download.

sub get_1_page
{
	my($self, $url, $data_file) = @_;

	my($response) = HTTP::Tiny -> new -> get($url);

	if (! $$response{success})
	{
		$self -> log(error => "Failed to get $url");
		$self -> log(error => "HTTP status: $$response{status} => $$response{reason}");

		# HTTP::Tiny reports its own internal exceptions with the pseudo-status 599,
		# in which case the content holds the text of the exception.

		if ($$response{status} == 599)
		{
			$self -> log(error => "Exception message: $$response{content}");
		}

		# Return 0 for success and 1 for failure.

		return 1;
	}

	open(my $fh, '>', $data_file) or die "Can't open file: $data_file: $!\n";

	# Buffered write errors (e.g. disk full) may only surface at close(), so check both calls.

	print {$fh} $$response{content} or die "Can't write file: $data_file: $!\n";
	close($fh) or die "Can't close file: $data_file: $!\n";

	$self -> log(debug => "Downloaded '$url' to '$data_file'");

	# Return 0 for success and 1 for failure.

	return 0;

} # End of get_1_page.

# -----------------------------------------------

# Download the 2 country pages, ISO_3166-1 and ISO_3166-2.
#
# The 1st page is fetched from $self -> url and saved as $self -> data_file . '.html'.
# The 2nd page uses the same URL and file name, but with the first '-1' replaced by '-2'.
#
# Both downloads are always attempted, and each page's own result is logged at level 'info'.
#
# Returns 0 if both downloads succeeded and 1 if either failed.

sub get_country_pages
{
	my($self) = @_;

	# Firstly, get the ISO_3166-1 page.

	my($url)		= $self -> url;
	my($data_file)	= $self -> data_file;
	my($result_1)	= $self -> get_1_page($url, $data_file . '.html');

	$self -> log(info => "Result: $result_1. URL: $url. (0 is success)");

	# Secondly, get the ISO_3166-2 page.

	$url		=~ s/-1/-2/;
	$data_file	=~ s/-1/-2/;

	my($result_2) = $self -> get_1_page($url, $data_file . '.html');

	# Log this page's own result, not a running total, so the message matches the URL it names.

	$self -> log(info => "Result: $result_2. URL: $url. (0 is success)");

	# Return 0 for success and 1 for failure.
	# The results are combined logically rather than summed, so the return value is never 2.

	return ($result_1 || $result_2) ? 1 : 0;

} # End of get_country_pages.

# -----------------------------------------------

# Download 1 subcountry page, for the country whose 2-letter code is $self -> code2.
#
# The URL is $self -> url . ":$code2" with the first '-1' replaced by '-2',
# e.g. http://en.wikipedia.org/wiki/ISO_3166-2:AU.
# The output file is $self -> data_file with the first '-1' replaced by '-2', followed by ".$code2.html".
#
# The result is logged at level 'info'.
#
# Returns the result of get_1_page(): 0 for success and 1 for failure.

sub get_subcountry_page
{
	my($self)		= @_;
	my($code2)		= $self -> code2;
	my($url)		= $self -> url . ":$code2";
	$url			=~ s/-1/-2/;
	my($data_file)	= $self -> data_file;
	$data_file		=~ s/-1/-2/;

	# Use the rewritten (-2) file name, so the output matches the ISO_3166-2 URL it came from.

	my($result) = $self -> get_1_page($url, $data_file . ".$code2.html");

	$self -> log(info => "Result: $result. URL: $url. (0 is success)");

	# Return 0 for success and 1 for failure.

	return $result;

} # End of get_subcountry_page.

# -----------------------------------------------

# Download the subcountry page of every country whose page has not been downloaded yet.
#
# The 2-letter codes already downloaded come from find_subcountry_downloads(), and the
# countries come from read_countries_table(). For each country not yet downloaded,
# $self -> code2 is set to that country's code and get_subcountry_page() is called.
#
# There is a 5 second pause between consecutive downloads.
# A failed download does not stop the remaining downloads.
#
# The number of pages attempted is logged at level 'info'.
#
# Returns 0 if every attempted download succeeded (or none was needed) and 1 if any failed.

sub get_subcountry_pages
{
	my($self) = @_;

	# %downloaded will contain 2-letter codes.

	my(%downloaded);

	my($downloaded)				= $self -> find_subcountry_downloads;
	@downloaded{@$downloaded}	= (1) x @$downloaded;
	my($countries)				= $self -> read_countries_table;
	my($count)					= 0;
	my($result)					= 0;

	for my $id (keys %$countries)
	{
		my($code2) = $$countries{$id}{code2};

		next if ($downloaded{$code2});

		# Pause only between downloads, so there is no pointless delay after the last one.

		sleep 5 if ($count > 0);

		$count++;

		$self -> code2($code2);

		# Remember any failure, but keep going with the other countries.

		$result = 1 if ($self -> get_subcountry_page);
	}

	$self -> log(info => "Download page count: $count");

	# Return 0 for success and 1 for failure.

	return $result;

} # End of get_subcountry_pages.

# -----------------------------------------------

1;

=pod

=head1 NAME

WWW::Scraper::Wikipedia::ISO3166::Database::Download - Download various pages from Wikipedia

=head1 Synopsis

See L<WWW::Scraper::Wikipedia::ISO3166/Synopsis>.

=head1 Description

Downloads these pages:

Input: L<http://en.wikipedia.org/wiki/ISO_3166-1>.

Output: data/en.wikipedia.org.wiki.ISO_3166-1.html.

Input: L<http://en.wikipedia.org/wiki/ISO_3166-2>.

Output: data/en.wikipedia.org.wiki.ISO_3166-2.html.

Downloads each country's corresponding subcountry page.

Source: http://en.wikipedia.org/wiki/ISO_3166-2:$code2.

Output: data/en.wikipedia.org.wiki.ISO_3166-2.$code2.html.

See scripts/get.country.pages.pl, scripts/get.subcountry.page.pl and scripts/get.subcountries.pages.pl.

Note: These pages have been downloaded, and are shipped with the distro.

=head1 Constructor and initialization

new(...) returns an object of type C<WWW::Scraper::Wikipedia::ISO3166::Database::Download>.

This is the class's constructor.

Usage: C<< WWW::Scraper::Wikipedia::ISO3166::Database::Download -> new() >>.

This method takes a hash of options.

Call C<new()> as C<< new(option_1 => value_1, option_2 => value_2, ...) >>.

Available options (these are also methods):

=over 4

=item o code2 => $2_letter_code

Specifies the code2 of the country whose subcountry page is to be downloaded.

=back

=head1 Distributions

This module is available as a Unix-style distro (*.tgz).

See http://savage.net.au/Perl-modules.html for details.

See http://savage.net.au/Perl-modules/html/installing-a-module.html for
help on unpacking and installing.

=head1 Methods

This module is a sub-class of L<WWW::Scraper::Wikipedia::ISO3166::Database> and consequently
inherits its methods.

=head2 code2($code)

Get or set the 2-letter country code of the country or subcountry being processed.

See L</get_subcountry_page()>.

Also, I<code2> is an option to L</new()>.

=head2 get_1_page($url, $data_file)

Download $url and save it in $data_file. $data_file normally takes the form 'data/*.html'.

=head2 get_country_pages()

Download the 2 country pages:

L<http://en.wikipedia.org/wiki/ISO_3166-1>.

L<http://en.wikipedia.org/wiki/ISO_3166-2>.

See L<WWW::Scraper::Wikipedia::ISO3166/Description>.

=head2 get_subcountry_page()

Download 1 subcountry page, e.g. http://en.wikipedia.org/wiki/ISO_3166-2:$code2.

Warning. The 2-letter code of the subcountry must be set with $self -> code2('XX') before calling this
method.

See L<WWW::Scraper::Wikipedia::ISO3166/Description>.

=head2 get_subcountry_pages()

Download all subcountry pages which have not been downloaded.

See L<WWW::Scraper::Wikipedia::ISO3166/Description>.

=head2 new()

See L</Constructor and initialization>.

=head1 FAQ

For the database schema, etc, see L<WWW::Scraper::Wikipedia::ISO3166/FAQ>.

=head1 References

See L<WWW::Scraper::Wikipedia::ISO3166/References>.

=head1 Support

Email the author, or log a bug on RT:

L<https://rt.cpan.org/Public/Dist/Display.html?Name=WWW::Scraper::Wikipedia::ISO3166>.

=head1 Author

C<WWW::Scraper::Wikipedia::ISO3166> was written by Ron Savage I<E<lt>ron@savage.net.auE<gt>> in 2012.

Home page: L<http://savage.net.au/index.html>.

=head1 Copyright

Australian copyright (c) 2012 Ron Savage.

	All Programs of mine are 'OSI Certified Open Source Software';
	you can redistribute them and/or modify them under the terms of
	The Artistic License, a copy of which is available at:
	http://www.opensource.org/licenses/index.html


=cut