# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $
# FIXME: this script screen-scrapes the web to build the ItemSearch
# validation classes.  Unfortunately, this breaks too frequently.  A
# better way needs to be found.

package main;
require 5.008_001;

use Getopt::Long;
use IO::File;
use Pod::Usage;
use LWP::Simple;
use Text::Template;
use Data::Dumper;
use File::Path;
use FindBin;
use lib "$FindBin::Bin/../lib";
use HTML::TreeBuilder::XPath;
use Net::Amazon ();

use strict;
use warnings;


# Maps an Amazon locale code to the file name of the documentation page
# that lists the ItemSearch search indexes (and their parameters) for that
# locale.  Only 'us' is active; the other locales are commented out.
#
# NOTE(review): the main loop builds each URL as
# AWS4_ONLINE_HTML . <file name>, but AWS4_ONLINE_HTML is not defined in
# the visible source -- confirm that it is declared before running.
use constant AWS4_LOCALE_HTML => {
    'us' => 'USSearchIndexParamForItemsearch.html',
#     'de' => 'DESearchIndexParamForItemsearch.html',
#     'es' => 'ESSearchIndexParamForItemsearch.html',
#     'jp' => 'JPSearchIndexParamForItemsearch.html',
#     'it' => 'ITSearchIndexParamForItemsearch.html',
#     'uk' => 'UKSearchIndexParamForItemsearch.html',
#     'fr' => 'FRSearchIndexParamForItemsearch.html',
#     'ca' => 'CASearchIndexParamForItemsearch.html',
};

# Command-line option state and defaults.
my $Opt_Debug     = 0;                                        # --debug / -D
my $Opt_Dest      = "../lib/Net/Amazon/Validate/ItemSearch";  # --dest <dir>
my $Opt_Overwrite = 0;                                        # --overwrite

# Parse the command line.  --help and --version are handled by callbacks
# that print and exit; any non-option argument is routed to parameter(),
# which dies.  If Getopt::Long itself rejects the command line (it has
# already printed its own diagnostic), stop instead of running with a
# half-parsed configuration.
GetOptions(
    "help|h"    => \&usage,
    "version|V" => \&version,
    "debug|D"   => \$Opt_Debug,
    "dest=s"    => \$Opt_Dest,
    "overwrite" => \$Opt_Overwrite,
    "<>"        => \&parameter,
) or die "%Error: Bad usage, try '$0 --help'\n";

## main #########################################

# The output root must already exist; the per-locale sub-directories below
# it are created on demand by dump_library().
unless (-d $Opt_Dest) {
    die "The directory $Opt_Dest does not exist!\n";
}

# For every configured locale: download its documentation page, collect the
# parameters listed under each "SearchIndex: <name>" heading, and emit one
# validation library per search index plus the aggregate Keywords and
# UPC/EAN libraries.
#
# NOTE(review): AWS4_ONLINE_HTML (the base URL) is not defined in the
# visible source -- confirm it is declared elsewhere.
for my $locale (sort keys %{(AWS4_LOCALE_HTML)}) {
    my $link = AWS4_ONLINE_HTML() . AWS4_LOCALE_HTML->{$locale};
    print "fetching $link ...\n" if $Opt_Debug;

    # LWP::Simple::get() returns undef on any failure.
    my $html = get($link);
    die "Failed to fetch $link!\n" unless defined $html;

    my $tree = HTML::TreeBuilder::XPath->new();
    $tree->parse($html);
    $tree->eof();

    my @search_indicies = map { $_->as_text } $tree->findnodes("//div[\@class=\"section\"]//h2");

    # The page layout changes often; do not generate empty libraries when
    # the expected headings are missing.
    unless (@search_indicies) {
        warn "No search indexes found at $link, skipping locale '$locale'!\n";
        $tree->delete;
        next;
    }

    my %depts;       # search index name => [ parameter names, in page order ]
    my %upc;         # parameter name    => number of times it was seen
    my %keywords;    # locale => { search index name => count } for indexes
                     # that accept the "Keywords" parameter

    for my $search_index (@search_indicies) {
        my ($search_index_name) = $search_index =~ /SearchIndex:\s+(\w+)/;

        # Headings that are not of the form "SearchIndex: <name>" carry no
        # search index; 'All' is deliberately excluded.
        next unless defined $search_index_name;
        next if $search_index_name eq 'All';

        print $search_index_name."\n";

        my @parameters = map { $_->as_text } $tree->findnodes("//div[\@class=\"section\"]//h2[contains(text(),\"$search_index\")]/../../../..//li/p");

        for my $parameter (@parameters) {
            print "  -> $parameter\n";
            push @{$depts{$search_index_name}}, $parameter;
            $keywords{$locale}{$search_index_name}++ if $parameter eq "Keywords";
        }
    }

    # Sorted iteration keeps the generated files (and the fallback default
    # chosen by select_default()) stable from run to run.
    for my $dept (sort keys %depts) {
        dump_library($depts{$dept}, $locale, $dept);
        upc_add(\%upc, $depts{$dept});
    }

    for my $kw_locale (sort keys %keywords) {
        my @kw_indexes = sort keys %{$keywords{$kw_locale}};
        dump_library(\@kw_indexes, $kw_locale, "Keywords");
    }

    my @upc_options = sort keys %upc;
    my $type = ($locale eq 'us') ? 'UPC' : 'EAN';
    dump_library(\@upc_options, $locale, $type);

    # HTML::TreeBuilder trees must be deleted explicitly to release memory.
    $tree->delete;
}

## subs #########################################

# Getopt::Long handler for --help/-h: print the revision banner and the
# full POD manual, then terminate.  Arguments supplied by Getopt::Long are
# ignored.  Does not return.
sub usage {
    print '$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ', "\n";
    pod2usage(-verbose=>2, -exitval => 2);
    # pod2usage() exits with the given -exitval; this is only a fallback.
    exit (1);
}

# Getopt::Long handler for --version/-V: print the revision banner and
# exit with status 1.  Arguments supplied by Getopt::Long are ignored.
# Does not return.
sub version {
    print '$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ', "\n";
    exit (1);
}

# Getopt::Long "<>" handler: the script accepts no positional arguments,
# so any non-option argument is a fatal error.
#   $param - the offending command-line argument
# Always dies with "%Error: Unknown parameter: <param>".
sub parameter {
    my $param = shift;
    die "%Error: Unknown parameter: $param\n";
}


# Attempt to pick a "favored" default for the different types of
# ItemSearch'es.  The favored list is tried in order of preference.
# The most preferred is Books because that was the default for AWS3.
# As Books is not available for all types of ItemSearch'es, other
# "favored" defaults are tried next: Music, DVD, Software, Title,
# Keyword and Keywords, in that order.  If none of those is among the
# acceptable values, the first item in the list is used.
#
#   $aref - reference to the array of acceptable values
# Returns the chosen default, or undef when the list is empty.

sub select_default {
    my $aref = shift;

    my %is_option = map { $_ => 1 } @$aref;

    for my $favored_default (qw(Books Music DVD Software Title Keyword Keywords)) {
        # exists() states the intent (membership) and does not depend on
        # the stored value.
        return $favored_default if exists $is_option{$favored_default};
    }

    return $aref->[0];
}

# Tally every element of a list into a hash of counters.
#   $href - hash reference that accumulates the counts (modified in place)
#   $aref - array reference of the values to count
# Returns nothing.
sub upc_add {
    my ($href, $aref) = @_;
    $href->{$_}++ for @$aref;
    return;
}

# Generate one validation library from the 'aws4-itemsearch.tmpl' template
# (looked up relative to the current directory) and write it to
# "$Opt_Dest/<locale>/<dept>.pm".
#
#   $aref   - array reference of the acceptable option values
#   $locale - locale code; used as the sub-directory and namespace part
#   $dept   - ItemSearch name; used as the module and file name
#
# The locale directory is created when missing.  An existing file is left
# alone (with a warning) unless --overwrite was given.  Dies when the
# directory cannot be created, the template cannot be loaded or filled in,
# or the file cannot be written.
sub dump_library {
    my ($aref, $locale, $dept) = @_;

    my $dn = "$Opt_Dest/$locale";
    my $fn = "$dn/$dept.pm";

    unless (-d $dn) {
        mkpath($dn);
        -d $dn or die "Failed to create '$dn'!\n";
    }

    if (-f $fn && !$Opt_Overwrite) {
        warn "The file $fn already exists, skipping!\n";
        return;
    }

    # Text::Template->new returns undef on failure and leaves the reason
    # in $Text::Template::ERROR.
    my $template = Text::Template->new(
            TYPE       => 'FILE',
            SOURCE     => 'aws4-itemsearch.tmpl',
            DELIMITERS => [ '[%--', '--%]', ],
    ) or die "Failed to load the template 'aws4-itemsearch.tmpl': $Text::Template::ERROR\n";

    my $hash = {'MODULE_NAME'    => "$locale".'::'."$dept",
                'DEFAULT_OPTION' => select_default($aref),
                'LOCALE'         => $locale,
                'ITEM_SEARCH'    => $dept,
                'options'        => $aref,
    };

    my $text = $template->fill_in(HASH => $hash);
    unless ($text) {
        die "Failed to fill in the text template for $locale/$dept!\n";
    }

    # Mode 'w' avoids the 2-arg ">$fn" form, which misbehaves on file
    # names containing leading/trailing whitespace or mode characters.
    my $fouth = IO::File->new($fn, 'w') or
        die "$! '$fn'!\n";

    print {$fouth} $text or die "$! '$fn'!\n";

    # Buffered write errors only surface at close time.
    $fouth->close or die "$! '$fn'!\n";

    return;
}




=head1 NAME

B<aws4-itemsearch> - convert Amazon's HTML data into Perl libraries for ItemSearch validation


=head1 SYNOPSIS

B<aws4-itemsearch> [I<OPTION>]... [I<FILE>]...


=head1 DESCRIPTION

B<aws4-itemsearch> converts the data stored in Amazon's HTML pages for
AWS4 into Perl libraries.  These libraries are used by Net::Amazon to
validate user input and to select default entries for ItemSearch.


=head1 OPTIONS

=over 4

=item -h, --help

Displays this message and program version and exits.

=item -V, --version

Displays the program's version and exits.

=item -D, --debug

Prints debug information.

=item --overwrite

Overwrite any libraries if they already exist.

=item --dest E<lt>directoryE<gt>

Specify the destination where the files should be written.

=back


=head1 AUTHORS

Written by Christopher Boumenot.


=head1 BUGS

Report bugs to the author.

=head1 SEE ALSO
