# lib/WWW/Scraper/eBay.pm


package WWW::Scraper::eBay;

use strict;
use vars qw($VERSION @ISA);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.01 $ =~ /(\d+)\.(\d+)/);

use WWW::Scraper(qw(2.27 generic_option addURL trimTags trimLFs));

# Request description handed out by scraperRequest(): which form to drive
# and how Scraper attributes map onto eBay's native input fields.
my $scraperRequest = {
    'type'             => 'FORM',
    'formNameOrNumber' => 'search_form',
    'submitButton'     => undef,

    # Base URL on which the query is built.  import() overwrites this
    # entry when it is given a 'scraperBaseURL' option.
    'url' => 'http://pages.ebay.com/search/items/basicsearch.html',

    # Scraper attributes => native input fields mapping.
    'nativeQuery'       => 'query',
    'nativeDefaults'    => { 'query' => undef },
    'fieldTranslations' => {
        '*' => { '*' => '*' },
    },

    # Further options for the Scraper operation.
    'cookies' => 0,
};

# Result-page frame returned by scraperFrame(), which also passes it to
# SUPER::SetScraperFrame.  It is a nested array structure: every node is an
# array whose first element is an operator name ('HTML', 'COUNT', 'BODY',
# 'NEXT', 'TABLE', 'HIT*', 'TR', 'TD', 'A').  The meaning of each operator is
# defined by WWW::Scraper, not in this file -- consult its documentation
# before changing the nesting.
#
# Layout as declared below:
#   * 'COUNT' carries a regex capturing the "<n> items found for" number.
#   * The outer 'BODY' starts at '</form>' (no end marker given) and holds a
#     'NEXT' entry keyed on the text 'Next >' plus an inner 'BODY' delimited
#     by the eBayCacheStart / eBayCacheEnd HTML comments.
#   * Inside that, TABLE '#0' and TABLE '#2' are each followed by a 'HIT*'
#     of type 'Auction'.  The two 'HIT*' sub-frames are identical copies:
#     per 'TR' they declare five 'TD' cells in order -- an unnamed cell, a
#     cell whose anchor is mapped to 'url'/'title', then 'price' (through
#     parsePrice), 'bids' and 'endsPDT' (both through trimLFs).
#   * The '#try again!' lines are a disabled 'TRYUNTIL' wrapper; see the
#     'TODO' entry in testParameters().
my $scraperFrame =
        [ 'HTML', 
            [ 
               [ 'COUNT', '([,0-9]+)</b>\s+items found\s+for']
              ,[ 'BODY', '</form>', undef,
                  [  
                     #[ 'NEXT', 2, \&findNextForm ] # it used to be a form . . .
                     [ 'NEXT', 1, 'Next >' ]
                    ,[ 'BODY', '<!-- eBayCacheStart -->', '<!-- eBayCacheEnd -->',
                       [ 
                           [ 'TABLE', '#0' ]
                          ,[ 'HIT*' , 'Auction',
                             [ 
#try again!                                [ 'TRYUNTIL', 2, 'url', [
                                [ 'TABLE', 
                                   [  
                                      [ 'TR',
                                         [
                                            # <img height="15" width="64" border="0" alt="Pic" src="http://pics.ebay.com/aw/pics/lst/_p__64x15.gif">
                                            #[ 'TD',[ [ 'REGEX', '<img\s+.*?src=([^ >)', 'thumbNailUrl'] ] ]
                                            [ 'TD' ] # The thumbnail url is in there somewhere!
                                           ,[ 'TD',[ [ 'A', 'url', 'title' ] ] ]
                                           ,[ 'TD', 'price', \&parsePrice ]
                                           ,[ 'TD', 'bids', \&trimLFs ]
                                           ,[ 'TD', 'endsPDT', \&trimLFs ]
                                            # this regex never matches; just lets us declare fields.
                                           #,[ 'REGEX', 'neverMatch', 'isNew', 'itemNumber' ] #, 'isBillpoint']
                                         ]
                                      ]
                                   ]
                                ] 
#try again!                                ] ]
                             ] 
                           ] 
                          # Second hit list: same sub-frame as above, applied after TABLE '#2'.
                          ,[ 'TABLE', '#2' ]
                          ,[ 'HIT*' , 'Auction',
                             [ 
#try again!                                [ 'TRYUNTIL', 2, 'url', [
                                [ 'TABLE', 
                                   [  
                                      [ 'TR',
                                         [
                                            # <img height="15" width="64" border="0" alt="Pic" src="http://pics.ebay.com/aw/pics/lst/_p__64x15.gif">
                                            #[ 'TD',[ [ 'REGEX', '<img\s+.*?src=([^ >)', 'thumbNailUrl'] ] ]
                                            [ 'TD' ] # The thumbnail url is in there somewhere!
                                           ,[ 'TD',[ [ 'A', 'url', 'title' ] ] ]
                                           ,[ 'TD', 'price', \&parsePrice ]
                                           ,[ 'TD', 'bids', \&trimLFs ]
                                           ,[ 'TD', 'endsPDT', \&trimLFs ]
                                            # this regex never matches; just lets us declare fields.
                                           #,[ 'REGEX', 'neverMatch', 'isNew', 'itemNumber' ] #, 'isBillpoint']
                                         ]
                                      ]
                                   ]
                                ] 
#try again!                                ] ]
                             ] 
                           ] 
                          #,[ 'BOGUS', -2 ] # eBay's last 2 hits are bogus ("return to top", etc.).
                       ] 
                     ]
                  ]
               ]
            ]
        ];



# Parameters describing how this engine should be exercised by a test
# harness: the native query to run and the expected hit counts.
# Returns a reference to a freshly built hash on every call.
sub testParameters {
    my %parameters = (
        'SKIP'              => '',
        'TODO'              => "Implement 'TRYUNTIL' Scraper frame option - helps for skipping 'hits' that aren't actually hits.",
        'testNativeQuery'   => 'turntable',
        'expectedOnePage'   => 9,
        'expectedMultiPage' => 25,
        'expectedBogusPage' => 0,
    );
    return \%parameters;
}


# Accessors exposing this engine's structural declarations.

# Returns the file-level request description.
sub scraperRequest {
    return $scraperRequest;
}

# Registers the file-level frame through the parent class's
# SetScraperFrame, then returns that same frame.
sub scraperFrame {
    my ($self) = @_;
    $self->SUPER::SetScraperFrame($scraperFrame);
    return $scraperFrame;
}

# No detail frame is declared for this engine.
sub scraperDetail {
    return undef;
}



my $defaultScraperForm_url = ['http://pages.ebay.com/search/items/search.html', 0, 'query', undef];

# Custom import hook.  Hash-reference arguments are treated as options and
# consumed here; every other argument is forwarded to Exporter::import.
# The only option acted on is 'scraperBaseURL', which replaces the base URL
# in both the request description and the default form description.
sub import
{
    my ($package, @arguments) = @_;

    # Separate option hashes from ordinary export names, keeping order.
    my (@exports, @options);
    for my $argument (@arguments) {
        if ( ref($argument) eq "HASH" ) {
            push @options, $argument;
        }
        else {
            push @exports, $argument;
        }
    }

    for my $option (@options) {
        my $base_url = $option->{'scraperBaseURL'};
        next unless $base_url;
        $scraperRequest->{'url'}     = $base_url;  # new form
        $defaultScraperForm_url->[0] = $base_url;  # old form
    }

    # Hand over to Exporter with the option hashes stripped out.
    @_ = ($package, @exports);
    goto &Exporter::import;
}


# Sometimes there's just a NEXT form, sometimes there's a PREV form and a NEXT form . . .
#
# Walks the FORM sections of $dat (as yielded by $self->getMarkedText),
# rebuilds each one with HTML::Form, and looks for an input whose value is
# exactly 'Next'.
#
#   $self - scraper object; must provide getMarkedText() and '_base_url'.
#   $hit  - accepted for the callback signature; not used here.
#   $dat  - text to search for forms.
#
# Returns the URI of the request produced by clicking the first 'Next'
# input found, or '' when no form contains one.
#
# NOTE(review): HTML::Form is not loaded by this file; presumably
# WWW::Scraper loads it -- confirm before re-enabling this callback.
sub findNextForm {
    my ($self, $hit, $dat) = @_;

    my $next_content = $dat;
    while ( my ($sub_content, $frm) = $self->getMarkedText('FORM', \$next_content) ) {
        last unless $sub_content;

        # Reconstruct the form that contains the NEXT data.
        my ($form) = HTML::Form->parse("<form $frm>$sub_content</form>", $self->{'_base_url'});

        # parse() may yield nothing for markup it cannot handle; move on to
        # the next form instead of calling a method on undef.
        next unless $form;

        for my $input ( $form->inputs() ) {
            my $value = $input->value();
            # Inputs without a value cannot be the 'Next' button.
            next unless defined $value && $value eq 'Next';

            my $req = $input->click($form);
            return $req->uri();
        }
    }
    return '';
}


# eBay's title sometimes includes other things, such as "new" link and "billpoint" link
#<td valign=top width=52%><font size=3><a href="http://cgi.ebay.com/ws/eBayISAPI.dll?ViewItem&item=1383008995">UNITED AUDIO TURNTABLE DUAL 1209 MODEL</a></font>
#<BR><img height=1 width=200 border=0 alt="" src="http://pics.ebay.com/aw/pics/s.gif"></td>
#
# Walks the 'A' sections of $dat (as yielded by $self->getMarkedText),
# skipping anchors whose content contains an <img> while noting whether
# any of them carried the "New!" icon.  The first anchor without an image
# is taken as the title link.
#
#   $self - scraper object; must provide getMarkedText().
#   $hit  - receives 'title', 'isNew' and, when the URL ends in
#           "item=<digits>", 'itemNumber' via plug_elem().
#   $dat  - text of the title cell.
#
# Returns the href of the title anchor; the tag text as received when no
# quoted href is found in it; undef when getMarkedText gave no tag text.
sub parseItemTitle {
   my ($self, $hit, $dat) = @_;
   my $next_content = $dat;
   my ($sub_content, $frm);
   my ($isNew, $isBillpoint) = (0,0);
   while ( ($sub_content, $frm) = $self->getMarkedText('A', \$next_content) ) {
      last unless $sub_content;
      $isNew       |= ($sub_content =~ m{alt="New!"})?1:0;
      $isBillpoint |= ($sub_content =~ m{alt="eBay Online Payments by Billpoint"})?1:0;
      last unless $sub_content =~ m{<img}i;
   }
   $hit->plug_elem('title', $sub_content);
   $hit->plug_elem('isNew', $isNew);
#   $hit->plug_elem('isBillpoint', $isBillpoint); # need to match Billpoint *after* matching title.

   my $url = $frm;
   return $url unless defined $url;

   # The closing quote must be matched with the backreference \1.  Writing
   # $1 inside the pattern interpolates whatever an earlier match left
   # behind, so the quote was never stripped reliably.  Non-greedy so that
   # attributes following the href are not swallowed.
   if ( $url =~ m{\bhref\s*=\s*(['"])(.*?)\1}is ) {
      $url = $2;
   }

   # Only plug the item number when it was actually found; an unchecked $1
   # would carry over a stale capture from the href match above.
   if ( $url =~ m{item=(\d+)$} ) {
      $hit->plug_elem('itemNumber', $1);
   }
   return $url;
}

# eBay's price cell may hold several values separated by <BR> (for
# instance a "Buy it Now" price next to the current bid).  Each piece is
# cleaned with trimLFs and, when non-empty, plugged into the hit as
# 'price'.  Always returns undef, because the values have already been
# stored on $hit.
sub parsePrice {
    my ($self, $hit, $dat) = @_;

    my @pieces = split /<BR>/, $dat;
    foreach my $piece (@pieces) {
        my $cleaned = $self->trimLFs($hit, $piece);
        $hit->plug_elem('price', $cleaned) if $cleaned;
    }

    return undef; # we already plugged the values into the $hit.
}
1;

__END__

=pod

=head1 NAME

WWW::Scraper::eBay - Scrapes www.eBay.com


=head1 SYNOPSIS

    require WWW::Scraper;
    $search = new WWW::Scraper('eBay');


=head1 DESCRIPTION

This class is an eBay extension of WWW::Scraper.
It handles making and interpreting eBay searches
at L<http://www.eBay.com>.

=head1 OPTIONS

=over 8

=item search_debug, search_parse_debug, search_ref

These options are specified in L<WWW::Search>.

=back


=head1 AUTHOR

C<WWW::Scraper::eBay> is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.

=head1 COPYRIGHT

Copyright (c) 2001 Glenn Wood
All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

#####################################################################
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)