The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Yahoo::Search::Request;
use strict;

use Yahoo::Search::Response;
use Yahoo::Search::XML;
use LWP::UserAgent;
use HTTP::Request;
use URI;

our $VERSION = "20070320.002"; # just to make CPAN happy


my $have_XML_Simple; # undef means 'not yet tested'
sub _have_XML_Simple
{
    if (not defined $have_XML_Simple) {
        # test whether XML::Simple is installed
        if (eval { require XML::Simple; 1 }) {
            $have_XML_Simple = 1;
        } else {
            $have_XML_Simple = 0;
        }
    }
    return $have_XML_Simple;
}


=head1 NAME

Yahoo::Search::Request -- Container object for a Yahoo! Search request.
(This package is included in, and automatically loaded by, the Yahoo::Search package.)

=head1 Package Use

You never need to C<use> this package directly -- it is loaded
automatically by Yahoo::Search.

=head1 Object Creation

In practice, this class is generally not dealt with explicitly, but rather
implicitly via functions in Yahoo::Search such as C<Query> and C<Links>,
which build and use a C<Request> object under the hood.

You also have access to the C<Request> object via C<Request()> method of
resulting C<Response> and C<Result> objects.

To be clear, C<Request> objects are created by the C<Request()> method of a
Search Engine object (Yahoo::Search).

=cut

sub new
{
    my $class = shift;
    my %Args = @_;

    ##
    ## Don't want to keep any arg that begins with '_' (e.g. _Url).
    ##
    for my $key (grep { /^_/ } keys %Args) {
        delete $Args{$key};
    }

    return bless \%Args, $class;
}


=head1 Methods

A C<Request> object provides the following methods:

=over 4

=cut

###########################################################################

=item $Request->Uri

Returns the URI::http object representing the url fetched (or to be
fetched) from Yahoo's Search servers. The url is actually fetched when the
C<Request> object's C<Fetch()> method is called.

Note that this does I<not> reflect the fact that a request is changed to a
POST when request is sufficiently large. Thus, there are times when the url
represented by the URI::http object returned is not actually fetchable from
the Yahoo! servers.

=cut

sub Uri
{
    my $Request = shift; # self

    if (not $Request->{_Uri})
    {
        ##
        ## Create the URI (action + query string)
        ##
        $Request->{_Uri} = URI->new($Request->{Action}, "http");
        $Request->{_Uri}->query_form(%{$Request->{Params}});
    }
    return $Request->{_Uri};
}



###########################################################################

=item $Request->Url

Like the C<Uri> method, but returns a string with the full url
fetched (or to be fetched).

Note that this does I<not> reflect the fact that a request is changed to a
POST when request is sufficiently large. Thus, there are times when the url
returned is not actually fetchable from the Yahoo! servers.

=cut

sub Url
{
    my $Request = shift; # self
    return $Request->Uri->as_string;
}



###########################################################################

=item $Request->SearchSpace

Returns the search space the request represents (I<Doc>, I<Image>, etc.)

=cut


sub SearchSpace
{
    my $Request = shift; # self
    return $Request->{Space}
}



###########################################################################

=item $Request->SearchEngine

Returns the Yahoo::Search "search engine" object used in creating this
request.

=cut

sub SearchEngine
{
    my $Request = shift; # self
    return $Request->{SearchEngine};
}



##
## Some search spaces spaces have very simple <Result> data --
## they are simple text phrases, and not further nested xml.
##
my %SimpleResultSpace =
(
 Spell   => 1,
 Related => 1,
 Terms   => 1,
);


###########################################################################

=item $Request->Fetch

Actually contact the Yahoo Search servers, returning a C<Result>
(Yahoo::Search::Result) object.

=cut

our $UA;

sub Fetch
{
    my $Request = shift; # self
    ## no other args

    ##
    ## Fetch -- get the response (which contains xml, hopefully)
    ##

    if (my $callback = $Request->SearchEngine->Default('PreRequestCallback'))
    {
        if (not $callback->($Request)) {
            $@ ||= "aborted because PreRequestCallback returned false";
            return ();
        }
    }

    $Yahoo::Search::RecentRequestUrl = $Request->Url;

    warn "Fetching url: $Yahoo::Search::RecentRequestUrl\n" if $Request->{Debug} =~ m/url/x;

    ## create the useragent object just the first time.
    $UA ||= LWP::UserAgent->new(agent => "Yahoo::Search ($Yahoo::Search::VERSION)", env_proxy  => 1);

    my $response;

    ##
    ## Yahoo! servers allow a GET until the GET line (including "GET" and
    ## ending "\r\n" is 8192 bytes long. The following switches to POST
    ## once it gets close. (To bring a GET pedantically up to the limit,
    ## we'd have to switch to POST once what follows the '?' in the URL is
    ## more than 8186 bytes, but there's really no reason to push right up
    ## to the limit.)
    ##
    if (length($Yahoo::Search::RecentRequestUrl) < 8180) {
        $response = $UA->get($Yahoo::Search::RecentRequestUrl);
    } else {
        $response = $UA->post($Request->{Action}, $Request->{Params});
    }

    ##
    ## Ensure we have a good result
    ##
    if (not $response) {
        $@ = "couldn't make request";
        return ();
    }

    ##
    ## Nab (and if debugging, report) the xml
    ##
    my $xml = $response->content;
    print $xml, "\n" if $Request->{Debug} =~ m/xml/x;
    if ($Request->{Debug} =~ m/XMLtmp/) {
        open XMLTMP, ">/tmp/XML";
        print XMLTMP $xml;
        close XMLTMP;
    }

    ##
    ## Even if the response is not successful, it may still be XML and may
    ## have an error message in it.
    ##
    if (not $response->is_success)
    {
        if ($xml and $xml =~ m{<Message>(.+?)</Message>}s) {
            $@ = "Bad Request: $1";
        } elsif ($response->status_line) {
            $@ = $response->status_line;
        } else {
            $@ = "ERROR"; ## unknown error
        }
        return ();
    }

    if (not $xml) {
        $@ = "empty response from Yahoo server";
        return ();
    }

    ##
    ## Turn the XML into a Perl hash.
    ##
    ## If we're told to use XML::Simple, we'll do so directly.
    ## Otherwise, we'll try our own mini (==fast) Yahoo::Search::XML. If it
    ## can't grok the XML, we'll revert to XML::Simple, asking the user to
    ## file a bug report....
    ##
    ## The following is more verbose than need be, but the more succinct
    ## code is convoluted for little gain.
    ## 
    my $ResultHash;
    if ($Yahoo::Search::UseXmlSimple)
    {
        if (not _have_XML_Simple()) {
            $@ = "\$Yahoo::Search::UseXmlSimple is true, but XML::Simple is not installed";
            return ();
        }

        $ResultHash = eval { XML::Simple::XMLin($xml) };
        if (not $ResultHash) {
            $@ = "Yahoo::Request: Error processing XML by XML::Simple: $@";
            return ();
        }
    }
    else
    {
        ## first try my mini parser
        $ResultHash = eval { Yahoo::Search::XML::Parse($xml) };

        if (not $ResultHash)
        {
            my $orig_error = $@;

            ##
            ## Give XML::Simple a chance, if it's there
            ##
            if (not _have_XML_Simple())
            {
                warn "Yahoo::Search::XML is having trouble with the XML returned from Yahoo; try installing XML::Simple and setting \$Yahoo::Search::UseXmlSimple to true, and filing a bug report with jfriedl\@yahoo.com.\n";
                $@ = "Yahoo::Request: Error processing XML: $orig_error";
                return ();
            }

            $ResultHash = eval { XML::Simple::XMLin($xml) };

            if (not $ResultHash) {
                $@ = "Yahoo::Request: Error processing XML (even tried XML::Simple): $orig_error";
                return ();
            }
            ##
            ## XML::Simple could parse it, but Yahoo::Search::XML couldn't,
            ## so it must be a bug with the former... )_:
            ##
            $Yahoo::Search::UseXmlSimple = 1;
            warn "Yahoo::Search::XML is having trouble with the XML returned from Yahoo, so reverting to XML::Simple; suggest setting \$Yahoo::Search::UseXmlSimple to true and filing a bug report with jfriedl\@yahoo.com.\n";
        }
    }


    ##
    ## If there is only one result, $ResultHash->{Result} will be a hash
    ## ref rather than the ref to an array of hash refs that we would
    ## otherwise expect, so we'll fix that here.
    ##
    if (not exists $ResultHash->{Result}) {
        $ResultHash->{Result} = [ ];
    } elsif (ref($ResultHash->{Result}) ne "ARRAY") {
        $ResultHash->{Result} = [ $ResultHash->{Result} ];
    }

    ##
    ## The mention of "hash ref" in the previous comment doesn't apply
    ## to Spell and Related spaces -- let's fix that.
    ##
    if ($SimpleResultSpace{$Request->SearchSpace})
    {
        my @Results;
        for my $item (@{ $ResultHash->{Result}}) {
            push @Results,  { Term => $item };
        }
        $ResultHash->{Result} = \@Results;


        ##
        ## These are not part of what's returned, but it makes it easier
        ## for us if they're there, so fake'em.
        ##
        $ResultHash->{firstResultPosition} = @Results ? 1 : 0;
        $ResultHash->{totalResultsAvailable} = scalar @Results;

        ##
        ## Add this hint to the rest of the code to not allow
        ## further requests (e.g. via AutoContinue).
        ##
        $ResultHash->{_NoFurtherRequests} = 1;
    }

    ##
    ## Report if needed.
    ##
    if ($Request->{Debug} =~ m/hash/x) {
        require Data::Dumper;
        local($Data::Dumper::Terse) = 1;
        warn "Grokked Hash: ", Data::Dumper::Dumper($ResultHash), "\n";
    }

    $ResultHash->{_Request} = $Request;
    $ResultHash->{_XML}     = $xml;

    ##
    ## Create (and return) a new Response object from the request and the
    ## returned hash.
    ##
    return Yahoo::Search::Response->new($ResultHash);
}



###########################################################################

=item $Request->RelatedRequest

=item $Request->RelatedResponse

Perform a I<Related> request for search terms related to the query phrase
of the current request, returning the new C<Request> or C<Response> object,
respectively.

Both return nothing if the current request is already for a I<Related>
search.

=cut


sub RelatedRequest
{
    my $Request = shift;

    if ($Request->SearchSpace eq "Related") {
        return ();
    } else {
        return $Request->SearchEngine->Request(Related => $Request->{Params}->{query});
    }
}

sub RelatedResponse
{
    my $Request = shift;
    if (my $new = $Request->RelatedRequest) {
        return $new->Fetch();
    } else {
        return ();
    }
}


###########################################################################

=item $Request->SpellRequest

=item $Request->SpellResponse

Perform a I<Spell> request for a search term that may reflect proper
spelling of the query phrase of the current request, returning the new
C<Request> or C<Response> object, respectively.

Both return nothing if the current request is already for a I<Spell>
search.

=cut


sub SpellRequest
{
    my $Request = shift;

    if ($Request->SearchSpace eq "Spell") {
        return ();
    } else {
        return $Request->SearchEngine->Request(Spell => $Request->{Params}->{query});
    }
}

sub SpellResponse
{
    my $Request = shift;
    if (my $new = $Request->SpellRequest) {
        return $new->Fetch();
    } else {
        return ();
    }
}


=pod

=back

=head1 Copyright

Copyright 2007 Yahoo! Inc.

=head1 Author

Jeffrey Friedl (jfriedl@yahoo.com)

=cut

1;