The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Lycos.pm
# by Wm. L. Scheding and Martin Thurn
# Copyright (C) 1996-1998 by USC/ISI
# $Id: Lycos.pm,v 2.224 2013/03/17 13:18:25 martin Exp $

=head1 NAME

WWW::Search::Lycos - class for searching www.lycos.com

=head1 SYNOPSIS

  use WWW::Search;
  my $oSearch = new WWW::Search('Lycos');
  my $sQuery = WWW::Search::escape_query("+sushi restaurant +Columbus Ohio");
  $oSearch->native_query($sQuery);
  while (my $oResult = $oSearch->next_result())
    { print $oResult->url, "\n"; }

=head1 DESCRIPTION

This class is a Lycos specialization of L<WWW::Search>.  It handles
making and interpreting Lycos-site searches F<http://www.lycos.com>.

This class exports no public interface; all interaction should
be done through L<WWW::Search> objects.

=head1 NOTES

Warning!  As of 2004-01, lycos.com often returns an error page in
place of the third page of results.  So it is very difficult to get
more than 20 hits for any query!

www.lycos.com is sometimes slow to respond; but I have not had a
problem with the default timeout.

www.lycos.com does not give the date nor size of the pages at the
resulting URLs; therefore change_date() and size() will never have a
value.

=head1 SEE ALSO

To make new back-ends, see L<WWW::Search>.

=head1 BUGS

Please tell the author if you find any!

=head1 AUTHOR

As of 1998-12-07, C<WWW::Search::Lycos> is maintained by Martin Thurn
(mthurn@cpan.org).

C<WWW::Search::Lycos> was originally written by Martin Thurn,
based on C<WWW::Search::Yahoo> version 1.12 of 1998-10-22.

=head1 LEGALESE

THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

=cut

#####################################################################

package WWW::Search::Lycos;

use strict;
use warnings;

my
$VERSION = do { my @r = (q$Revision: 2.224 $ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r };
our $MAINTAINER = 'Martin Thurn <mthurn@cpan.org>';

use Carp;
use URI::Escape;
use base 'WWW::Search';
use WWW::Search::Result;

sub native_setup_search
  {
  my ($self, $native_query, $native_options_ref) = @_;
  $self->{_debug} = $native_options_ref->{'search_debug'};
  $self->{_debug} = 2 if ($native_options_ref->{'search_parse_debug'});
  $self->{_debug} = 0 if (!defined($self->{_debug}));

  # lycos.com returns 10 hits per page no matter what.
  $self->{'_hits_per_page'} = 10;

  # $self->{agent_e_mail} = 'mthurn@cpan.org';
  $self->user_agent('non-robot');

  $self->{_next_to_retrieve} = 1;
  $self->{'_num_hits'} = 0;

  if (! defined($self->{_options}))
    {
    # As of 2013: http://search.lycos.com/web?q=yoda
    $self->{'search_base_url'} = 'http://search.lycos.com';
    $self->{_options} = {
                         'search_url' => $self->{'search_base_url'} .'/web',
                         'q' => $native_query,
                        };
    } # if

  my $options_ref = $self->{_options};

  # Copy in options which were passed in our second argument:
  if (defined($native_options_ref))
    {
    foreach (keys %$native_options_ref)
      {
      $options_ref->{$_} = $native_options_ref->{$_};
      } # foreach
    } # if

  # Copy in options which were set by a child object:
  if (defined($self->{'_child_options'}))
    {
    foreach (keys %{$self->{'_child_options'}})
      {
      $self->{'_options'}->{$_} = $self->{'_child_options'}->{$_};
      } # foreach
    } # if

  # Finally figure out the url.
  $self->{_next_url} = $self->{_options}{'search_url'} .'?'. $self->hash_to_cgi_string($self->{_options});
  } # native_setup_search


sub preprocess_results_page_OFF
  {
  my $self = shift;
  my $sPage = shift;
  print STDERR '='x 10, $sPage, '='x 10, "\n";
  return $sPage;
  } # preprocess_results_page


sub _parse_tree
  {
  my $self = shift;
  my $oTree = shift;
  my $hits_found = 0;
  if (! $self->approximate_result_count)
    {
    my $oTITLE = $oTree->look_down(
                                   _tag => 'h2',
                                   # class => 'ltGry',
                                  );
    if (ref $oTITLE)
      {
      my $sRC = $oTITLE->as_text;
      print STDERR " +   RC == $sRC\n" if 2 <= $self->{_debug};
      if ($sRC =~ m!\s*\d+\s+thru\s+\d+\s+of\s+([0-9,]+)\b!i)
        {
        my $sCount = $1;
        print STDERR " +     raw    count == $sCount\n" if 3 <= $self->{_debug};
        $sCount =~ s!,!!g;
        print STDERR " +     cooked count == $sCount\n" if 3 <= $self->{_debug};
        $self->approximate_result_count($sCount);
        } # if
      } # if
    } # if
  my ($sURL, $sTitle, $sDesc);
  my $sScore = '';
  my $sSize = '';
  my $sDate = '';
  my @aoIS = $oTree->look_down(_tag => 'div',
                               class => 'resultText',
                              );
 IS_TAG:
  foreach my $oIS (@aoIS)
    {
    next IS_TAG if ! ref $oIS;
    warn " DDD   oIS is ===". $oIS->as_HTML ."===\n" if 2 <= $self->{_debug};
    my $oTitle = $oIS->look_down(_tag => 'h4');
    if (! ref $oTitle)
      {
      warn " EEE   no child <h4> element\n" if 2 <= $self->{_debug};
      next IS_TAG;
      } # if
    my $sTitle = $oTitle->as_text;
    my $oURL = $oIS->look_down(_tag => 'p',
                               class => 'baseURL',
                              );
    if (! ref $oURL)
      {
      warn " EEE   no child <p baseURL> element\n" if 2 <= $self->{_debug};
      next IS_TAG;
      } # if
    $oURL->detach;
    $oURL->delete;
    my $oDesc = $oIS->look_down(_tag => 'p');
    if (! ref $oDesc)
      {
      warn " EEE   no child <p> element\n" if 2 <= $self->{_debug};
      next IS_TAG;
      } # if
    my $sDesc = $oDesc->as_text;
    print STDERR " +   found desc ===$sDesc===\n" if 2 <= $self->{_debug};
    my $oParent = $oIS->parent;
    warn " DDD   oParent is ===". $oParent->as_HTML ."===\n" if 2 <= $self->{_debug};
    my $sURL = $oParent->attr('title');

    my $hit = new WWW::Search::Result;
    $hit->add_url($sURL);
    $hit->title($sTitle);
    $hit->description(_strip($sDesc));
    push(@{$self->{cache}}, $hit);
    $self->{'_num_hits'}++;
    $hits_found++;
    } # foreach $oB
  # Find the next link, if any:
  my @aoA = $oTree->look_down(_tag => 'a',
                              title => 'Next',
                              # sub { $_[0]->as_text eq 'Next >' },
                             );
 A_TAG:
  # We want the last "next" link on the page:
  my $oA = $aoA[-1];
  if (ref $oA)
    {
    print STDERR " +   oAnext is ===", $oA->as_HTML, "===\n" if 2 <= $self->{_debug};
    $self->{_next_url} = $self->absurl($self->{'_prev_url'}, $oA->attr('href'));
    } # if
 SKIP_NEXT_LINK:

  return $hits_found;
  } # _parse_tree


sub _strip
  {
  my $sRaw = shift;
  my $s = &WWW::Search::strip_tags($sRaw);
  # Strip leading whitespace:
  $s =~ s!\A[\240\t\r\n\ ]+  !!x;
  # Strip trailing whitespace:
  $s =~ s!  [\240\t\r\n\ ]+\Z!!x;
  return $s;
  } # _strip

1;

__END__