The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
#############################################################
# AdvancedWeb.pm
# by Jim Smyser
# Copyright (c) 1999 by Jim Smyser & USC/ISI
# $Id: AdvancedWeb.pm,v 2.02 2000/04/04 23:53:27 jims Exp $
#############################################################


package WWW::Search::AltaVista::AdvancedWeb;

=head1 NAME

WWW::Search::AltaVista::AdvancedWeb - class for advanced Alta Vista web searching

=head1 SYNOPSIS

  use WWW::Search;
 my $search = new WWW::Search('AltaVista::AdvancedWeb');
 $search->native_query(WWW::Search::escape_query('(bmw AND mercedes) AND NOT (used OR Ferrari)'));
 $search->maximum_to_retrieve('100'); 
  while (my $result = $search->next_result())
    { 
    print $result->url, "\n"; 
    }

=head1 DESCRIPTION

Class hack for Advance AltaVista web search mode originally written by  
John Heidemann F<http://www.altavista.com>. 

This hack now allows for AltaVista AdvanceWeb search results
to be sorted and relevant results returned first. Initially, this 
class had skiped the 'r' option which is used by AltaVista to sort
search results for relevancy. Sending advance query using the 
'q' option resulted in random returned search results which made it 
impossible to view best scored results first.  

This class exports no public interface; all interaction should
be done through WWW::Search objects.

=head1 HELP

Use AND to join two terms that must both be present for a
document to count as a match.

Use OR to join two terms if either one counts.

Use AND NOT to join two terms if the first must be present and
the second must NOT.

Use NEAR to join two terms if they both must appear and be within
10 words of each other.

Try this example:

cars AND bmw AND mercedes 

You don't have to capitalize the "operators" AND, OR, AND NOT, or
NEAR. But many people do to make it clear what is a query term
and what is an instruction to the search engine.

One other wrinkle that's very handy: you can group steps together
with parentheses to tell the system what order you want it to
perform operations in.

(bmw AND mercedes) NEAR cars AND NOT (used OR Ferrari) 

Keep in mind that grouping should be used as much as possible
because if you attempt to enter a long query using AND to join
the words you may not receive any results because the entire
query would be like one long phrase. For best reuslts follow
the example herein.

=head1 AUTHOR

C<WWW::Search> hack by Jim Smyser, <jsmyser@bigfoot.com>.

=head1 COPYRIGHT

Copyright (c) 1996 University of Southern California.
All rights reserved.                                            
                                                               
Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation, advertising
materials, and other materials related to such distribution and use
acknowledge that the software was developed by the University of
Southern California, Information Sciences Institute.  The name of the
University may not be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

=head1 VERSION HISTORY

2.02 - Added HELP POD. Misc. Clean-up for latest changes.

2.01 - Additional query modifiers added for even better results.

2.0 - Minor change to set lowercase Boolean operators to uppercase.

1.9 - First hack version release.

=cut
#'

#####################################################################

require Exporter;
@EXPORT = qw();
@EXPORT_OK = qw();
@ISA = qw(WWW::Search::AltaVista Exporter);
$VERSION = '2.02';
use WWW::Search::AltaVista;
use WWW::Search(generic_option);


# public
sub version { $VERSION }

# private
sub native_setup_search
{
    my($self, $native_query, $native_options_ref) = @_;
    $self->user_agent('user');
    $self->{_next_to_retrieve} = 0;
    # Upper case all lower case Boolean operators. Be nice if
    # I could just uppercase the entire string, but this may
    # have undesirable search side effects. 
    if (!defined($self->{_options})) {
    $self->{_options} = {
        'pg' => 'aq',
        'text' => 'yes',
        'what' => 'web',
        'fmt' => 'd',
        'q' => $native_query,
         'search_url' => 'http://www.altavista.com/cgi-bin/query',
        };
    };
    my($options_ref) = $self->{_options};
    if (defined($native_options_ref)) {
    # Copy in new options.
    foreach (keys %$native_options_ref) {
        $options_ref->{$_} = $native_options_ref->{$_};
    };
    };
    # Process the options.
    my($options) = '';
    foreach (keys %$options_ref) {
    # printf STDERR "option: $_ is " . $options_ref->{$_} . "\n";
    next if (generic_option($_));
    $options .= $_ . '=' . $options_ref->{$_} . '&';
    };
    $self->{_debug} = $options_ref->{'search_debug'};
    $self->{_debug} = 2 if ($options_ref->{'search_parse_debug'});
    $self->{_debug} = 0 if (!defined($self->{_debug}));
    # Finally figure out the url.

    # Here I remove known Boolean operators from the 'r' query option 
    # which is used by AltaVista to sort the results. Finally, clean 
    # up by removing as many of the double ++'s as possibe left behind.
    $native_query =~ s/AND//ig;
    $native_query =~ s/OR//ig;
    $native_query =~ s/NOT//ig;
    $native_query =~ s/NEAR//ig;
    $native_query =~ s/"//g;
    $native_query =~ s/%28//g;
    $native_query =~ s/%29//g;
    $native_query =~ s/(\w)\053\053/$1\053/g;
    # strip down the query words
    $native_query =~ s/\W*(\w+\W+\w+\w+\W+\w+).*/$1/;
    $self->{_base_url} = 
    $self->{_next_url} =
    $self->{_options}{'search_url'} .
    "?" . $options .
    "r=" . $native_query;
    print $self->{_base_url} . "\n" if ($self->{_debug});
    # Return to AltaVista for parsing work....
    return $self->SUPER::native_retrieve_some(@_);
}
1;