The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::Google::PageRank;

# -*- perl -*-

use strict;
use warnings;

use vars qw($VERSION);

use LWP::UserAgent;
use URI::Escape;

$VERSION = '0.19';

sub new {
  my $class = shift;
  my %par = @_;
  my $self;
  $self->{ua} = LWP::UserAgent->new(agent => $par{agent} ||
				    'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)')
    or return;
  $self->{ua}->env_proxy if $par{env_proxy};
  $self->{ua}->proxy('http', $par{proxy}) if $par{proxy};
  $self->{ua}->timeout($par{timeout}) if $par{timeout};
  $self->{host} = $par{host} || 'toolbarqueries.google.com';
  bless($self, $class);
}

sub get {
  my ($self, $url) = @_;
  return unless defined $url and $url =~ m[^https?://]i;

  my $ch = '6' . _compute_ch_new('info:' . $url);
  my $query = 'http://' . $self->{host} . '/tbr?client=navclient-auto&ch=' . $ch .
    '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url);

  my $resp = $self->{ua}->get($query);
  if ($resp->is_success && $resp->content =~ /Rank_\d+:\d+:(\d+)/) {
    if (wantarray) {
      return ($1, $resp);
    } else {
      return $1;
    }
  } else {
    if (wantarray) {
      return (undef, $resp);
    } else {
      return;
    }
  }
}

sub _compute_ch_new {
  my $url = shift;

  my $ch = _compute_ch($url);
  $ch = (($ch % 0x0d) & 7) | (($ch / 7) << 2);

  return _compute_ch(pack("V20", map {my $t = $ch; _wsub($t, $_*9); $t} 0..19));
}

sub _compute_ch {
  my $url = shift;

  my @url = unpack("C*", $url);
  my ($a, $b, $c, $k) = (0x9e3779b9, 0x9e3779b9, 0xe6359a60, 0);
  my $len = scalar @url;

  while ($len >= 12) {
    _wadd($a, $url[$k+0] | ($url[$k+1] << 8) | ($url[$k+2] << 16) | ($url[$k+3] << 24));
    _wadd($b, $url[$k+4] | ($url[$k+5] << 8) | ($url[$k+6] << 16) | ($url[$k+7] << 24));
    _wadd($c, $url[$k+8] | ($url[$k+9] << 8) | ($url[$k+10] << 16) | ($url[$k+11] << 24));

    _mix($a, $b, $c);

    $k += 12;
    $len -= 12;
  }

  _wadd($c, scalar @url);

  _wadd($c, $url[$k+10] << 24) if $len > 10;
  _wadd($c, $url[$k+9] << 16) if $len > 9;
  _wadd($c, $url[$k+8] << 8) if $len > 8;
  _wadd($b, $url[$k+7] << 24) if $len > 7;
  _wadd($b, $url[$k+6] << 16) if $len > 6;
  _wadd($b, $url[$k+5] << 8) if $len > 5;
  _wadd($b, $url[$k+4]) if $len > 4;
  _wadd($a, $url[$k+3] << 24) if $len > 3;
  _wadd($a, $url[$k+2] << 16) if $len > 2;
  _wadd($a, $url[$k+1] << 8) if $len > 1;
  _wadd($a, $url[$k]) if $len > 0;

  _mix($a, $b, $c);

  return $c; # integer is positive always
}

sub _mix {
  my ($a, $b, $c) = @_;

  _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 13;
  _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 8) % 4294967296;
  _wsub($c, $a); _wsub($c, $b); $c ^= $b >>13;
  _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 12;
  _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 16) % 4294967296;
  _wsub($c, $a); _wsub($c, $b); $c ^= $b >> 5;
  _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 3;
  _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 10) % 4294967296;
  _wsub($c, $a); _wsub($c, $b); $c ^= $b >> 15;

  @_[0 .. $#_] = ($a, $b, $c);
}

sub _wadd { $_[0] = int(($_[0] + $_[1]) % 4294967296);}
sub _wsub { $_[0] = int(($_[0] - $_[1]) % 4294967296);}

1;


__END__

=head1 NAME

WWW::Google::PageRank - Query google pagerank of page

=head1 SYNOPSIS

 use WWW::Google::PageRank;
 my $pr = WWW::Google::PageRank->new;
 print scalar($pr->get('http://www.yahoo.com/')), "\n";

=head1 DESCRIPTION

The C<WWW::Google::PageRank> is a class implementing a interface for
querying google pagerank.

To use it, you should create C<WWW::Google::PageRank> object and use its
method get(), to query page rank of URL.

It uses C<LWP::UserAgent> for making request to Google.

=head1 CONSTRUCTOR METHOD

=over 4

=item  $gpr = WWW::Google::PageRank->new(%options);

This method constructs a new C<WWW::Google::PageRank> object and returns it.
Key/value pair arguments may be provided to set up the initial state.
The following options correspond to attribute methods described below:

   KEY                     DEFAULT
   -----------             --------------------
   agent                   "Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)"
   proxy                   undef
   timeout                 undef
   env_proxy               undef
   host                    "toolbarqueries.google.com"

C<agent> specifies the header 'User-Agent' when querying Google.  If
the C<proxy> option is passed in, requests will be made through
specified poxy. C<proxy> is the host which serve requests from Googlebar.

If the C<env_proxy> option is passed in with a TRUE value, then proxy
settings are read from environment variables (see
C<LWP::UserAgent::env_proxy>)

=back

=head1 QUERY METHOD

=over 4

=item  $pr = $gpr->get('http://www.yahoo.com');

Queries Google for a specified pagerank URL and returns pagerank. If
query successfull, integer value from 0 to 10 returned. If query fails
for some reason (google unreachable, url does not begin from
'http://', undefined url passed) it return C<undef>.

In list context this function returns list from two elements where
first is the result as in scalar context and the second is the
C<HTTP::Response> object (returned by C<LWP::UserAgent::get>). This
can be usefull for debugging purposes and for querying failure
details.

=back

=head1 BUGS

If you find any, please report ;)

=head1 AUTHOR

Yuri Karaban F<E<lt>ykar@cpan.orgE<gt>>.

Algorithm of computing checksum taken from mozilla module
pagerankstatus F<http://pagerankstatus.mozdev.org> by
Stephane Queraud F<E<lt>squeraud@toteme.comE<gt>>.

Algorithm was modified (15-09-2004) according to new algorithm of
computingchecksum in googlebar.

=head1 COPYRIGHT

Copyright 2004-2011, Yuri Karaban, All Rights Reserved.

You may use, modify, and distribute this package under the
same terms as Perl itself.