The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package CPAN::Search::Lite::PPM;
use strict;
use LWP::UserAgent;
use SOAP::Lite;
use LWP::Simple;
use HTTP::Date;
use XML::SAX;
use CPAN::Search::Lite::Util qw($repositories has_data);
use CPAN::Search::Lite::DBI::Index;
use CPAN::Search::Lite::DBI qw($dbh);
our $VERSION = 0.77;

our $dbh = $CPAN::Search::Lite::DBI::dbh;
our %wanted = map {$_ => 1} qw(SOFTPKG ABSTRACT ARCHITECTURE);
our $arch = '';
my %arch = ('5.6' => 'MSWin32-x86-multi-thread',
	    '5.8' => 'MSWin32-x86-multi-thread-5.8',
	   );

my %months = ('Jan' => '01',
	      'Feb' => '02',
	      'Mar' => '03',
	      'Apr' => '04',
	      'May' => '05',
	      'Jun' => '06',
	      'Jul' => '07',
	      'Aug' => '08',
	      'Sep' => '09',
	      'Oct' => '10',
	      'Nov' => '11',
	      'Dec' => '12',
	     );
my @tries = qw(searchsummary.ppm package.lst);

sub new {
    my ($class, %args) = @_;
    foreach (qw(db user passwd dists) ) {
      die "Must supply a '$_' argument" unless defined $args{$_};
    }
    my $cdbi = CPAN::Search::Lite::DBI::Index->new(%args);
    my $self = {dists => $args{dists}, ppms => {}, setup => $args{setup},
		curr_mtimes => {}, update_mtimes => {}};
    bless $self, $class;
}

sub fetch_info {
  my $self = shift;
  unless ($self->{setup}) {
    $self->fetch_mtime() or return;
  }
  my $dists = $self->{dists};
  my $ppm = {};
  for my $id (keys %$repositories) {
    my $location = $repositories->{$id}->{LOCATION};
    print "Getting ppm information from $location\n";
    my $packages = $self->summary($id, $location);
    next unless $packages;
    if (ref($packages) eq 'HASH') {
      foreach my $package (keys %$packages) {
	next unless $dists->{$package};
	my $version = ppd2cpan_version($packages->{$package}->{version});
	my $abstract = $packages->{$package}->{abstract};
	$dists->{$package}->{description} = $abstract
	  unless $dists->{$package}->{description};
	$ppm->{$id}->{$package} = {
				   version => $version,
				   abstract => $abstract,
				  };
      }
    }
    else {
      $ppm->{$id} = 1;
    }
  }
  $self->{ppms} = $ppm;
  $self->update_mtime() if (has_data($self->{update_mtimes}));
  return 1;
}

sub fetch_mtime {
  my $self = shift;
  my $mtimes = {};
  unless ($dbh) {
    $self->{error_msg} = q{No db handle available};
    return;
  }
  my $sql = q{ SELECT rep_id,mtime FROM reps };
  my $sth = $dbh->prepare($sql);
  $sth->execute() or do {
    $self->db_error($sth);
    return;
  };
  while (my ($rep_id, $mtime) = $sth->fetchrow_array) {
    next unless $rep_id;
    $mtimes->{$rep_id} = $mtime;
  }
  $sth->finish;
  $self->{curr_mtimes} = $mtimes;
  return 1;
}

sub update_mtime {
  my $self = shift;
  my $mtimes = $self->{update_mtimes};
  unless ($dbh) {
    $self->{error_msg} = q{No db handle available};
    return;
  }
  my $sth;
  foreach my $id(keys %$mtimes) {
    my $mtime = $mtimes->{$id};
    next unless (defined $id and defined $mtime);
    my $sql = q{ UPDATE LOW_PRIORITY reps } .
      qq{ SET mtime="$mtime" WHERE rep_id=$id};
    $sth = $dbh->prepare($sql);
    $sth->execute() or do {
      $self->db_error($sth);
      return;
    };
    $sth->finish;
  }
  $dbh->commit or do {
    $self->db_error($sth);
    return;
  };
  return 1;
}

sub summary {
  my ($self, $id, $url) = @_;
  $url .= '/' unless $url =~ m@/$@;
  my $file;
  my ($type, $length, $mtime, $expires, $server);
  foreach my $try (@tries) {
    ($type, $length, $mtime, $expires, $server) = head("$url$try");
    if (defined $mtime) {
      $file = $try;
      last;
    }
  }
  unless (defined $mtime) {
    print "Could not get ppm info from $url\n";
    return;
  }

  my $mtimes = $self->{curr_mtimes};
  my $string = time2str($mtime);
  my ($wday, $day, $month, $year, $time, $tz) = split ' ', $string;
  my $stamp = "$year-$months{$month}-$day $time";
  if (defined $mtimes->{$id} and $mtimes->{$id} eq $stamp) {
    print "$url is up to date\n";
    return 1;
  }

  $arch = $arch{$repositories->{$id}->{PerlV}};
  my $packages = parse($url, $file);
  unlink $file;
  unless (has_data($packages)) {
    print "Info from $url contains no data\n";
    return;
  }
  $self->{update_mtimes}->{$id} = $stamp;
  return $packages;
}

sub parse {
  my ($url, $file) = @_;
  $url .= '/' unless ($url =~ m@/$@);
  my $remote = $url . $file;
  unless (is_success(getstore($remote, $file) )) {
    print "Cannot obtain $file from $url";
    return;
  }

  XML::SAX->add_parser(q(XML::SAX::ExpatXS));
  my $factory = XML::SAX::ParserFactory->new();
  my $handler = PPMHandler->new();
  my $parser = $factory->parser( Handler => $handler);

  eval { $parser->parse_uri($file); };
  if ($@) {
    print "Error in parsing $file: $@\n";
    return;
  }
  my $pkgs = $handler->{pkgs};
  return $pkgs;
}

sub ppd2cpan_version {
  local $_ = shift;
  s/(,0)*$//;
  tr/,/./;
  return $_;
}

sub db_error {
  my ($obj, $sth) = @_;
  return unless $dbh;
  $sth->finish if $sth;
  $obj->{error_msg} = q{Database error: } . $dbh->errstr;
}

# begin the in-line package
package PPMHandler;
use strict;
use warnings;

my $curr_el = '';
sub new {
    my $type = shift;
    return bless {text => '', pkgs => {}, ppd => {}}, $type;
}

sub start_document {
  my ($self) = @_;
  # print "Starting document\n";
  $self->{text} = '';
}

sub start_element {
  my ($self, $element) = @_;
  $curr_el = $element->{Name};
  return unless $wanted{$curr_el};
  #print "Starting $element->{Name}\n";
  my $ppd = $self->{ppd};
  $ppd->{keep} = 0 if $curr_el eq 'SOFTPKG';
  $self->display_text();
  foreach my $ak (keys %{ $element->{Attributes} } ) {
    my $at = $element->{Attributes}->{$ak};
    my $name = $at->{Name};
    my $value = $at->{Value};
    $ppd->{keep} = 1 if ($curr_el eq 'ARCHITECTURE' and $value eq $arch);
    $ppd->{$curr_el}->{$name} = $value if $curr_el eq 'SOFTPKG';
    #print qq(Attribute $at->{Name} = "$at->{Value}"\n);
  }
}

sub characters {
  my ($self, $characters) = @_;
  my $text = $characters->{Data};
  $text =~ s/^\s*//;
  $text =~ s/\s*$//;
  $self->{text} .= $text;
}

sub end_element {
  my ($self, $element) = @_;
  $curr_el = $element->{Name};
  return unless $wanted{$curr_el};
  $self->display_text();
  if ($curr_el eq 'SOFTPKG') {
    my $ppd = $self->{ppd};
    if ($ppd->{keep}) {
      $self->{pkgs}->{$ppd->{SOFTPKG}->{NAME}} = 
	{version => $ppd->{SOFTPKG}->{VERSION},
	 abstract => $ppd->{ABSTRACT}->{value}
	};
    }
  }
  # print "Ending $element->{Name}\n";
}

sub display_text {
  my $self = shift;
  my $ppd = $self->{ppd};
  if ( defined( $self->{text} ) && $self->{text} ne "" ) {
    $ppd->{$curr_el}->{value} = $self->{text};
    #print " text: [$self->{text}]\n";
    $self->{text} = '';
  }
}

sub end_document {
  my ($self) = @_;
  # print "Document finished\n";
}

1; #Ye Olde 'Return True' for the in-line package..

__END__

=head1 NAME

CPAN::Search::Lite::PPM - extract ppm package information from repositories

=head1 DESCRIPTION

This module gets information on available ppm packages on remote 
repositories. The repositories searched are specified in
C<$respositories> of I<CPAN::Search::Lite::Util>. Only those
distributions whose names appear from I<CPAN::Search::Lite::Info>
are saved. After creating a I<CPAN::Search::Lite::PPM> object through
the C<new> method and calling the C<fetch_info> method, the 
information is available as:

   my $ppms = $ppm_obj->{ppms};
   for my $rep_id (keys %{$ppms}) {
     print "For repository with id = $rep_id:\n";
     for my $package (keys %{$ppms->{$id}}) {
       print << "END";
 
 Package: $package
 Version: $ppms->{$rep_id}->{$package}->{version}
 Abstract: $ppms->{$rep_id}->{$package}->{abstract}

 END
     }
   }

=head1 SEE ALSO

L<CPAN::Search::Lite::Index>

=cut

=cut