The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
# -*- perl -*-

#
# Author: Slaven Rezic
#
# Copyright (C) 2009,2010,2012,2013 Slaven Rezic. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Mail: slaven@rezic.de
# WWW:  http://www.rezic.de/eserte/
#

use strict;
use warnings;

use CPAN::DistnameInfo ();

######################################################################

{
    package Parse::CPAN::Packages::Fast;

    our $VERSION = '0.07';
    $VERSION =~ s{_}{};

    use IO::Uncompress::Gunzip qw($GunzipError);
    use CPAN::Version ();

    # Note: this function is possibly interactive, i.e. if CPAN.pm was
    # never configured, or if CPAN's Config.pm needs reconfiguration.
    sub _default_packages_file_interactive {
	my($class) = @_;
	require CPAN;
	no warnings 'once';
	local $CPAN::Be_Silent = 1;
	CPAN::HandleConfig->load;
	my $packages_file = $CPAN::Config->{keep_source_where} . "/modules/02packages.details.txt.gz";
	$packages_file;
    }

    # Note: this function is guaranteed to be non-interactive, but it
    # is using just default locations to look at the CPAN config, or
    # the 02packages files.
    sub _default_packages_file_batch {
	my($class) = @_;

	my $home_cpandir = do {
	    no warnings 'uninitialized'; # HOME may be uninitialized on some systems e.g. Windows
	    "$ENV{HOME}/.cpan";
	};
	if (!$INC{"CPAN/MyConfig.pm"}) {
	    my $home_myconfig = "$home_cpandir/CPAN/MyConfig.pm";
	    if (-r $home_myconfig) {
		local @INC = ($home_cpandir);
		eval { require "CPAN::MyConfig" };
	    }
	}
	if ($INC{"CPAN/MyConfig.pm"} && $CPAN::Config->{keep_source_where}) {
	    my $packages_file = $CPAN::Config->{keep_source_where} . "/modules/02packages.details.txt.gz";
	    if (-r $packages_file && -s $packages_file) {
		return $packages_file;
	    }
	}

	# Cannot find a usable CPAN::MyConfig, try a default location
	my $packages_file = "$home_cpandir/sources/modules/02packages.details.txt.gz";
	if (-r $packages_file && -s $packages_file) {
	    return $packages_file;
	}

	undef;
    }

    *_default_packages_file = \&_default_packages_file_interactive;

    sub new {
	my($class, $packages_file) = @_;

	if (!$packages_file) {
	    $packages_file = $class->_default_packages_file;
	    if (!$packages_file) {
		die "packages file not specified and cannot be determined from CPAN.pm configuration";
	    }
	}

	my %pkg_to_dist;
	my %dist_to_pkgs;
	my %pkg_ver;

	my $FH = IO::Uncompress::Gunzip->new($packages_file)
	    or die "Can't open $packages_file: $GunzipError";
	# overread header
	while(<$FH>) {
	    last if /^$/;
	}
	# read payload
	while(<$FH>) {
	    my($pkg, $ver, $dist) = split;
	    $pkg_to_dist{$pkg} = $dist;
	    $pkg_ver{$pkg} = $ver;
	    push @{ $dist_to_pkgs{$dist} }, $pkg;
	}
	
	bless { pkg_to_dist  => \%pkg_to_dist,
		dist_to_pkgs => \%dist_to_pkgs,
		pkg_ver      => \%pkg_ver,
	      }, $class;
    }

    sub package {
	my($self, $package_name) = @_;
	return undef if !exists $self->{pkg_ver}{$package_name};
	Parse::CPAN::Packages::Fast::Package->new($package_name, $self);
    }

    sub packages {
	my $self = shift;
	keys %{ $self->{pkg_ver} };
    }

    sub package_count {
	my $self = shift;
	scalar keys %{ $self->{pkg_ver} };
    }

    sub distribution {
	my($self, $distribution_name) = @_;
	die "Distribution $distribution_name does not exist" if !exists $self->{dist_to_pkgs}{$distribution_name}; # XXX die or not?
	Parse::CPAN::Packages::Fast::Distribution->new($distribution_name, $self);
    }

    sub distributions {
	my $self = shift;
	map { Parse::CPAN::Packages::Fast::Distribution->new($_, $self) } keys %{ $self->{dist_to_pkgs} };
    }

    sub distribution_count {
	my $self = shift;
	my @dists = $self->distributions;
	scalar @dists;
    }

    sub latest_distribution {
	my($self, $distribution_name) = @_;
	my @candidates;
	for my $candidate (keys %{ $self->{dist_to_pkgs} }) {
	    if ($candidate =~ m{^./../.*/\Q$distribution_name}) {
		# Possibly pure CPAN::DistnameInfo is somewhat faster
		# than Parse::CPAN::Packages::Fast::Distribution (no
		# inside-out handling, no additional DESTROY)
		my $d = CPAN::DistnameInfo->new($candidate);
		no warnings 'uninitialized'; # Some distributions have no parseable dist name
		if ($d->dist eq $distribution_name) {
		    push @candidates, $d;
		}
	    }
	}
	return if !@candidates; # XXX die or not?
	my $best_candidate = pop @candidates;
	my $best_candidate_version = $best_candidate->version;
	for my $candidate (@candidates) {
	    my $this_version = $candidate->version;
	    if (CPAN::Version->vlt($best_candidate_version, $this_version)) {
		$best_candidate = $candidate;
		$best_candidate_version = $this_version;
	    }
	}
	Parse::CPAN::Packages::Fast::Distribution->new($best_candidate->pathname, $self);
    }

    sub latest_distributions {
	my $self = shift;
	my %latest_dist;
	for my $pathname (keys %{ $self->{dist_to_pkgs} }) {
	    my $d = Parse::CPAN::Packages::Fast::Distribution->new($pathname, $self);
	    my $dist = $d->dist;
	    next if !defined $dist;
	    if (!exists $latest_dist{$dist}) {
		$latest_dist{$dist} = $d;
	    } else {
		if (CPAN::Version->vlt($latest_dist{$dist}->version, $d->version)) {
		    $latest_dist{$dist} = $d;
		}
	    }
	}
	values %latest_dist;
    }

    sub latest_distribution_count {
	my $self = shift;
	my @dists = $self->latest_distributions;
	scalar @dists;
    }

    # Addition: fast module lookup without loading whole packages file
    # Not yet official!
    sub _module_lookup {
	die "Usage?" if @_ != 4;
	my($class, $module, $orig_packages_file, $cache_file) = @_;
	require Search::Dict;
	my $pwhfh = $class->_get_plain_packages_fh($orig_packages_file, $cache_file);
	my $skey = "$module ";
	return if Search::Dict::look($pwhfh, $skey, 0, 1) == -1;
	while () {
	    my $got = <$pwhfh>;
	    if (index($got, $skey) == 0) {
		chomp $got;
		my($pkg, $ver, $dist) = split /\s+/, $got;
		return {
			package => $pkg,
			version => $ver,
			dist    => $dist,
		       };
	    }
	    return if lc(substr($got, 0, length($skey))) gt lc($skey);
	    return if eof($pwhfh);
	}
    }
	
    sub _get_plain_packages_fh {
	die "Usage?" if @_ != 3;
	my(undef, $orig_packages_file, $cache_file) = @_;
	die "$orig_packages_file does not exist" if !-e $orig_packages_file;
	if (!-e $cache_file || -M $cache_file > -M $orig_packages_file) {
	    my $ifh = IO::Uncompress::Gunzip->new($orig_packages_file)
		or die "Can't open $orig_packages_file: $GunzipError";
	    require File::Temp;
	    require File::Basename;
	    my($tmpfh,$tmpfile) = File::Temp::tempfile(DIR => File::Basename::dirname($cache_file))
		or die "Can't create temporary file: $!";
	    while (<$ifh>) {
		last if /^$/;
	    }
	    {
		local $/ = \8192;
		while (<$ifh>) {
		    print $tmpfh $_;
		}
	    }
	    close $tmpfh
		or die "Error while writing temporary file $tmpfile: $!";
	    rename $tmpfile, $cache_file
		or die "While renaming $tmpfile to $cache_file: $!";
	}
	open my $fh, $cache_file
	    or die "Can't open $cache_file: $!";
	$fh;
    }
	
}

######################################################################

{

    package Parse::CPAN::Packages::Fast::Package;

    our $VERSION = $Parse::CPAN::Packages::Fast::VERSION;

    # Use inside-out technique for this member, to hide it in dumps etc.
    my %obj_to_packages;

    sub new {
	my($class, $package_name, $packages) = @_;
	my $self = bless { package  => $package_name,
			   version  => $packages->{pkg_ver}{$package_name},
			 }, 'Parse::CPAN::Packages::Fast::Package';
	$obj_to_packages{$self} = $packages;
	$self;
    }

    for my $method (qw(package version)) {
	no strict 'refs';
	*{$method} = sub { shift->{$method} };
    }

    sub distribution {
	my $self = shift;
	my $packages = $obj_to_packages{$self};
	my $dist = $packages->{pkg_to_dist}->{$self->package};
	Parse::CPAN::Packages::Fast::Distribution->new($dist, $packages);
    }

    sub prefix {
	my $self = shift;
	$self->distribution->prefix;
    }

    sub DESTROY {
	my $self = shift;
	delete $obj_to_packages{$self};
    }
}

######################################################################

{
    package Parse::CPAN::Packages::Fast::Distribution;

    our $VERSION = $Parse::CPAN::Packages::Fast::VERSION;

    use base qw(CPAN::DistnameInfo);

    # Use inside-out technique for this member, to hide it in dumps etc.
    my %obj_to_packages;

    sub new {
	my($class, $pathname, $packages) = @_;
	my $self = $class->SUPER::new($pathname);
	$obj_to_packages{$self} = $packages;
	$self;
    }
    
    sub prefix {
	my $self = shift;
	my $prefix = $self->pathname;
	$prefix =~ s{^authors/id/}{};
	$prefix;
    }

    sub contains {
	my $self = shift;
	my $packages = $obj_to_packages{$self};
	map { Parse::CPAN::Packages::Fast::Package->new($_, $packages) } @{ $packages->{dist_to_pkgs}{$self->pathname} };
    }

    sub DESTROY {
	my $self = shift;
	delete $obj_to_packages{$self};
    }

    # Methods found in original Parse::CPAN::Packages::Distribution
    sub add_package {
	die "NYI";
    }

    # Would be nice to have:
    sub is_latest_distribution {
	die "NYI";
    }
}

######################################################################

1;

__END__

=head1 NAME

Parse::CPAN::Packages::Fast - parse CPAN's package index

=head1 SYNOPSIS

    use Parse::CPAN::Packages::Fast;

    my $p = Parse::CPAN::Packages::Fast->new("/path/to/02packages.details.txt.gz");
    ## Or alternatively, if CPAN.pm is configured
    #my $p = Parse::CPAN::Packages::Fast->new;

    my $m = $p->package("Kwalify");
    # $m is a Parse::CPAN::Packages::Fast::Package object
    print $m->package, "\n";   # Kwalify
    print $m->version, "\n";   # 1.21

    my $d = $m->distribution;
    # $d is a Parse::CPAN::Packages::Fast::Distribution object
    print $d->dist,    "\n";   # Kwalify
    print $d->version, "\n";   # 1.21

=head1 DESCRIPTION

This is a largely API compatible rewrite of L<Parse::CPAN::Packages>.

Notable differences are

=over

=item * The method add_package of
Parse::CPAN::Packages::Fast::Distribution is not implemented

=item * Parse::CPAN::Packages::Fast::Distribution is really a
L<CPAN::DistnameInfo> (but this one is compatible with
Parse::CPAN::Packages::Distribution>

=item * A Parse::CPAN::Packages::Fast::Distribution object does not
have its packages included in the data structure, but it's necessary
to use the C<contains> method. Likewise, a
Parse::CPAN::Packages::Fast::Package object does not include the
containing distribution in the data structure, but it's necessary to
use the C<distribution> method.

=item * The C<new> constructor may be called without the path to the
C<02packages.details.txt> file. In this case L<CPAN.pm|CPAN>'s logic
is used to find an existing packages file. Note that this might be
interactive (i.e. if CPAN.pm was never configured, or needs
reconfiguration), so don't do this in batch systems.

=back

=head2 WHY?

Calling C<Parse::CPAN::Packages>' constructor is quite slow and takes
about 10 seconds on my machine. In contrast, the reimplementation just
takes a second.

I did some benchmarking of the original module and found no obvious
weak point to speed it up. Moose is used here, but does not seem to
cause the problem. I suspect that the real problem is just heavy use
of method calls.

Also, this module does not have problems with circular references like
the original. See L<Parse::CPAN::Packages/BUGS>.

=head1 SEE ALSO

L<Parse::CPAN::Packages>, L<CPAN::DistnameInfo>.

=cut