The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyrights 1998,2005-2016 by [Mark Overmeer].
#  For other contributors see ChangeLog.
# See the manual pages for details on the licensing terms.
# Pod stripped from pm file by OODoc 2.02.
use warnings;
use strict;

package CPAN::Site::Index;
use vars '$VERSION';
$VERSION = '1.13';

use base 'Exporter';

our @EXPORT_OK = qw/cpan_index cpan_mirror/;
our $VERSION;  # required in test-env

use Log::Report     'cpan-site', syntax => 'SHORT';

use version;
use File::Find      qw/find/;
use File::Copy      qw/copy/;
use File::Basename  qw/basename dirname/;
use HTTP::Date      qw/time2str/;
use File::Spec::Functions qw/catfile catdir splitdir/;
use LWP::UserAgent  ();
use Archive::Tar    ();
use Archive::Zip    qw(:ERROR_CODES :CONSTANTS);
use CPAN::Checksums ();
use IO::Zlib        ();

my $tar_gz      = qr/ \.tar\.gz$ | \.tar\.Z$ | \.tgz$/xi;
my $zip         = qr/ \.zip$ /xi;
my $cpan_update = 0.04; # days between reload of full CPAN index
my $ua;

sub safe_copy($$);
sub cpan_index($@);
sub register($$$);
sub package_inventory($$;$);
sub package_on_usual_location($);
sub inspect_archive;
sub inspect_tar_archive($$);
sub inspect_zip_archive($$);
sub collect_package_details($$$);
sub update_global_cpan($$);
sub load_file($$);
sub merge_global_cpan($$$);
sub create_details($$$$$);
sub calculate_checksums($);
sub read_details($);
sub remove_expired_details($$$);
sub mkdirhier(@);
sub cpan_mirror($$$@);

sub safe_copy($$)
{   my ($from, $to) = @_;
    trace "copy $from to $to";
    copy $from, $to
        or fault __x"cannot copy {from} to {to}", from => $from, to => $to;
}

sub cpan_index($@)
{   my ($mycpan, $globalcpan, %opts) = @_;
    my $lazy     = $opts{lazy};
    my $fallback = $opts{fallback};
    my $undefs   = exists $opts{undefs} ? $opts{undefs} : 1;

    unless($ua)
    {   $ua = LWP::UserAgent->new;
        $ua->env_proxy if $opts{env_proxy};
    }

    -d $mycpan
        or error __x"archive top '{dir}' is not a directory"
             , dir => $mycpan;

    my $program     = basename $0;
    $VERSION      ||= 'undef';   # test env at home
    trace "$program version $VERSION";

    my $global      = catdir $mycpan, 'global';
    my $mods        = catdir $mycpan, 'modules';
    my $authors     = catdir $mycpan, 'authors';
    mkdirhier $global, $mods, $authors;

    my $globdetails = update_global_cpan $mycpan, $globalcpan;

    # Create mailrc and modlist

    safe_copy catfile($global, '01mailrc.txt.gz')
            , catfile($authors, '01mailrc.txt.gz');

    safe_copy catfile($global, '03modlist.data.gz')
            , catfile($mods, '03modlist.data.gz');

    # Create packages details

    my $details     = catfile $mods, '02packages.details.txt.gz';
    my $newlist     = catfile $mods, '02packages.details.tmp.gz';
    my $newer;

    my $reuse_dists = {};
    if($lazy && -f $details)
    {   $reuse_dists = read_details $details;
        $newer       = -M $details;
        remove_expired_details $mycpan, $reuse_dists, $newer;
    }

    my ($mypkgs, $distdirs)
      = package_inventory $mycpan, $reuse_dists, $newer;

    merge_global_cpan $mycpan, $mypkgs, $globdetails
        if $fallback;

    create_details $details, $newlist, $mypkgs, $lazy, $undefs;

    if(-f $details)
    {   trace "backup old details file to $details.bak";
        safe_copy $details, "$details.bak";
    }

    if(-f $newlist)
    {   trace "promoting $newlist to current";
        rename $newlist, $details
            or error __x"cannot rename '{from}' in '{to}'"
                 , from => $newlist, to => $details;
    }

    calculate_checksums $distdirs;
}

#
# Package Inventory
#

# global variables for testing purposes (sorry)
our ($topdir, $findpkgs, %finddirs, $olddists, $index_age);

sub register($$$)
{   my ($package, $this_version, $dist) = @_;

    # warn "register $package, " . (defined $this_version ? $this_version : 'undef');

    if(ref $this_version)
    {   eval { $this_version = version->parse($this_version) };
        if($@)
        {   alert __x"error when creating version object for {pkg}: {err}"
               , pkg => $package, err => $@;
            return;
        }
    }
      
    my $registered_version = exists $findpkgs->{$package} ? $findpkgs->{$package}[0] : undef;
    $this_version =~ s/^v// if $this_version;

    return if defined $registered_version
           && $registered_version > $this_version;

    $findpkgs->{$package} = [ $this_version, $dist ];
}

sub package_inventory($$;$)
{   (my $mycpan, $olddists, $index_age) = @_;   #!!! see "my"
    $topdir   = catdir $mycpan, 'authors', 'id';
    mkdirhier $topdir;

    $findpkgs = {};
    trace "creating inventory from $topdir";

    find {wanted => \&inspect_archive, no_chdir => 1}, $topdir;
    ($findpkgs, \%finddirs);
}

sub package_on_usual_location($)
{   my $file  = shift;
    my ($top, $subdir, @rest) = splitdir $file;
    defined $subdir or return 0;

       !@rest             # path is at top-level of distro
    || $subdir eq 'lib';  # inside lib
}

sub inspect_archive
{   my $fn = $File::Find::name;
    return unless -f $fn
               && ($fn =~ $tar_gz || $fn =~ $zip);

    (my $dist = $fn) =~ s!^\Q$topdir\E[\\/]!!;

    if(defined $index_age && -M $fn > $index_age)
    {
        unless(exists $olddists->{$dist})
        {   trace "not the latest: $dist";
            return;
        }

        trace "latest older than index: $dist";
        foreach (@{$olddists->{$dist}})
        {   my ($pkg, $version) = @$_;
            register $pkg, $version, $dist;
        }
        return;
    }

    trace "inspecting archive $fn";
    $finddirs{$File::Find::dir}++;

    return inspect_tar_archive $dist, $fn
        if $fn =~ $tar_gz;

    return inspect_zip_archive $dist, $fn
        if $fn =~ $zip;
}

sub inspect_tar_archive($$)
{   my ($dist, $fn) = @_;

    my $arch =  Archive::Tar->new;
    $arch->read($fn, 1)
        or error __x"no files in tar archive '{fn}': {err}"
             , fn => $fn, err => $arch->error;

    foreach my $file ($arch->get_files)
    {   my $fn = $file->full_path;
        $file->is_file && $fn =~ m/\.pm$/i && package_on_usual_location $fn
            or next;
        collect_package_details $fn, $dist, $file->get_content_by_ref;
    }
}

sub inspect_zip_archive($$)
{   my ($dist, $fn) = @_;

    my $arch =  Archive::Zip->new;
    $arch->read($fn)==AZ_OK
        or error __x"no files in zip archive '{fn}': {err}"
             , fn => $fn, err => $arch->error;

    foreach my $member ($arch->membersMatching( qr/\.pm$/i ))
    {   my $fn = $member->fileName;
        $member->isTextFile && package_on_usual_location $fn
            or next;
        my ($contents, $status) = $member->contents;
        $status==AZ_OK
            or error "error in zip file {fn}: {err}"
               , fn => $fn, err => $status;
        collect_package_details $fn, $dist, \$contents;
    }
}

sub collect_package_details($$$)
{   my ($fn, $dist) = (shift, shift);
    my @lines  = split /\r?\n/, ${shift()};
    my $in_pod = 0;
    my $package;
    local $VERSION = undef;  # may get destroyed by eval

    while(@lines)
    {   local $_ = shift @lines;
        last if m/^__(?:END|DATA)__$/;

        $in_pod = ($1 ne 'cut') if m/^=(\w+)/;
        next if $in_pod || m/^\s*#/;

        $_ .= shift @lines
            while @lines && m/package|use|VERSION/ && !m/\;/;

        if( m/^\s* package \s* ((?:\w+\:\:)*\w+) (?:\s+ (\S*))? \s* ;/x )
        {   my ($thispkg, $v) = ($1, $2);
            my $thisversion;
            if($v)
            {   $thisversion = eval {qv($v)};
                alert __x"illegal version for {pkg}, found '{version}': {err}"
                   , pkg => $thispkg, version => $v, err => $@  if $@;
            }

            # second package in file?
            register $package, $VERSION, $dist
                if defined $package;

            ($package, $VERSION) = ($thispkg, $thisversion);
            trace "pkg $package from $fn";
        }

        if( m/^ (?:use\s+version\s*;\s*)?
            (?:our)? \s* \$ ((?: \w+\:\:)*) VERSION \s* \= (.*)/x )
        {   defined $2 or next;
            my ($ns, $vers) = ($1, $2);

            # some versions of CPAN.pm do contain lines like "$VERSION =~ ..."
            # which also need to be processed.
            eval "\$VERSION =$vers";
            if(defined $VERSION)
            {   ($package = $ns) =~ s/\:\:$//
                    if length $ns;
                trace "pkg $package version $VERSION";
            }
        }
    }

    $VERSION = $VERSION->numify if ref $VERSION;
    register $package, $VERSION, $dist
        if defined $package;
}

sub update_global_cpan($$)
{   my ($mycpan, $globalcpan) = @_;

    my $global = catdir $mycpan, 'global';
    my ($mailrc, $globdetails, $modlist) = 
       map { catfile $global, $_ }
         qw/01mailrc.txt.gz 02packages.details.txt.gz 03modlist.data.gz/;

    return $globdetails
       if -f $globdetails && -f $globdetails && -f $modlist
       && -M $globdetails < $cpan_update;

    info "(re)loading global CPAN files";

    mkdirhier $global;
    load_file "$globalcpan/authors/01mailrc.txt.gz",   $mailrc;
    load_file "$globalcpan/modules/02packages.details.txt.gz", $globdetails;
    load_file "$globalcpan/modules/03modlist.data.gz", $modlist;
    $globdetails;
}

sub load_file($$)
{   my ($from, $to) = @_;
    my $response = $ua->get($from, ':content_file' => $to);
    return if $response->is_success;

    unlink $to;
    error __x"failed to get {uri} for {to}: {err}"
      , uri => $from, to => $to, err => $response->status_line;
}

sub merge_global_cpan($$$)
{   my ($mycpan, $pkgs, $globdetails) = @_;

    trace "merge packages with CPAN core list in $globdetails";
    my $cpan_pkgs = read_details $globdetails;

    while(my ($cpandist, $cpanpkgs) = each %$cpan_pkgs)
    {   foreach (@$cpanpkgs)
        {  my ($pkg, $version) = @$_;
           next if exists $pkgs->{$pkg};
           $pkgs->{$pkg} = [$version, $cpandist];
        }
    }
}

sub create_details($$$$$)
{  my ($details, $filename, $pkgs, $lazy, $undefs) = @_;

   trace "creating package details in $filename";
   my $fh = IO::Zlib->new($filename, 'wb')
      or fault __x"generating gzipped {fn}", fn => $filename;

   my $lines = keys %$pkgs;
   my $date  = time2str time;
   my $how   = $lazy ? "lazy" : "full";

   info "produced list of $lines packages $how";

   my $program     = basename $0;
   my $module      = __PACKAGE__;
   $fh->print (<<__HEADER);
File:         02packages.details.txt
URL:          file://$details
Description:  Packages listed in CPAN and local repository
Columns:      package name, version, path
Intended-For: private CPAN
Line-Count:   $lines
Written-By:   $program with $module $CPAN::Site::Index::VERSION ($how)
Last-Updated: $date

__HEADER

   foreach my $pkg (sort keys %$pkgs)
   {  my ($version, $path) = @{$pkgs->{$pkg}};

      $version = 'undef'
          if !defined $version || $version eq '';

      next
          if $version eq 'undef' && !$undefs;

      $path    =~ s,\\,/,g;
      $fh->printf("%-30s\t%s\t%s\n", $pkg, $version, $path);
   }
}

sub calculate_checksums($)
{   my $dirs = shift;
    trace "updating checksums";

    foreach my $dir (keys %$dirs)
    {   trace "summing $dir";
        CPAN::Checksums::updatedir($dir)
            or warning 'failed calculating checksums in {dir}', dir => $dir;
    }
}

sub read_details($)
{   my $fn = shift;
    -f $fn or return {};
    trace "collecting all details from $fn";

    my $fh    = IO::Zlib->new($fn, 'rb')
       or fault __x"cannot read from {fn}", fn => $fn;

    my $line;   # skip header, search first blank
    do { $line = $fh->getline } until $line =~ m/^\s*$/;

    my $time_last_update = (stat $fn)[9];
    my %dists;

    while(my $line = $fh->getline)
    {   chomp $line;
        my ($pkg, $version, $dist) = split ' ', $line, 3;

        unless($dist)
        {   warning "$fn error line=\n  $line";
            next;
        }

        push @{$dists{$dist}}, [$pkg, $version];
    }

    \%dists;
}

sub remove_expired_details($$$)
{   my ($mycpan, $dists, $newer) = @_;
    trace "extracting only existing local distributions";

    my $authors = catdir $mycpan, 'authors', 'id';
    foreach my $dist (keys %$dists)
    {   my $fn = catfile $authors, $dist;
        if(! -f $fn)
        {   # removed local or a global dist
            delete $dists->{$dist};
        }
        elsif(-M $fn < $newer)
        {   trace "dist $dist file updated, reindexing";
            delete $dists->{$dist};
        }
    }
}

sub mkdirhier(@)
{   foreach my $dir (@_)
    {   next if -d $dir;
        mkdirhier dirname $dir;

        mkdir $dir, 0755
            or fault __x"cannot create directory {dir}", dir => $dir;

        trace "created $dir";
    }
    1;
}

sub cpan_mirror($$$@)
{   my ($mycpan, $globalcpan, $mods, %opts) = @_;
    @$mods or return;
    my %need = map { ($_ => 1) } @$mods;
    my $auth = catdir $mycpan, 'authors', 'id';

    unless($ua)
    {   $ua = LWP::UserAgent->new;
        $ua->env_proxy if $opts{env_proxy};
    }

    my $globdetails
             = update_global_cpan $mycpan, $globalcpan;

    my $fh   = IO::Zlib->new($globdetails, 'rb')
        or fault __x"cannot read from {fn}", fn => $globdetails;

    while(my $line = $fh->getline)   # skip header, search first blank
    {   last if $line =~ m/^\s*$/;
    }

    $ua ||= LWP::UserAgent->new;
    while(my $line = $fh->getline)
    {   my ($pkg, $version, $dist) = split ' ', $line;
        delete $need{$pkg} or next;

        my $to = catfile $auth, split m#/#, $dist;
        if(-f $to)
        {   trace __x"package {pkg} present in distribution {dist}"
              , pkg => $pkg, dist => $dist;
            next;
        }

        my $source   = "$globalcpan/authors/id/$dist";
        mkdirhier dirname $to;
        my $response = $ua->get($source, ':content_file' => $to);
        unless($response->is_success)
        {   unlink $to;
            error __x"failed to get {uri} for {to}: {err}"
              , uri => $source, to => $to, err => $response->status_line;
        }

        info __x"got {pkg} in {dist}", pkg => $pkg, dist => $dist;
    }

    warning __x"package {pkg} does not exist", pkg => $_
        for sort keys %need;
}

1;