#!/usr/bin/env perl
use strict;
use warnings;
use File::Temp;
use Path::Class;
use List::Util qw(sum);
use Getopt::Long::Descriptive;
#-----------------------------------------------------------------------------
# Copyright 2013 Jeffrey Ryan Thalhammer <jeff@stratopan.com>
#-----------------------------------------------------------------------------
# To diagnose leaks:
# use Pinto::Util;
# *Pinto::Util::throw = sub { warn ">>> Called my throw"; die "@_" };
# use Devel::Leak::Object qw{ GLOBAL_bless };
#-----------------------------------------------------------------------------
my ($opt, $usage) = describe_options(
"$0 %o TARGETS",
[ 'root|r=s', "Root of repository", ],
[ 'cpan|c=s', "Path to a local cpan", ],
[ 'batch-size|s=i', "Distributions per batch", { default => 100 } ],
[ 'batch-offset|o=i', "Batch number to start at", { default => 0 } ],
[ 'batch-count|n=i', "Number of batches to run", { default => 0 } ],
);
my $batch_size = $opt->batch_size;
my $batch_offset = $opt->batch_offset;
my $batch_count = $opt->batch_count;
my $root = $opt->root || File::Temp->newdir;
my $cpan = $opt->cpan or die "Must specify --cpan";
#-----------------------------------------------------------------------------
unless (-e $root) {
require Pinto::Initializer;
my %init_args = (sources => "file://$cpan", root => $root, no_history => 1);
Pinto::Initializer->init(%init_args);
}
#-----------------------------------------------------------------------------
chomp (my @dists = ($ARGV[0] || '') eq '-' ? <STDIN> : search_for_dists($cpan));
my @batches = map { [splice @dists, 0, $batch_size, ()] } 1..int (@dists/$batch_size+1);
# Lop off the leading batches, if offset given
# TODO: make sure batch offset is in range
splice @batches, 0, $batch_offset, () if $batch_offset;
# Lop off trailing batches, if count is given
# TODO: make sure count is in range
@batches = @batches[0..($batch_count-1)] if $batch_count;
# Count the total number of dists
my $count = (@batches - 1) * $batch_size + @{ $batches[-1] };
#-----------------------------------------------------------------------------
printf "Loading %i distributions (in batches of %i) into repository at %s\n", $count, $batch_size, $root;
require Pinto;
my $pinto = Pinto->new(root => "$root");
my $n = $batch_offset;
while (@batches) {
local $ENV{PINTO_NO_CHECKSUMS} = 1;
local $ENV{PINTO_ALLOW_DUPLICATES} = 1;
my $batch = shift @batches;
s|^.*/authors/id/./../|| for @{ $batch };
my $start = time;
my $result = $pinto->run(Pull => (targets => $batch, no_recurse => 1, no_fail => 1, message => ''));
printf "Batch %i loaded in %i seconds at %s\n", $n, time - $start, scalar localtime;
if ($n % 100 == 0) {
print "Vacuuming database\n";
$pinto->repo->db->schema->storage->dbh->do('VACUUM;');
$pinto->repo->db->schema->storage->dbh->do('ANALYZE;');
}
$n++;
}
#-----------------------------------------------------------------------------
sub search_for_dists {
my ($cpan) = @_;
printf "Searching for distributions beneath %s\n", $cpan;
require File::Find;
my @dists;
my $cb = sub {push @dists, $_ if /[.](gz|tgz|zip|bz2)$/i and not /perl/ and not /BadExample/};
File::Find::find( {no_chdir => 1, wanted => $cb}, dir($cpan)->subdir( qw(authors id) ));
@dists = map {$_->[0]} sort {$a->[1] <=> $b->[1]} map { [$_ => (stat $_)[9]] } @dists;
printf "Found %d distributions in total\n", scalar @dists;
return @dists;
}