The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
=head1 NAME

Data::Downloader::Repository

=head1 DESCRIPTION

A repository is a collection of files managed by Data::Downloader
which have a common root directory.  Files within a given repository
may come from various RSS feeds and/or url templates.  They may be
stored in multiple top-level subdirectories, which are referred to
as "disks" (since in practice they may be located on different devices).

=head1 METHODS

=over

=cut

package Data::Downloader::Repository;
use Log::Log4perl qw/:easy/;
use Params::Validate qw/validate/;
use File::stat qw/stat/;
use YAML::XS qw/Dump/;
use Number::Format qw/format_number/;
use Data::Downloader::Utils qw/human_size/;
use Fcntl qw(:flock SEEK_END);
use strict;
use warnings;

=item save

Save a repository.  Also rebuild the metadata view (in
case that has changed).

=cut

# TODO handle updating a repository, including doing the
# right thing in different circumstances, e.g. if the
# symlink pattern is changed

# Persist the repository through the parent class, then refresh the
# derived metadata structures (pivot view and file metadata table).
# Returns whatever the parent's save returned.
sub save {
    my ($self, @save_args) = @_;

    my $status = $self->SUPER::save(@save_args);

    # A false status from the parent is handed back untouched; nothing
    # is rebuilt in that case.
    unless ($status) {
        return $status;
    }

    DEBUG "saved metadata source ".$self->name.", rebuilding pivot view and filemetadata table";

    # Rebuild, then set up, each derived class in turn.
    Data::Downloader::MetadataPivot->rebuild_pivot_view;
    Data::Downloader::MetadataPivot->do_setup;

    Data::Downloader::FileMetadata->rebuild_table;
    Data::Downloader::FileMetadata->do_setup;

    return $status;
}

=item download_all

Parameters  :

    fake (boolean) -- fake the download.

Download all known files associated with this repository.

=cut

# Walk every file attached to this repository, download it with the
# caller's options passed through unchanged, and trace each metadata
# name/value pair of the file afterwards.
sub download_all {
    my ($self, %args) = @_; # TODO validate (fake =>1)

    foreach my $file (@{ $self->files }) { # TODO only where not downloaded?
        DEBUG "downloading file : ".$file->filename;
        $file->download(%args);

        # Metadata is only logged (at TRACE level), never modified.
        my $metadata = $file->metadata;
        foreach my $datum (@$metadata) {
            TRACE "    " . $datum->name . " == " . $datum->value;
        }
    }
}

=item cache

Get a cache object for this repository.  See Data::Downloader::Cache.

=cut

# Build the cache object for this repository.
#
# The cache class is Data::Downloader::Cache::<cache_strategy>.  Returns
# nothing (undef / empty list) when the repository has no cache strategy,
# otherwise a new instance of the cache class constructed with
# repository => $self.  Dies via LOGDIE if the strategy does not form a
# valid package name or if the class cannot be loaded.
sub cache {
    my $self = shift;
    my $strategy = $self->cache_strategy or return;
    my $cache_class = "Data::Downloader::Cache::$strategy";

    # The class name is interpolated into a string eval below, so refuse
    # anything that is not a plain package name before evaluating it.
    LOGDIE "invalid cache class name '$cache_class'"
        unless $cache_class =~ /\A\w+(?:::\w+)*\z/;

    # A string eval is required because the class is only known at run
    # time.  The trailing "1" lets the eval's own return value signal
    # success rather than relying solely on $@ being set.
    eval "use $cache_class; 1"
        or LOGDIE "error using $cache_class : $@";

    return $cache_class->new(repository => $self);
}

# Create and save the stat_info record for this repository if it does
# not have one yet.  Does nothing when stat_info already returns a true
# value.  The new record starts with undefined last_stat_update and
# last_fsck and points back at this repository's id.
sub _initialize_stats {
    my $self = shift;

    # Single guard: once we are past this line stat_info is known to be
    # missing, so no second check is needed.
    return if $self->stat_info;

    $self->stat_info({last_stat_update => undef, last_fsck => undef, repository => $self->id});

    # The accessor is called in list context, so whatever it returns
    # (presumably the single new record -- confirm against the schema)
    # gets saved.
    for my $info ($self->stat_info) {
        $info->save;
    }
}

=item update_stats

Update the stats for this repository, e.g. the atimes, and any
aggregate stats.  Won't update the stats before a specified
interval has elapsed.

Parameters :

    interval -- a DateTime::Duration object, or "0" to force an update.
    Defaults to one hour.

=cut

# Refresh the stored access times of files on disk and record when the
# refresh happened.
#
# Parameters (named):
#   interval -- a DateTime::Duration, or a false value such as 0 to
#               force an update.  Defaults to one hour when not given.
#
# Returns early (nothing) when the previous update is more recent than
# the interval, when the lock file cannot be opened or locked, or when
# saving a file or the stat_info record fails (the error is logged).
sub update_stats {
    my $self = shift;
    my $args = validate(@_, { interval => 0 });

    # Read the same key that validate() accepts, so that a
    # caller-supplied interval -- including 0 -- is actually honoured.
    my $interval = $args->{interval};
    if (!defined($interval)) {
        $interval = DateTime::Duration->new(hours => 1);
    }
    $self->_initialize_stats; # only if necessary

    # add_duration modifies its invocant, so work on a clone to leave the
    # stored last_stat_update value untouched by this freshness check.
    my $last_update = $self->stat_info->last_stat_update;
    return if $interval
        && $last_update
        && $last_update->clone->add_duration($interval) > DateTime->now();

    # Also set an advisory lock; only one process should do this.
    my $lockfile = $self->db->database.".dado_stats_lock";

    # Three-argument open keeps the file name from being parsed as part
    # of the open mode.
    open my $lock, '>', $lockfile or do {
        ERROR "cannot write to $lockfile";
        return;
    };
    # $lock is lexical: on any early return below the handle is closed
    # when it goes out of scope, which also drops the lock.
    flock($lock, LOCK_EX) or return;
    DEBUG "updating stats ($$ locking $lockfile)";

    # NOTE(review): this query is not restricted to this repository; it
    # selects every file flagged on_disk -- confirm that is intended.
    my $files = Data::Downloader::File::Manager->get_files([on_disk => 1 ]);
    for my $file (@$files) {
        # Skip rows that cannot be loaded and files that cannot be stat'ed.
        $file->load(speculative => 1) or next;
        my $stat = stat($file->storage_path) or next;
        $file->atime( DateTime->from_epoch(epoch => $stat->atime) );
        $file->save(changes_only => 1) or do {
            ERROR $file->error;
            return;
        };
    }

    $self->stat_info->last_stat_update(DateTime->now());
    $self->stat_info->save or do {
        ERROR $self->stat_info->error;
        return;
    };
    flock ($lock, LOCK_UN) or LOGWARN "cannot unlock $lockfile";
}

=item dump_stats

Print statistics about this repository to STDOUT.

Parameters :

    yaml (boolean) -- print the raw statistics as YAML instead of
    the formatted report.

=cut

# Report file counts and on-disk size for this repository on STDOUT.
#
# Parameters (named):
#   yaml -- when true, dump the raw statistics with YAML and return;
#           otherwise print a formatted three-line report.
sub dump_stats {
    my $self = shift;
    my $args = validate(@_, {yaml => 0});

    # Every file row recorded for this repository.
    my ($known_files) = $self->db->simple->select(
        'file',
        [ 'count(1)', ],
        { repository => $self->id }
    )->list;

    # Only the rows flagged as on disk, together with their total size.
    my ($disk_count, $disk_size) = $self->db->simple->select(
        'file',
        [ 'count(1)', 'sum(size)' ],
        { repository => $self->id, on_disk => 1 }
    )->list;

    my %stats = (
        known_files => $known_files,
        count       => $disk_count,
        size        => $disk_size,
    );
    $stats{size_h} = human_size($stats{size});

    if ($args->{yaml}) {
        print Dump(\%stats);
        return;
    }

    # Format the numeric entries for display; keys ending in "_h" are
    # already human-readable and are left alone.
    for my $key (keys %stats) {
        next if $key =~ /_h$/;
        $stats{$key} = format_number($stats{$key} || 0);
    }

    print <<EOSTATS;
Total known files       : $stats{known_files}
Number of files on disk : $stats{count}
Size of files on disk   : $stats{size} bytes ($stats{size_h})

EOSTATS
}

=back

=head1 SEE ALSO

L<Rose::DB::Object>

L<Data::Downloader/SCHEMA>

=cut

1;