The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package GitStore;
BEGIN {
  $GitStore::AUTHORITY = 'cpan:YANICK';
}
{
  $GitStore::VERSION = '0.13';
}
#ABSTRACT: Git as versioned data store in Perl

use Moose;
use Moose::Util::TypeConstraints;
use Git::PurePerl;
use Carp;
use Storable qw(nfreeze thaw);

use Path::Class qw/ dir file /;

use List::Util qw/ first /;

use GitStore::Revision;

no warnings qw/ uninitialized /;

subtype 'PurePerlActor' =>
    as 'Git::PurePerl::Actor';

coerce PurePerlActor 
    => from 'Str'
    => via { 
    s/<(.*?)>//;
    Git::PurePerl::Actor->new( name => $_, email => $1 );
};

has 'repo' => ( is => 'ro', isa => 'Str', required => 1 );

has 'branch' => ( is => 'rw', isa => 'Str', default => 'master' );
has author => ( 
    is => 'rw', 
    isa => 'PurePerlActor',  
    default => sub { 
        Git::PurePerl::Actor->new( 
            name  => 'anonymous', 
            email => 'anon@127.0.0.1' 
        );
} );

sub _clean_directories {
    my ( $self, $dir ) = @_;

    $dir ||= $self->root;

    my $nbr_files = keys %{ $dir->{FILES} };

    for my $d ( keys %{ $dir->{DIRS} } ) {
        if( my $f = $self->_clean_directories( $dir->{DIRS}{$d} ) ) {
            $nbr_files += $f;
        }
        else {
            delete $dir->{DIRS}{$d};
        }
    }

    return $nbr_files;
}

sub _expand_directories {
    my( $self, $object ) = @_;

    my %dir = ( DIRS => {}, FILES => {} );

    for my $entry ( map { $_->directory_entries } $object ) {
        if ( $entry->object->isa( 'Git::PurePerl::Object::Tree' ) ) {
            $dir{DIRS}{$entry->filename} 
                = $self->_expand_directories( $entry->object );
        }
        else {
            $dir{FILES}{$entry->filename} = $entry->sha1;
        }
    }

    return \%dir;
}

has 'root' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );

has 'git' => (
    is => 'ro',
    isa => 'Git::PurePerl',
    lazy => 1,
    default => sub {
        my $repo = $_[0]->repo;
        return Git::PurePerl->new( 
            ( $repo =~ m/\.git$/ ? 'gitdir' : 'directory') => $repo 
        );
    }
);

sub BUILD {
    my $self = shift;
    
    $self->load();
    
}

sub BUILDARGS {
    my $class = shift;

    if ( @_ == 1 && ! ref $_[0] ) {
        return { repo => $_[0] };
    } else {
        return $class->SUPER::BUILDARGS(@_);
    }
}

sub branch_head {
    my ( $self, $branch ) = @_;
    $branch ||= $self->branch;

    return $self->git->ref_sha1('refs/heads/' . $branch);
}

# Load the current head version from repository. 
sub load {
    my $self = shift;
    
    my $head = $self->branch_head or do {
        $self->root({ DIRS => {}, FILES => {} });
        return;
    };

    my $commit = $self->git->get_object($head);
    my $tree = $commit->tree;

    my $root = $self->_expand_directories( $tree );
    $self->root($root);

}

sub _normalize_path {
    my ( $self, $path ) = @_;

    $path = join '/', @$path if ref $path eq 'ARRAY';

    # Git doesn't like paths prefixed with a '/'
    $path =~ s#^/+##;

    return $path;
}

sub get_revision {
    my ( $self, $path ) = @_;

    $path = file( $self->_normalize_path($path) );

    my $head = $self->branch_head
        or return;

    my $commit = $self->git->get_object($head);
    my @q = ( $commit );

    my $file = $self->_find_file( $commit->tree, $path )
        or return;

    my $latest_file_sha1 = $file->object->sha1;
    my $last_commit;

    while ( @q ) {
        push @q, $q[0]->parents;
        $last_commit = $commit;
        $commit = shift @q;

        my $f = $self->_find_file( $commit->tree, file($path) )
            or last;

        last if $f->object->sha1 ne $latest_file_sha1;
    }

    return GitStore::Revision->new(
        gitstore => $self,
        path     => $path,
        sha1     => $last_commit->sha1,
    );
}

sub get {
    my ( $self, $path ) = @_;
    
    $path = file( $self->_normalize_path($path) );

    my $dir = $self->_cd_dir($path) or return;

    my $sha1 = $dir->{FILES}{$path->basename} or return;

    my $object = $self->git->get_object($sha1) or return;

    return _cond_thaw($object->content);
}

sub set {
    my ( $self, $path, $content ) = @_;
    
    $path = file( $self->_normalize_path($path) );

    my $dir = $self->_cd_dir($path,1) or return;

    $content = nfreeze( $content ) if ( ref $content );

    my $blob = Git::PurePerl::NewObject::Blob->new( content => $content );
    $self->git->put_object($blob);

    $dir->{FILES}{$path->basename} = $blob->sha1;
}

*remove = \&delete;
sub delete {
    my ( $self, $path ) = @_;
    
    $path = file( $self->_normalize_path($path) );

    my $dir = $self->_cd_dir($path) or return;

    return delete $dir->{FILES}{$path->basename};
}

sub _cd_dir {
    my( $self, $path, $create ) = @_;

    my $dir = $self->root;

    for ( grep { !/^\.$/ } $path->dir->dir_list ) {
        if ( $dir->{DIRS}{$_} ) {
            $dir = $dir->{DIRS}{$_};
        }
        else {
            return unless $create;
            $dir = $dir->{DIRS}{$_} = { DIRS => {}, FILES => {} };
        }
    }

    return $dir;
}

sub _build_new_directory_entry {
    my( $self, $dir ) = @_;

    my @children;
    
    while ( my( $filename, $sha1 ) = each %{ $dir->{FILES} } ) {
        push @children,
            Git::PurePerl::NewDirectoryEntry->new(
                mode     => '100644',
                filename => $filename,
                sha1     => $sha1,
            );
    }

    while ( my( $dirname, $dir ) = each %{ $dir->{DIRS} } ) {
        my $tree = $self->_build_new_directory_entry($dir);
        push @children, Git::PurePerl::NewDirectoryEntry->new(
            mode     => '040000',
            filename => $dirname,
            sha1     => $tree->sha1,
        );
    }

    my $tree = Git::PurePerl::NewObject::Tree->new(
        directory_entries => \@children,
    );
    $self->git->put_object($tree);

    return $tree;
}

sub commit {
    my ( $self, $message ) = @_;

    unless ( $self->_clean_directories ) {
        # TODO surely there's a better way?
        $self->set( '.gitignore/dummy', 'dummy file to keep git happy' );
    }
    
    # TODO only commit if there were changes
    
    my $tree = $self->_build_new_directory_entry( $self->root );

    # there might not be a parent, if it's a new branch
    my $parent = eval { $self->git->ref( 'refs/heads/'.$self->branch )->sha1 };

    my $timestamp = DateTime->now;
    my $commit = Git::PurePerl::NewObject::Commit->new(
        ( parent => $parent ) x !!$parent,
        tree => $tree->sha1,
        author => $self->author,
        committer => $self->author,
        comment => $message||'',
        authored_time  => $timestamp,
        committed_time => $timestamp,
    );
    $self->git->put_object($commit);

    # reload
    $self->load;
}

sub discard {
    my $self = shift;

    $self->load;
}

sub _cond_thaw {
    my $data = shift;

    my $magic = eval { Storable::read_magic($data); };
    if ($magic && $magic->{major} && $magic->{major} >= 2 && $magic->{major} <= 5) {
        my $thawed = eval { Storable::thaw($data) };
        if ($@) {
            # false alarm... looked like a Storable, but wasn't.
            return $data;
        }
        return $thawed;
    } else {
        return $data;
    }
}

sub _find_file {
    my( $self, $tree, $path ) = @_;

    my @path = grep { !/^\.$/ } $path->dir->dir_list;

    if ( my $part = shift @path ) {
        my $entry = first { $_->filename eq $part } $tree->directory_entries 
            or return;

        my $object = $self->git->get_object( $entry->sha1 );

        return unless ref $object eq 'Git::PurePerl::Object::Tree';

        return $self->_find_file( $object, file(@path,$path->basename) );
    }

    return first { $_->filename eq $path->basename } $tree->directory_entries;
}

sub history {
    my ( $self, $path ) = @_;

    my $head = $self->branch_head
        or return;

    my @q = ( $self->git->get_object($head) );

    my @commits;
    while ( @q ) {
        push @q, $q[0]->parents;
        unshift @commits, shift @q;
    }

    my @history_commits;
    my %sha1_seen;

    for my $c ( @commits ) {
        my $file = $self->_find_file( $c->tree, file($path) ) or next;
        push @history_commits, $c unless $sha1_seen{ $file->object->sha1 }++;        
    }

    return map {
        GitStore::Revision->new( 
            path => $path, 
            gitstore => $self,
            sha1 => $_->sha1,
        )
    } @history_commits;

}

sub list {
    my( $self, $regex ) = @_;

    croak "'$regex' is not a a regex"
        if $regex and ref $regex ne 'Regexp';

    my $head = $self->branch_head or return;

    my $commit = $self->git->get_object($head);
    my $tree = $commit->tree;

    my $root = $self->_expand_directories( $tree );

    my @dirs = ( [ '', $root ] );
    my @entries;

    while( my $dir = shift @dirs ) {
        my $path = $dir->[0];
        $dir = $dir->[1];
        push @dirs, [ "$path/$_" => $dir->{DIRS}{$_} ]
            for sort keys  %{$dir->{DIRS}}; 

        for ( sort keys %{$dir->{FILES}} ) {
            my $f = "$path/$_";
            $f =~ s#^/##;  # TODO improve this
            next if $regex and $f !~ $regex;
            push @entries, $f;
        }
    }

    return @entries;
}


no Moose;
__PACKAGE__->meta->make_immutable;

1;

__END__

=pod

=head1 NAME

GitStore - Git as versioned data store in Perl

=head1 VERSION

version 0.13

=head1 SYNOPSIS

    use GitStore;

    my $gs = GitStore->new('/path/to/repo');
    $gs->set( 'users/obj.txt', $obj );
    $gs->set( ['config', 'wiki.txt'], { hash_ref => 1 } );
    $gs->commit();
    $gs->set( 'yyy/xxx.log', 'Log me' );
    $gs->discard();
    
    # later or in another pl
    my $val = $gs->get( 'user/obj.txt' ); # $val is the same as $obj
    my $val = $gs->get( 'config/wiki.txt' ); # $val is { hashref => 1 } );
    my $val = $gs->get( ['yyy', 'xxx.log' ] ); # $val is undef since discard

=head1 DESCRIPTION

It is inspired by the Python and Ruby binding. check SEE ALSO

=head1 METHODS

=head2 new

    GitStore->new('/path/to/repo');
    GitStore->new( repo => '/path/to/repo', branch => 'mybranch' );
    GitStore->new( repo => '/path/to/repo', author => 'Someone Unknown <unknown\@what.com>' );

=over 4

=item repo

your git directory or work directory (I<GitStore> will assume it's a work
directory if it doesn't end with C<.git>).

=item branch

your branch name, default is 'master'

=item author

It is used in the commit info

=back

=head2 set($path, $val)

    $gs->set( 'yyy/xxx.log', 'Log me' );
    $gs->set( ['config', 'wiki.txt'], { hash_ref => 1 } );
    $gs->set( 'users/obj.txt', $obj );

Store $val as a $path file in Git

$path can be String or ArrayRef. Any leading slashes ('/') in the path
will be stripped, as to make it a valid Git path.  The same 
grooming is done for the C<get()> and C<delete()> methods.

$val can be String or Ref[HashRef|ArrayRef|Ref[Ref]] or blessed Object

=head2 get($path)

    $gs->get( 'user/obj.txt' );
    $gs->get( ['yyy', 'xxx.log' ] );

Get $val from the $path file

$path can be String or ArrayRef

=head2 get_revision( $path )

Like C<get()>, but returns the L<GitStore::Revision> object corresponding to
the latest Git revision on the monitored branch for which C<$path> changed.

=head2 delete($path)

=head2 remove($path)

remove $path from Git store

=head2 commit

    $gs->commit();
    $gs->commit('Your Comments Here');

commit the B<set> changes into Git

=head2 discard

    $gs->discard();

discard the B<set> changes

=head2 history($path)

Returns a list of L<GitStore::Revision> objects representing the changes
brought to the I<$path>. The changes are returned in ascending commit order.

=head2 list($regex)

    @entries = $gs->list( qr/\.txt$/ );

Returns a list of all entries in the repository, possibly filtered by the 
optional I<$regex>.

=head1 FAQ

=head2 why the files are B<not> there?

run

    git checkout

=head2 any example?

    # if you just need a local repo, that's all you need.
    mkdir sandbox
    cd sandbox
    git init
    # use GitStore->new('/path/to/this/sandbox')
    # set something
    git checkout
    
    # follows are for remote git url
    git remote add origin git@github.com:fayland/sandbox2.git
    git push origin master
    # do more GitStore->new('/path/to/this/sandbox') later
    git checkout
    git pull origin master
    git push

=head1 KNOWN BUGS

If all files are deleted from the repository, a 'dummy' file
will be created to keep Git happy.

=head1 SEE ALSO

=over 4

=item Article

L<http://www.newartisans.com/2008/05/using-git-as-a-versioned-data-store-in-python.html>

=item Python binding

L<http://github.com/jwiegley/git-issues/tree/master>

=item Ruby binding

L<http://github.com/georgi/git_store/tree/master>

=back

=head1 Git URL

L<http://github.com/fayland/perl-git-store/tree/master>

=head1 AUTHORS

=over 4

=item *

Fayland Lam <fayland@gmail.com>

=item *

Yanick Champoux <yanick@cpan.org>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2012 by Fayland Lam <fayland@gmail.com>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut