# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# mt-aws-glacier - AWS Glacier sync client
# Copyright (C) 2012  Victor Efimov
# vs@vs-dev.com http://vs-dev.com
# License: GPLv3
#
# This file is part of "mt-aws-glacier"
#
#    mt-aws-glacier is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    mt-aws-glacier is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


package Net::Amazon::TreeHash;



use strict;
use warnings;
use Digest::SHA qw/sha256/;

=head1 NAME

Net::Amazon::TreeHash - An implementation of the Amazon AWS TreeHash checksum algorithm

=head1 VERSION

Version 0.71

=cut

# Module version; matches the number given in the VERSION section of the POD above.
our $VERSION = '0.71';

=head1 SYNOPSIS

This module implements the TreeHash algorithm for the Amazon AWS Glacier API (version 2012-06-01)

Usage:

	use Net::Amazon::TreeHash;

	my $th = Net::Amazon::TreeHash->new();
	
	$th->eat_data(\$mydata);
	...
	
	$th->calc_tree();
	my $hash = $th->get_final_hash();

=head1 NOT IMPLEMENTED

A function to compute the hash of only a part of the data (such a function would be useful for Glacier multipart upload)

=head1 SEE ALSO

An application for AWS Glacier synchronization. It is available at L<https://github.com/vsespb/mt-aws-glacier>.

=head1 AUTHOR

Victor Efimov C<< <vs at vs-dev dot com> >>

https://github.com/vsespb/mt-aws-glacier

=head1 BUGS

Does not work for 0-length files (however it's useless for Glacier).

=cut


# Constructor.
#
# Accepts arbitrary key/value arguments, which become fields of the object.
# The only one this sub itself looks at is "unit" (chunk size in bytes); a
# missing or false value is replaced by 1048576. The "tree", "pending" and
# "processed_size" fields are always reset, whatever the caller passed.
sub new
{
    my ($class, %args) = @_;

    my $self = bless \%args, $class;

    $self->{tree}           = [];   # tree levels; level 0 will hold the leaf hashes
    $self->{pending}        = {};
    $self->{processed_size} = 0;    # number of chunks consumed so far
    $self->{unit} ||= 1048576;

    return $self;
}


# Read everything from the filehandle $fh and feed it to the tree, one
# chunk of $self->{unit} bytes at a time.
#
# sysread() is allowed to return fewer bytes than requested (typical for
# pipes, sockets and terminals), so a single call cannot be assumed to
# deliver a whole chunk. Each chunk is therefore accumulated with repeated
# sysread() calls until it is full or end-of-file is reached; only the very
# last chunk may be shorter than the unit size.
#
# Dies with the system error message if sysread() fails.
sub eat_file
{
	my ($self, $fh) = @_;
	my $unit = $self->{unit};
	while (1) {
		my $data = '';
		my $got = 0;
		# Keep appending at offset $got until the chunk is complete or EOF.
		while ($got < $unit) {
			my $r = sysread($fh, $data, $unit - $got, $got);
			die "sysread failed: $!" unless defined $r;
			last if $r == 0; # end of file
			$got += $r;
		}
		return if $got == 0; # nothing left to hash
		$self->_eat_data_one_mb(\$data);
		return if $got < $unit; # short chunk means EOF was hit
	}
}

# Feed an in-memory buffer (passed as a scalar reference) to the tree.
# The buffer is cut into consecutive pieces of $self->{unit} bytes (the
# final piece may be shorter) and every piece is handed to
# _eat_data_one_mb(). An empty buffer adds nothing.
sub eat_data
{
	my ($self, $dataref) = @_;
	my $chunk_size = $self->{unit};
	my $total = length($$dataref);
	for (my $offset = 0; $offset < $total; $offset += $chunk_size) {
		my $chunk = substr($$dataref, $offset, $chunk_size);
		$self->_eat_data_one_mb(\$chunk);
	}
}


# Append one leaf to level 0 of the tree.
#
# $dataref is a reference to the chunk's bytes. The leaf records the
# SHA-256 digest (raw bytes) of the chunk and the chunk's index as both
# "start" and "finish". The chunk counter is then advanced by one.
sub _eat_data_one_mb
{
	my ($self, $dataref) = @_;
	my $index = $self->{processed_size};
	my $leaves = ($self->{tree}->[0] ||= []);
	push @$leaves, {
		joined => 0,
		start  => $index,
		finish => $index,
		hash   => sha256($$dataref),
	};
	return $self->{processed_size}++;
}

# Build the upper levels of the hash tree from the leaves in
# $self->{tree}->[0].
#
# Each new level is produced by pairing adjacent nodes of the level below:
# a pair becomes a node whose hash is the SHA-256 of the two child digests
# concatenated, and a trailing unpaired node is carried up unchanged.
# Levels are added until one has a single node; that node is what
# get_final_hash() reports.
#
# If no data was fed in at all, a single node holding the SHA-256 of the
# empty string is stored as the top level instead of dying on the missing
# leaf level. Levels left over from a previous call are discarded first, so
# the sub can be called again after more data has been added.
sub calc_tree
{
	my ($self) = @_;
	my $tree = $self->{tree};

	# Make sure the leaf level exists, then drop every level above it.
	$tree->[0] ||= [];
	$#$tree = 0;

	# Empty input: there is nothing to pair up, so store the digest of the
	# empty string as the root, above the (empty) leaf level.
	unless (@{ $tree->[0] }) {
		$tree->[1] = [ { joined => 0, start => 0, finish => 0, hash => sha256('') } ];
		return;
	}

	my $prev_level = 0;
	while (scalar @{ $tree->[$prev_level] } > 1) {
		my $prev_nodes = $tree->[$prev_level];
		my $len = scalar @$prev_nodes;
		my @curr_nodes;
		for (my $i = 0; $i < $len; $i += 2) {
			if ($i + 1 < $len) {
				# Named $left/$right rather than $a/$b to avoid shadowing
				# the variables used by sort().
				my ($left, $right) = @{$prev_nodes}[$i, $i + 1];
				push @curr_nodes, {
					joined => 0,
					start  => $left->{start},
					finish => $right->{finish},
					hash   => sha256( $left->{hash} . $right->{hash} ),
				};
			} else {
				# Odd node out: promoted to the next level as is.
				push @curr_nodes, $prev_nodes->[$i];
			}
		}
		$tree->[++$prev_level] = \@curr_nodes;
	}
	return;
}


# Return the hash stored in the first node of the last tree level as a
# lowercase hexadecimal string. After calc_tree() has run, that node is the
# root of the tree.
sub get_final_hash
{
	my ($self) = @_;
	my $levels = $self->{tree};
	my $digest = $levels->[-1]->[0]->{hash};
	return unpack('H*', $digest);
}


1;