The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Original Copyright 2008 Orbitz WorldWide (python)
# Perl port 2013 Jean Stebens (perl)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

package Whisper;
{
  $Whisper::VERSION = '1.035';
}

use 5.012;

use strict;
use warnings;

use POSIX;

our $VERSION;

use base 'Exporter';
our @EXPORT    = qw( wsp_info wsp_fetch );

# ABSTRACT: Handle Whisper fixed-size database files
 
# This module is an implementation of the Whisper database API
# Here is the basic layout of a whisper data file .wsp:
#
# File = Header,Data
#       Header = Metadata,ArchiveInfo+
#               Metadata = aggregationType,maxRetention,xFilesFactor,archiveCount
#               ArchiveInfo = Offset,SecondsPerPoint,Points
#       Data = Archive+
#               Archive = Point+
#                       Point = timestamp,value

my $metadata_Format = "N2f>N";
my $metadata_Size = length pack($metadata_Format, 0);

my $archiveInfo_Format = "N3";
my $archiveInfo_Size = length pack($archiveInfo_Format, 0);

my $point_Format = "Nd>";
my $point_Size = length pack($point_Format, 0);

my $long_Format = "N";
my $long_Size = length pack($long_Format, 0);

my $float_Format = "f";
my $float_Size = length pack($float_Format, 0);

my $value_Format = "d";
my $value_Size = length pack($value_Format, 0);

my $aggtype = {
	1 => 'average',
	2 => 'sum',
	3 => 'last',
	4 => 'max',
	5 => 'min'
};

sub __read_header {
	my $file = shift;

	read($file, my $rec, $metadata_Size) or die("unable to read");
	my ($aggregationType, $maxRetention, $xff, $archiveCount) = unpack($metadata_Format, $rec);
	
	my $archives = [];
	foreach(0..$archiveCount-1) {
        push(@$archives, __read_archiveinfo($file));
	}

	return {
		aggregationType => $aggregationType,
		maxRetention => $maxRetention,
		xFilesFactor => $xff,
		archiveCount => $archiveCount,
		archives => $archives,
		fileSize => (stat($file))[7],
	};
}

sub __read_archiveinfo {
	my $file = shift;

	read($file, my $rec, $archiveInfo_Size) or die("unable to read");
	my ($offset, $secondsPerPoint, $points) = unpack($archiveInfo_Format, $rec);

	return {
		offset => $offset,
		secondsPerPoint => $secondsPerPoint,
		points => $points,
		retention => $secondsPerPoint * $points,
		size => $points * $point_Size,
	};
}

sub wsp_info {
	my %param = @_;

	my $dbfile = $param{file};

	die("You need to specify a wsp file\n") unless $dbfile;

	open(my $file, "<", $dbfile) or die("Unable to read whisper file: $dbfile\n");
	binmode($file); 

	my $header = __read_header($file);
	$header->{fileSize} = (stat($file))[7];

	close($file);
	return $header;
}

sub wsp_fetch {
	my %param = @_;

	my $dbfile = $param{file};
	my $from = $param{from};
	my $until = $param{until};
	my $format = $param{format};
	my $date_format = $param{date_format};

	die("You need to specify a wsp file\n") unless $dbfile;
	
	open(my $file, "<", $dbfile) or die("Unable to read whisper file: $dbfile\n");
	binmode($file); 

	my $header = __read_header($file);

	my $now = time;
	my $oldest = $now - $header->{maxRetention};

	# defaults
	$until ||= $now;
	$from ||= 0;

	die("Invalid time interval") unless $from < $until;

	# from borders
	$from = $oldest if $from < $oldest;

	# until borders
	$until = $now if $until > $now;

	my $diff = $now - $from;

	my $archive;
	# Get first archive which spans our wanted timeslot
	foreach my $a (@{ $header->{archives} }) {
		if( $a->{retention} >= $diff ) {
			$archive = $a;
			last;
		}
	}
	die("No archive satisfies our needs") unless $archive;

	my $from_interval = ($from - ( $from % $archive->{secondsPerPoint})) + $archive->{secondsPerPoint};
	my $until_interval = ($until - ( $until % $archive->{secondsPerPoint})) + $archive->{secondsPerPoint};

	my $offset = $archive->{offset};

	seek($file, $offset, 0);
	read($file, my $packed_point, $point_Size) or die("unable to read");
	my ($base_interval, $base_value) = unpack($point_Format, $packed_point);

	if( $base_interval == 0 ) {
		my $step = $archive->{secondsPerPoint};
		my $points = ($until_interval - $from_interval) / $step;
		my @timeinfo = ($from_interval, $until_interval, $step);
		my @values = (undef) x $points;
		return { 
			start => $from_interval, 
			end => $until_interval,
			step => $step,
			values => \@values,
			cnt => scalar @values,
		}
	}

	# Determine fromOffset
	my $from_time_distance = $from_interval - $base_interval;
	my $from_point_distance = $from_time_distance / $archive->{secondsPerPoint};
	my $from_byte_distance = $from_point_distance * $point_Size;
	my $from_offset = $archive->{offset} + ( $from_byte_distance % $archive->{size});

	# Determine untilOffset
	my $until_time_distance = $until_interval - $base_interval;
	my $until_point_distance = $until_time_distance / $archive->{secondsPerPoint};
	my $until_byte_distance = $until_point_distance * $point_Size;
	my $until_offset = $archive->{offset} + ( $until_byte_distance % $archive->{size});

	# Read all the points in the interval
	seek($file, $from_offset, 0);
	my $series;
	if( $from_offset < $until_offset ) {
		read($file, $series, ($until_offset - $from_offset));
	} else {
		#We do wrap around the archive, so we need two reads
		my $archive_end = $archive->{offset} + $archive->{size};
		read($file, my $first, $archive_end - $from_offset);
		seek($file, $archive->{offset}, 0 );
		read($file, my $second, $until_offset - $archive->{offset});
		$series = $first . $second;
	
	}

	# Unpack the series
	my $points = length($series) / $point_Size;
	my $series_format = $point_Format x $points;
	my @series_unpacked = unpack($series_format, $series);

	my $values = [ (undef) x $points ];
	my $current_interval = $from_interval;
	my $step = $archive->{secondsPerPoint};

	my $index = 0;
	while( @series_unpacked ) {
		my ($point_time, $point_value ) = splice(@series_unpacked, 0, 2);
		if( $point_time == $current_interval ) {
			$values->[$index] = $point_value;
		}
		$current_interval += $step;
		$index++;
	}

	# Generate datetime,data tuples
	my $keys = [ (undef) x @$values ];
	if( $format ) {

			my $current = $from_interval;
			while( my ($i, $val) = each @$values ) {

				my $timestamp = $current;
				# Format the datetime field if wanted
				if( $date_format ) {
					$timestamp = POSIX::strftime($date_format, localtime($current));
				}

				if( $format eq 'tuples' ) {
					$values->[$i] = [ $timestamp, $val ];
				} 
				if( $format eq "split" ) {
					$keys->[$i] = $timestamp;
				}

				$current += $step;
			}

			# Format start/end too
			if( $date_format ) {
				$from_interval = POSIX::strftime($date_format, localtime($from_interval));
				$until_interval = POSIX::strftime($date_format, localtime($until_interval));
			}

	}

	close($file);

	my $resp = {
		start => $from_interval,
		end => $until_interval,
		step => $step,
		values => $values,
		cnt => scalar @$values,
	};

	if( $format && $format eq "split" ) {
		$resp->{keys} = $keys;
	}

	return $resp;
} 
     
1;

__END__

=head1 NAME

Whisper - Handle Whisper fixed-size database files

=head1 SYNOPSIS

	use Whisper;

	# Read archive information
	my $info = wsp_info( file => "/path/to/my/database.wsp"); 

	# Fetch archive data
	my $data = wsp_fetch( 
		file => "/path/to/my/database.wsp",
		from => $from,
		until => $until
	);

	# Fetch archive data in the tuples format: 
	# { values => [ [timestamp, data], [timestamp,data], ... ] }
	my $tuple_data = wsp_fetch(
		file => "/path/to/my/database.wsp",
		from => $from,
		until => $until,
		format => 'tuples'
	);

	# Fetch archive data in the split format: 
	# { keys => [timestamp1, timestamp2], values => [data1, data2] }
    my $split_data = wsp_fetch(
        file => "/path/to/my/database.wsp",
        from => $from,
        until => $until,
        format => 'tuples'
    );
	
	# Same as fetch tuple/split data but with POSIX::strftime formatted datetime
	my $formatted_tuple_data = wsp_fetch(
		file => "/path/to/my/database.wsp",
		from => $from,
		until => $until,
		format => 'tuples'
		date_format => '%Y/%m/%d %H:%M:%S'
	);


=head1 DESCRIPTION

This is a simple Whisper (fixed-size database) reader.

Whisper archive/databse files (.wsp) are similiar to RRD archive files. 
For more details about Whisper see L<http://graphite.wikidot.com/whisper>

The following operations are supported:

	wsp_info	Read basic archive information
	wsp_fetch	Fetch data points from archive

These operations are planned:

	wsp_create	Create wsp database
	wsp_update	Add a data point to a wsp database
	wsp_update_bulk	Add multiple data points to a wsp database
	wsp_merge	Merge two wsp database files

Feel free to help implement the above operations.
 
=head1 EXPORTS
 
By default, C<use Whisper> exports all the functions listed below.  

=head1 FUNCTIONS

=head2 wsp_info ( %parameters )

=head3 Parameters

	file    String filepath towards a valid .wsp file

=head3 Returns

Returns a hash reference with Header/Metadata information:

	{
		'aggregationType' => 1,
		'fileSize' => 32872,
		'archiveCount' => 2,
		'xFilesFactor' => '0.5',
		'maxRetention' => 2592000

		'archives' => [
			{
				'secondsPerPoint' => 300,
				'points' => 2016,
				'retention' => 604800,
				'size' => 24192,
				'offset' => 40
			},
			{
				'secondsPerPoint' => 3600,
				'points' => 720,
				'retention' => 2592000,
				'size' => 8640,
				'offset' => 24232
			}
		],
	};


=head2 wsp_fetch ( %parameters )

=head3 Parameters

 - file		String filepath	towards a valid .wsp file
 - from		epoch timestamp, defaults to oldest timepoint in archive
 - until		epoch timestamp, defaults to now
 - format		Valid formats are:
    - tuples	returns the values in a tuple format: [ [timestamp1, data1], [timestamp2, data2], ... ]
    - split	returns an array for the timestamps in 'keys' and one for the data in 'values': { keys => [timestamp1, timestamp2], values => [data1, data2] }
 - date_format	Dictates the POSIX::strftime format for timestamps in tuples, defaults to epoch timestamp: %s

=head3 Returns

Returns a hash refrence with data points and meta data for the given range:

	{
		'step' => 300,
		'end' => 1374830700,
		'start' => 1374830100,
		'values' => [
			'0.000000',
			'1.000000'
		],
		'cnt' => 2
	};

In combination with tuples format, the values is an array of arrays with timestamp,data tuples:

	{
		'step' => 300,
		'end' => 1374830700,
		'start' => 1374830100,
		'values' => [
			[ 1374830100, '0.000000' ],
			[ 1374830400, '1.000000' ]
		],
		'cnt' => 2
	};

In combination with split format, the values are accessible under 'values' and timestamps under 'keys'

    {
        'step' => 300,
        'end' => 1374830700,
        'start' => 1374830100,
        'values' => [
            '0.000000',
            '1.000000'
        ],
		'keys' => [
			1374830100,
			1374830400
		],
        'cnt' => 2
    };

Or in combination with date_format .e.g: "%Y/%m/%d %H:%M"

	{
		'step' => 300,
		'end' => '2013/07/26 11:25',
		'start' => '2013/07/26 11:15',
		'values' => [
			[ '2013/07/26 11:15', '0.000000' ],
			[ '2013/07/26 11:20', '1.000000' ]
		],
		'cnt' => 2
	};

=head1 CVS

Current CVS: L<https://github.com/corecache/libwhisper-perl>

=head1 COPYRIGHT AND LICENSE

Original Copyright 2008 Orbitz WorldWide (python)
Perl port 2013 Jean Stebens (perl)

=cut