The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

#==================================================================== -*-perl-*-
#
# DateTime::Event::Predict
#
# DESCRIPTION
#   Predict new dates from a set of dates
#
# AUTHORS
#   Brian Hann
#
#===============================================================================

package DateTime::Event::Predict;

use 5.006;

use strict;

use DateTime;
use Params::Validate qw(:all);
use Carp qw(carp croak confess);
use Scalar::Util;
use Data::Dumper;

use POSIX qw(ceil);

use DateTime::Event::Predict::Profile qw(:buckets);

our $VERSION = '0.01_03';


#===============================================================================#

sub new {
    my $proto = shift;
    
    my %opts = validate(@_, {
    	dates       => { type => ARRAYREF, optional => 1 },
    	profile     => { type => SCALAR | OBJECT | HASHREF, optional => 1 },
    	#stdev_limit => { type => SCALAR,          default  => 2 },
    });
    
    my $class = ref( $proto ) || $proto;
    my $self = { #Will need to allow for params passed to constructor
    	dates   		 => [],
    	distinct_buckets => {},
    	interval_buckets => {},
    	total_epoch_interval    => 0,
    	largest_epoch_interval  => 0,
    	smallest_epoch_interval => 0,
    	mean_epoch_interval     => 0,
    	
    	#Whether this data set has been trained or not
    	trained => 0,
    };
    bless($self, $class);
    
    $opts{profile} = 'default' if ! $opts{profile};
    
    $self->profile( $opts{profile} );
    
    return $self;
}

# Get or set list of dates
# ***NOTE: Should make this validate for 'can' on the DateTime methods we need and on 'isa' for DateTime
sub dates {
	my $self   = shift;
	my ($dates) = @_;
	
	validate_pos(@_, { type => ARRAYREF, optional => 1 });
	
	if (! defined $dates) {
		return wantarray ? @{$self->{dates}} : $self->{dates};
	}
	elsif (defined $dates) {
		foreach my $date (@$dates) {
			$self->_trim_date( $date );
			$self->add_date($date);
		}
	}
	
	return 1;
}

# Add a date to the list of dates
sub add_date {
	my $self   = shift;
	my ($date) = @_;
	
	validate_pos(@_, { isa => 'DateTime' }); #***Or we could attempt to parse the date, or use can( epoch() );
	
	$self->_trim_date( $date );
	
	push(@{ $self->{dates} }, $date);
	
	return 1;
}

#Get or set the profile for this predictor
sub profile {
	my $self      = shift;
	my ($profile) = @_; # $profile can be a string specifying a profile name that is provided by default, or a profile object, or options to create a new profile
	
	validate_pos(@_, { type => SCALAR | OBJECT | HASHREF, optional => 1 });
	
	# If no profile is provided, return the current profile
	if (! defined $profile || ! $profile) { return $self->{profile}; }
	
	my $new_profile;
	
	# Profile is an actual DTP::Profile object
	if (Scalar::Util::blessed($profile) && $profile->can('buckets')) {
		$new_profile = $profile;
	}
	# Profile is a hashref of options to create a new DTP::Profile object with
	elsif (ref($profile) eq 'HASH') {
		$new_profile = DateTime::Event::Predict::Profile->new(
			%$profile,
		);
	}
	# Profile is the name of a profile alias
	else {
		$new_profile = DateTime::Event::Predict::Profile->new( profile => $profile );
	}
	
	# Add the distinct buckets
    foreach my $bucket ( $new_profile->_distinct_buckets() ) {
    	$self->{distinct_buckets}->{ $bucket->name } = {
    		accessor => $bucket->accessor,
    		duration => $bucket->duration,
    		order    => $bucket->order,
    		weight   => $bucket->weight,
    		buckets  => {},
    	};
    }
    
    # Add the interval buckets
    foreach my $bucket ( $new_profile->_interval_buckets() ) {
    	$self->{interval_buckets}->{ $bucket->name } = {
    		accessor => $bucket->accessor,
    		order    => $bucket->order,
    		weight   => $bucket->weight,
    		buckets  => {},
    	};
    }
	
	$self->{profile} = $new_profile;
	
	return 1;
}

# Gather statistics about the dates
sub train {
	my $self = shift;
	
	# Sort the dates chronologically
	my @dates = sort { $a->hires_epoch() <=> $b->hires_epoch() } @{ $self->{dates} }; #*** Need to convert this to DateTime->compare($dt1, $dt2)
	
	# Last and first dates
	$self->{last_date} = $dates[$#dates];
	$self->{first_date} = $dates[0];
	
	# Clear out anything already in the the buckets
	foreach my $bucket (values %{$self->{distinct_buckets}}, values %{$self->{interval_buckets}} ) {
		$bucket->{buckets} = {};
	}
	
	my $prev_date;
	foreach my $index (0 .. $#{ $self->{dates} }) {
		# The date to work on
		my $date = $dates[ $index ];
		
		# Get which dates were before and after the date we're working on
		my ($before, $after);
		if ($index > 0) { $before = $dates[ $index - 1 ]; }
		if ($index < $#{ $self->{dates} }) { $after = $dates[ $index + 1 ]; }
		
		# Increment the date-part buckets
		while (my ($name, $dbucket) = each %{ $self->{distinct_buckets} }) {
			# Get the accessor method by using can()
			my $cref = $date->can( $dbucket->{accessor} );
				croak "Can't call accessor '" . $dbucket->{accessor} . "' on " . ref($date) . " object" unless $cref;
				
			# Increment the number of instances for the value given when we use this bucket's accessor on $date
			$dbucket->{buckets}->{ &$cref($date) }++;
		}
		
		# If this is the first date we have nothing to diff, so we'll skip on to the next one
		if (! $prev_date) { $prev_date = $date; next; }
		
		# Get a DateTime::Duration object representing the diff between the dates
		my $dur = $date->subtract_datetime( $prev_date );
		
		# Increment the interval buckets
		# Intervals: here we default to the largest interval that we can see. So, for instance, if
		#   there is a difference of months we will not increment anything smaller than that.
		while (my ($name, $bucket) = each %{ $self->{interval_buckets} }) {
			my $cref = $dur->can( $bucket->{accessor} );
				croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($dur) . " object" unless $cref;
			my $interval = &$cref($dur);
			$bucket->{buckets}->{ $interval }++;
		}
		
		# Add the difference between dates in epoch seconds
		my $epoch_interval = $date->hires_epoch() - $prev_date->hires_epoch();
		
		### Epoch interval: $epoch_interval
		
		$self->{total_epoch_interval} += $epoch_interval;
		
		# Set the current date to this date
		$prev_date = $date;
	}
	
	# Average interval between dates in epoch seconds
	$self->{mean_epoch_interval} = $self->{total_epoch_interval} / (scalar @dates - 1); #Divide total interval by number of intervals
	
	# Mark this object as being trained
	$self->{trained}++;
}

sub predict {
	my $self = shift;
	
	my %opts = validate(@_, {
		max_predictions => { type => SCALAR,     optional => 1 }, # How many predictions to return
		stdev_limit     => { type => SCALAR,     default  => 2 }, # Number of standard deviations to search through, default to 2
		min_date		=> { isa  => 'DateTime', optional => 1 }, # If set, make no prediction before 'min_date'
		callbacks       => { type => ARRAYREF,   optional => 1 }, # Arrayref of coderefs to call when making predictions
	});
	
	# Force max predictions to one if we were called in scalar context
	if (! defined $opts{'max_predictions'}) {
		$opts{'max_predictions'} = 1 if ! wantarray;
	}
	
	# Train this set of dates if they're not already trained
	$self->train if ! $self->_is_trained;
	
	# Make a copy of the distinct and interval bucket hashes so we can mess with them
	my %distinct_buckets = %{ $self->{distinct_buckets} };
	my %interval_buckets = %{ $self->{interval_buckets} };
	
	# Figure the mean, variance, and standard deviation for each bucket
	foreach my $bucket (values %distinct_buckets, values %interval_buckets) {
		my ($mean, $variance, $stdev) = $self->_bucket_statistics($bucket);
		
		$bucket->{mean}     = $mean;
		$bucket->{variance} = $variance;
		$bucket->{stdev}    = $stdev;
	}
	
	# Get the most recent of the provided dates by sorting them by their epoch seconds
	my $most_recent_date = (sort { $b->hires_epoch() <=> $a->hires_epoch() } @{ $self->{dates} })[0];
	
	# Make a starting search date that has been moved ahead by the average interval beteween dates (in epoch seconds)
	my $duration = new DateTime::Duration(
		seconds => $self->{mean_epoch_interval}, # **Might need to round off hires second info here?
	);
	my $start_date = $most_recent_date + $duration;
	
	# A hash of predictions, dates are keyed by their hires_epoch() value
	my %predictions = ();
	
	# Start with using the distinct buckets to make predictions
	if (%distinct_buckets) {
		# Get a list of buckets after sorting the buckets from largest date part to smallest (i.e. year->month->day->hour ... microsecond, etc)
		my @distinct_bucket_keys = sort { $self->{distinct_buckets}->{ $b }->{order} <=> $self->{distinct_buckets}->{ $a }->{order} } keys %distinct_buckets;
		
		# Get the first bucket name 
		my $first_bucket_name = shift @distinct_bucket_keys;
		
		# Start recursively descending down into the various date parts, searching in each one
		$self->_date_descend_distinct(
			%opts,
			
			date        	 	 => $start_date,
			most_recent_date 	 => $most_recent_date,
			bucket_name 	 	 => $first_bucket_name,
			distinct_buckets 	 => \%distinct_buckets,
			distinct_bucket_keys => \@distinct_bucket_keys,
			predictions 	 	 => \%predictions,
		);
		
		# Now that we (hopefully) have some predictions, put them each through _interval_check to check
		# the predictiosn against the interval bucket statistics
		if (%interval_buckets) {
			while (my ($hires, $prediction) = each %predictions) {
				# Delete the date from the predictions hash if it's not good according to the interval statistics
				if (! $self->_interval_check( $prediction )) {
					delete $predictions{ $hires };
				}
			}
		}
	}
	# No distinct buckets, just interval buckets
	elsif (%interval_buckets) {
		# Get a list of buckets after sorting the buckets from largest interval to smallest (i.e. years->months->days->hours, etc)
		my @interval_bucket_keys = sort { $self->{interval_buckets}->{ $b }->{order} <=> $self->{interval_buckets}->{ $a }->{order} } keys %interval_buckets;
		
		# Get the first bucket name 
		my $first_bucket_name = shift @interval_bucket_keys;
		
		# Start recursively descending down into the date interval types, searching in each one
		$self->_date_descend_interval(
			%opts,
			
			date        	 	 => $start_date,
			most_recent_date 	 => $most_recent_date,
			bucket_name 	 	 => $first_bucket_name,
			interval_buckets 	 => \%interval_buckets,
			interval_bucket_keys => \@interval_bucket_keys,
			predictions 	 	 => \%predictions,
		);
	}
	# WTF, no buckets. That's bad!
	else {
		croak("No buckets supplied!");
	}
	
	# Sort the predictions by their total deviation
	my @predictions = sort { $a->{_dtp_deviation} <=> $b->{_dtp_deviation} } values %predictions;
	
	return wantarray ? @predictions : $predictions[0];
}

# Descend down into the distinct date parts, looking for predictions
sub _date_descend_distinct {
	my $self = shift;
	#my %opts = @_;
	
	# Validate the options
	my %opts = validate(@_, {
		date        	 	 => { isa => 'DateTime' },				 # The date to start searching in
		most_recent_date 	 => { isa => 'DateTime' },               # The most recent date of the dates provided
		bucket_name 	 	 => { type => SCALAR },					 # The bucket (date-part) to start searching in
		distinct_buckets 	 => { type => HASHREF },				 # A hashref of all buckets to use when looking for good predictions
		distinct_bucket_keys => { type => ARRAYREF },				 # A list of bucket names that we shift out of to get the next bucket to use
		stdev_limit 	 	 => { type => SCALAR },					 # The limit of how many standard deviations to search through
		predictions 	 	 => { type => HASHREF },				 # A hashref of predictions we find
		max_predictions  	 => { type => SCALAR,     optional => 1 }, # The maxmimum number of predictions to return (prevents overly long searches)
		min_date		 	 => { isa  => 'DateTime', optional => 1 }, # If set, make no prediction before 'min_date'
		callbacks 	     	 => { type => ARRAYREF,   optional => 1 }, # A list of custom coderefs that are called on each possible prediction
	});	
	
	# Copy the options over into simple scalars so it's easier on my eyes
	my $date 				 = delete $opts{'date'};        # Delete these ones out as we'll be overwriting them below
	my $bucket_name 		 = delete $opts{'bucket_name'};
	my $distinct_buckets 	 = $opts{'distinct_buckets'};
	my $distinct_bucket_keys = $opts{'distinct_bucket_keys'};
	my $stdev_limit 		 = $opts{'stdev_limit'};
	my $predictions 		 = $opts{'predictions'};
	my $max_predictions 	 = $opts{'max_predictions'};
	my $callbacks       	 = $opts{'callbacks'};
	
	# We've reached our max number of predictions, return
	return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
	
	# Get the actual bucket hash for this bucket name
	my $bucket = $distinct_buckets->{ $bucket_name };
	
	# The search range is the standard deviation multiplied by the number of standard deviations to search through
	my $search_range = ceil( $bucket->{stdev} * $stdev_limit );
	
	#The next bucket to search down into
	my $next_bucket_name = "";
	if (scalar @$distinct_bucket_keys > 0) {
		$next_bucket_name = shift @$distinct_bucket_keys;
	}
	
	foreach my $search_inc ( 0 .. $search_range ) {
		# Make an inverted search increment so we can search backwards
		my $neg_search_inc = $search_inc * -1;
		
		# Put forwards and backwards in the searches
		my @searches = ($search_inc, $neg_search_inc);
		
		# Make sure we only search on 0 once (i.e. 0 * -1 == 0)
		@searches = (0) if $search_inc == 0;
		
		foreach my $increment (@searches) {
			# We've reached our max number of predictions, return
			return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
			
			# Make a duration object using the accessor for this bucket
			my $duration_increment = new DateTime::Duration( $bucket->{duration} => $increment );
			
			# Get the new date
			my $new_date = $date + $duration_increment;
			
			# Trim the date down to just the date parts we care about
			$self->_trim_date( $new_date );
			
			# Skip this date if it's before or on the most recent date
			if (DateTime->compare( $new_date, $opts{'most_recent_date'} ) <= 0) { # New date is before the most recent one, or is same as most recent one
				next;
			}
			
			# Skip this date if the "min_date" option is set, and it's before or on that date
			if ($opts{'min_date'} && DateTime->compare($new_date, $opts{'min_date'}) <= 0) {
				next;
			}
			
			# If we have no more buckets to search into, determine if this date is a good prediction
			if (! $next_bucket_name) {
				if ($self->_distinct_check( %opts, date => $new_date )) {
					$predictions->{ $new_date->hires_epoch() } = $new_date;
				}
			}
			#If we're not at the smallest bucket, keep searching!
			else {
				$self->_date_descend_distinct(
					%opts,
					date        => $new_date,
					bucket_name => $next_bucket_name,
				);
			}
		}
	}
	
	return 1;
}

# Descend down into the date intervals, looking for predictions
sub _date_descend_interval {
	my $self = shift;
	
	# Validate the options
	my %opts = validate(@_, {
		date        	 	 => { isa => 'DateTime' },				 # The date to start searching in
		most_recent_date 	 => { isa => 'DateTime' },               # The most recent date of the dates provided
		bucket_name 	 	 => { type => SCALAR },					 # The bucket (date-part) to start searching in
		interval_buckets 	 => { type => HASHREF },				 # A hashref of all buckets to use when looking for good predictions
		interval_bucket_keys => { type => ARRAYREF },				 # A list of bucket names that we shift out of to get the next bucket to use
		stdev_limit 	 	 => { type => SCALAR },					 # The limit of how many standard deviations to search through
		predictions 	 	 => { type => HASHREF },				 # A hashref of predictions we find
		max_predictions  	 => { type => SCALAR,     optional => 1 }, # The maxmimum number of predictions to return (prevents overly long searches)
		min_date		 	 => { isa  => 'DateTime', optional => 1 }, # If set, make no prediction before 'min_date'
		callbacks 	     	 => { type => ARRAYREF,   optional => 1 }, # A list of custom coderefs that are called on each possible prediction
	});	
	
	# Copy the options over into simple scalars so it's easier on my eyes
	my $date 				 = delete $opts{'date'};        # Delete these ones out as we'll be overwriting them below
	my $bucket_name 		 = delete $opts{'bucket_name'};
	my $interval_buckets 	 = $opts{'interval_buckets'};
	my $interval_bucket_keys = $opts{'interval_bucket_keys'};
	my $stdev_limit 		 = $opts{'stdev_limit'};
	my $predictions 		 = $opts{'predictions'};
	my $max_predictions 	 = $opts{'max_predictions'};
	my $callbacks       	 = $opts{'callbacks'};
	
	# We've reached our max number of predictions, return
	return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
	
	# Get the actual bucket hash for this bucket name
	my $bucket = $interval_buckets->{ $bucket_name };
	
	# The search range is the standard deviation multiplied by the number of standard deviations to search through
	my $search_range = ceil( $bucket->{stdev} * $stdev_limit );
	
	#The next bucket to search down into
	my $next_bucket_name = "";
	if (scalar @$interval_bucket_keys > 0) {
		$next_bucket_name = shift @$interval_bucket_keys;
	}
	
	foreach my $search_inc ( 0 .. $search_range ) {
		# Make an inverted search increment so we can search backwards
		my $neg_search_inc = $search_inc * -1;
		
		# Put forwards and backwards in the searches
		my @searches = ($search_inc, $neg_search_inc);
		
		# Make sure we only search on 0 once (i.e. 0 * -1 == 0)
		@searches = (0) if $search_inc == 0;
		
		foreach my $increment (@searches) {
			# We've reached our max number of predictions, return
			return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
			
			# Make a duration object using the accessor for this bucket
			my $duration_increment = new DateTime::Duration( $bucket->{accessor} => $increment );
			
			# Get the new date
			my $new_date = $date + $duration_increment;
			
			# Trim the date down to just the date parts we care about
			$self->_trim_date( $new_date );
			
			# Skip this date if it's before or on the most recent date
			if (DateTime->compare( $new_date, $opts{'most_recent_date'} ) <= 0) { # New date is before the most recent one, or is same as most recent one
				next;
			}
			
			# Skip this date if the "min_date" option is set, and it's before or on that date
			if ($opts{'min_date'} && DateTime->compare($new_date, $opts{'min_date'}) <= 0) {
				next;
			}
			
			# If we have no more buckets to search into, determine if this date is a good prediction
			if (! $next_bucket_name) {
				if ($self->_interval_check( %opts, date => $new_date )) {
					$predictions->{ $new_date->hires_epoch() } = $new_date;
				}
			}
			#If we're not at the smallest bucket, keep searching!
			else {
				$self->_date_descend_interval(
					%opts,
					date        => $new_date,
					bucket_name => $next_bucket_name,
				);
			}
		}
	}
	
	return 1;
}

# Check to see if a given date is good according to the supplied distinct buckets by going through each bucket
# and comparing this date's deviation from that bucket's mean. If it is within the standard deviation for
# each bucket then consider it a good match.
sub _distinct_check {
	my $self = shift;
	
	# Temporarily allow extra options
	validation_options( allow_extra => 1 );
	my %opts = validate(@_, {
		date        	 	 => { isa => 'DateTime' },				   # The date to check
		distinct_buckets 	 => { type => HASHREF },				   # List of enabled buckets
		callbacks 	     	 => { type => ARRAYREF,   optional => 1 }, # A list of custom coderefs that are called on each possible prediction
	});
	validation_options( allow_extra => 0 );
	
	my $date             = $opts{'date'};
	my $distinct_buckets = $opts{'distinct_buckets'};
	my $callbacks        = $opts{'callbacks'};
	
	my $good = 1;
	my $date_deviation = 0;
	foreach my $bucket (values %$distinct_buckets) {
		# Get the value for this bucket's access for the $new_date
		my $cref = $date->can( $bucket->{accessor} );
		my $datepart_val = &$cref($date);
		
		# If the deviation of this datepart from the mean is within the standard deviation, 
		# this date ain't good.
		
		my $deviation = abs($datepart_val - $bucket->{mean});
		$date_deviation += $deviation;
		
		if ($deviation > $bucket->{stdev} )  {
			$good = 0;
			last;
		}
	}
	
	# All the dateparts were within their standard deviations, check for callbacks and push this date into the set of predictions
	if ($good == 1) {
		# Stick the date's total deviation into the object so it can be used for sorting in predict()
		$date->{_dtp_deviation} += $date_deviation;
		
		# Run each hook we were passed
		foreach my $callback (@$callbacks) {
			# If any hook returns false, this date is a no-go and we can stop processing it
			if (! &$callback($date)) {
				$good = 0;
				last;
			}
		}
		
		# If the date is still considered good, return true
		if ($good == 1) {
			return 1;
		}
		# Otherwise return false
		else {
			return 0;
		}
	}
}

# Check to see if a given date is good according to the supplied interval buckets by going through each bucket
# and comparing this date's deviation from that bucket's mean. If it is within the standard deviation for
# each bucket then consider it a good match.
sub _interval_check {
	my $self = shift;
	
	# Temporarily allow extra options
	validation_options( allow_extra => 1 );
	my %opts = validate(@_, {
		date        	 	 => { isa => 'DateTime' },				   # The date prediction to check
		most_recent_date 	 => { isa => 'DateTime' },                 # The most recent date of the dates provided
		interval_buckets 	 => { type => HASHREF },				   # List of enabled interval buckets
		callbacks 	     	 => { type => ARRAYREF,   optional => 1 }, # A list of custom coderefs that are called on each possible prediction
	});
	validation_options( allow_extra => 0 );
	
	my $date             = $opts{'date'};
	my $most_recent_date = $opts{'most_recent_date'};
	my $interval_buckets = $opts{'interval_buckets'};
	my $callbacks        = $opts{'callbacks'};
	
	# Flag specifying whether the predicted date is "good" (within the standard deviation) or not
	my $good = 1;
	
	# Total deviation of the predicted date from each of the bucket standard deviations
	my $date_deviation = 0;
	
	# Get a duration object for the span between the most recent date supplied and the predicted date
	my $dur = $date->subtract_datetime( $most_recent_date );
	
	foreach my $bucket (values %$interval_buckets) {
		my $cref = $dur->can( $bucket->{accessor} );
			croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($dur) . " object" unless $cref;
		my $interval = &$cref($dur);
		
		my $deviation = abs($interval - $bucket->{mean});
		$date_deviation += $deviation;
		
		if ($deviation > $bucket->{stdev} )  {
			$good = 0;
			last;
		}
	}
	
	# All the dateparts were within their standard deviations, check for callbacks and push this date into the set of predictions
	if ($good == 1) {
		# Stick the date's total deviation into the object so it can be used for sorting in predict()
		$date->{_dtp_deviation} += $date_deviation;
		
		# Run each hook we were passed
		foreach my $callback (@$callbacks) {
			# If any hook returns false, this date is a no-go and we can stop processing it
			if (! &$callback($date)) {
				$good = 0;
				last;
			}
		}
		
		# If the date is still considered good, return true
		if ($good == 1) {
			return 1;
		}
		# Otherwise return false
		else {
			return 0;
		}
	}
}

# Get the mean, variance, and standard deviation for a bucket
sub _bucket_statistics {
	my $self   = shift;
	my $bucket = shift;
	
	my $total = 0;
	my $count = 0;
	while (my ($value, $occurances) = each %{ $bucket->{buckets} }) {
		# Gotta loop for each time the value has been found, incrementing the total by the value
		for (1 .. $occurances) {
			$total += $value;
			$count++;
		}
	}
	
	my $mean = $total / $count;
	
	# Get the variance
	my $total_variance = 0;
	while (my ($value, $occurances) = each %{ $bucket->{buckets} }) {
		# Gotta loop for each time the value has been found
		my $this_variance = ($value - $mean) ** 2;
		
		$total_variance += $this_variance * $occurances;
	}
	
	my $variance = $total_variance / $count;
	my $stdev = sqrt($variance);
	
	return ($mean, $variance, $stdev);
}

# Whether this instance has been trained by train() or not
sub _is_trained {
	my $self = shift;
	
	return ($self->{trained} > 0) ? 1 : 0;
}  

# Utility method to print out the dates added to this instance
sub _print_dates {
	my $self = shift;
	
	foreach my $date (sort { $a->hires_epoch() <=> $b->hires_epoch() } @{ $self->{dates} }) {
		print $date->mdy('/') . ' ' . $date->hms . "\n";
	}
}

# Trim the date parts that are smaller than the smallest one we care about. If we only care about
# the year, month, and day, and during the initial search create an offset date that has an hour
# or minute that is off from the most recent given date, then when we do a comparison to see if
# we're predicting a date we've already been given it's possible that we could have that same
# date, just with the hour and second set forward a bit.
sub _trim_dates {
	my $self    = shift;
	my (@dates) = @_;
	
	# Get the smallest bucket we have turned on
	my @buckets = (sort { $a->order <=> $b->order } grep { $_->on && $_->trimmable } $self->profile->buckets)[0];
	my $smallest_bucket = $buckets[0];
	
	return if ! defined $smallest_bucket || ! $smallest_bucket || ! @buckets;
	
	foreach my $date (@dates) {
		confess "Can't trim a non-DateTime value" unless $date->isa( 'DateTime' );
		
		#foreach my $bucket (grep { $_->trimmable && ($_->order < $smallest_bucket->order) } values %DateTime::Event::Predict::Profile::BUCKETS) {
		foreach my $bucket (grep { $_->order < $smallest_bucket->order } values %DISTINCT_BUCKETS) {
			# Clone the date so we don't modify anything we shouldn't
			$date->clone->truncate( to => $smallest_bucket->accessor );
		}
	}
}

# Useless syntactic sugar
sub _trim_date { return &_trim_dates(@_); }

1; # End of DateTime::Event::Predict
    
__END__
    
=pod
    
=head1 NAME

DateTime::Event::Predict - Predict new dates from a set of dates

=head1 SYNOPSIS

Given a set of dates this module will predict the next date or dates to follow.

  use DateTime::Event::Predict;

  my $dtp = DateTime::Event::Predict->new(
      profile => {
          buckets => ['day_of_week'],
      },
  );

  # Add today's date: 2009-12-17
  my $date = new DateTime->today();
  $dtp->add_date($date);

  # Add the previous 14 days
  for  (1 .. 14) {
      my $new_date = $date->clone->add(
          days => ($_ * -1),
      );

      $dtp->add_date($new_date);
  }

  # Predict the next date
  my $predicted_date = $dtp->predict;

  print $predicted_date->ymd;

  # 2009-12-18

Here we create a new C<DateTime> object with today's date (it being December 17th, 2009 currently). We
then use L<add_date|add_date> to add it onto the list of dates that C<DateTime::Event::Predict> (DTP)
will use to make the prediction.

Then we take the 14 previous days (December 16-2) and them on to same list one by one. This gives us a
good set to make a prediction out of.

Finally we call L<predict|predict> which returns a C<DateTime> object representing the date that DTP has
calculated will come next.

=head1 HOW IT WORKS

Predicting the future is not easy, as anyone except, perhaps, Nostradamus will tell you. Events can occur
with perplexing randomness and discerning any pattern in the noise is nigh unpossible.

However, if you have a set of data to work with that you know for certain contains some sort of
regularity, and you have enough information to discover that regularity, then making predictions from
that set can be possible. The main issue with our example above is the tuning we did with this sort
of information.

When you configure your instance of DTP, you will have to tell what sorts of date-parts to keep
track of so that it has a good way of making a prediction. Date-parts can be things like
"day of the week", "day of the year", "is a weekend day", "week on month", "month of year", differences
between dates counted by "week", or "month", etc. Dtpredict will collect these identifiers from all the
provided dates into "buckets" for processing later on.



=head1 EXAMPLES

=over 4

=item Predicting Easter

=item Predicting 

=back

=head1 METHODS

=head2 new

Constructor

	my $dtp = DateTime::Event::Predict->new();

=head2 dates

Arguments: none | \@dates

Return value: \@dates

Called with no argument this method will return an arrayref to the list of the dates currently in the instance.

Called with an arrayref to a list of L<DateTime|DateTime> objects (C<\@dates>) this method will set the dates for this instance to C<\@dates>.

=head2 add_date

Arguments: $date

Return value: 

Adds a date on to the list of dates in the instance, where C<$date> is a L<DateTime|DateTime> object.

=head2 profile

Arguments: $profile

Set the profile for which date-parts will be 

  # Pass in preset profile by its alias
  $dtp->profile( profile => 'default' );
  $dtp->profile( profile => 'holiday' );

  # Create a new profile
  my $profile = new DateTime::Event::Predict::Profile(
      buckets => [qw/ minute hour day_of_week day_of_month /],
  );

  $dtp->profile( profile => $profile );

=head3 Provided profiles

The following profiles are provided for use by-name:

=head2 predict

Arguments: %options

Return Value: $next_date | @next_dates

Predict the next date(s) from the dates supplied.

  my $predicted_date = $dtp->predict();
  
If list context C<predict> returns a list of all the predictions, sorted by their probability:

  my @predicted_dates = $dtp->predict();
  
The number of prediction can be limited with the C<max_predictions> option.
	
Possible options

  $dtp->predict(
      max_predictions => 4, # Once 4 predictions are found, return back
      callbacks => [
          sub { return ($_->second % 4) ? 0 : 1 } # Only predict dates with second values that are divisible by four.
      ],
  );
  
=over 4

=item max_predictions

Maximum number of predictions to find.

=item callbacks

Arrayref of subroutine callbacks. If any of them return a false value the date will not be returned as a prediction.

=back

=head2 train

Train this instance of DTP

=head1 TODO

=over 4

=item *

It would be be cool if you could pass your own buckets in with a certain type, so you could, say, look for recurrence based
on intervals of 6 seconds, or 18 days, whatever.

=item *

We need to be able to handle recording more than one interval per diff. If the dates are all offset from each other by 1 day 6 hours (May 1, 3:00; May 2, 6:00),
we can't be predicting a new date that's exactly 1 day after the most recent one.
  ^ The best way to do this is probably to record intervals as epoch seconds, so everything is taken into account. Maybe record epoch seconds in addition
    to whole regular intervals like days & hours.

=back
 
=head1 AUTHOR

Brian Hann, C<< <brian.hann at gmail.com> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-datetime-event-predict at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=DateTime-Event-Predict>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc DateTime::Event::Predict


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=DateTime-Event-Predict>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/DateTime-Event-Predict>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/DateTime-Event-Predict>

=item * Search CPAN

L<http://search.cpan.org/dist/DateTime-Event-Predict/>

=back

=head1 COPYRIGHT & LICENSE

Copyright 2009 Brian Hann, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 SEE ALSO

L<DateTime>, L<DateTime::Event::Predict::Profile>

=cut