The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Hailo::Engine::Default;
BEGIN {
  $Hailo::Engine::Default::AUTHORITY = 'cpan:AVAR';
}
BEGIN {
  $Hailo::Engine::Default::VERSION = '0.64';
}

use 5.010;
use Any::Moose;
use List::Util qw<min first shuffle>;
use List::MoreUtils qw<uniq>;

with qw[ Hailo::Role::Arguments Hailo::Role::Engine ];

has repeat_limit => (
    isa     => 'Int',
    is      => 'rw',
    lazy    => 1,
    default => sub {
        my ($self) = @_;
        my $order = $self->order;
        return min(($order * 10), 50);
    }
);

sub BUILD {
    my ($self) = @_;

    # This performance hack is here because in our tight loops calling
    # $self->storage->sth->{...} is actually a significant part of the
    # overall program execution time since we're doing two method
    # calls and hash dereferences for each call to the database.

    my $sth = $self->storage->sth;
    while (my ($k, $v) = each %$sth) {
        $self->{"_sth_$k"} = $v;
    }

    return;
}

## no critic (Subroutines::ProhibitExcessComplexity)
sub reply {
    my $self = shift;
    my $tokens = shift // [];

    # we will favor these tokens when making the reply> shuffle them
    # and discard half.
    my @key_tokens = do {
        my $i = 0;
        grep { $i++ % 2 == 0 } shuffle(@$tokens);
    };

    my (@key_ids, %token_cache);
    for my $token_info (@key_tokens) {
        my $text = $token_info->[1];
        my $info = $self->_token_similar($text);
        next unless defined $info;
        my ($id, $spacing) = @$info;
        next unless defined $id;
        push @key_ids, $id;
        next if exists $token_cache{$id};
        $token_cache{$id} = [$spacing, $text];
    }

    # sort the rest by rareness
    @key_ids = $self->_find_rare_tokens(\@key_ids, 2);

    # get the middle expression
    my $seed_token_id = shift @key_ids;
    my ($orig_expr_id, @token_ids) = $self->_random_expr($seed_token_id);
    return unless defined $orig_expr_id; # we don't know any expressions yet

    # remove key tokens we're already using
    @key_ids = grep { my $used = $_; !first { $_ == $used } @token_ids } @key_ids;

    my $expr_id = $orig_expr_id;

    # construct the end of the reply
    $self->_construct_reply('next', $expr_id, \@token_ids, \@key_ids);

    # construct the beginning of the reply
    $self->_construct_reply('prev', $expr_id, \@token_ids, \@key_ids);

    # translate token ids to token spacing/text
    my @reply = map {
        $token_cache{$_} // ($token_cache{$_} = $self->_token_info($_))
    } @token_ids;
    return \@reply;
}

sub _token_info {
    my ($self, $id) = @_;

    $self->{_sth_token_info}->execute($id);
    my @res = $self->{_sth_token_info}->fetchrow_array;
    return \@res;
}

sub learn {
    my ($self, $tokens) = @_;
    my $order = $self->order;

    # only learn from inputs which are long enough
    return if @$tokens < $order;

    my %token_cache;

    for my $token (@$tokens) {
        my $key = join '', @$token;
        next if exists $token_cache{$key};
        $token_cache{$key} = $self->_token_id_add($token);
    }

    # process every expression of length $order
    for my $i (0 .. @$tokens - $order) {
        my @expr = map { $token_cache{ join('', @{ $tokens->[$_] }) } } $i .. $i+$order-1;
        my $expr_id = $self->_expr_id(\@expr);

        if (!defined $expr_id) {
            $expr_id = $self->_add_expr(\@expr);
            $self->{_sth_inc_token_count}->execute($_) for uniq(@expr);
        }

        # add link to next token for this expression, if any
        if ($i < @$tokens - $order) {
            my $next_id = $token_cache{ join('', @{ $tokens->[$i+$order] }) };
            $self->_inc_link('next_token', $expr_id, $next_id);
        }

        # add link to previous token for this expression, if any
        if ($i > 0) {
            my $prev_id = $token_cache{ join('', @{ $tokens->[$i-1] }) };
            $self->_inc_link('prev_token', $expr_id, $prev_id);
        }

        # add links to boundary token if appropriate
        my $b = $self->storage->_boundary_token_id;
        $self->_inc_link('prev_token', $expr_id, $b) if $i == 0;
        $self->_inc_link('next_token', $expr_id, $b) if $i == @$tokens-$order;
    }

    return;
}

# sort token ids based on how rare they are
sub _find_rare_tokens {
    my ($self, $token_ids, $min) = @_;
    return unless @$token_ids;

    my %links;
    for my $id (@$token_ids) {
        next if exists $links{$id};
        $self->{_sth_token_count}->execute($id);
        $links{$id} = $self->{_sth_token_count}->fetchrow_array;
    }

    # remove tokens which are too rare
    my @ids = grep { $links{$_} >= $min } @$token_ids;

    @ids = sort { $links{$a} <=> $links{$b} } @ids;

    return @ids;
}

# increase the link weight between an expression and a token
sub _inc_link {
    my ($self, $type, $expr_id, $token_id) = @_;

    $self->{"_sth_${type}_count"}->execute($expr_id, $token_id);
    my $count = $self->{"_sth_${type}_count"}->fetchrow_array;

    if (defined $count) {
        $self->{"_sth_${type}_inc"}->execute($expr_id, $token_id);
    }
    else {
        $self->{"_sth_${type}_add"}->execute($expr_id, $token_id);
    }

    return;
}

# add new expression to the database
sub _add_expr {
    my ($self, $token_ids) = @_;

    # add the expression
    $self->{_sth_add_expr}->execute(@$token_ids);
    return $self->storage->dbh->last_insert_id(undef, undef, "expr", undef);
}

# look up an expression id based on tokens
sub _expr_id {
    my ($self, $tokens) = @_;
    $self->{_sth_expr_id}->execute(@$tokens);
    return $self->{_sth_expr_id}->fetchrow_array();
}

# return token id if the token exists
sub _token_id {
    my ($self, $token_info) = @_;

    $self->{_sth_token_id}->execute(@$token_info);
    my $token_id = $self->{_sth_token_id}->fetchrow_array();

    return unless defined $token_id;
    return $token_id;
}

# get token id (adding the token if it doesn't exist)
sub _token_id_add {
    my ($self, $token_info) = @_;

    my $token_id = $self->_token_id($token_info);
    $token_id = $self->_add_token($token_info) unless defined $token_id;
    return $token_id;
}

# return all tokens (regardless of spacing) that consist of this text
sub _token_similar {
    my ($self, $token_text) = @_;
    $self->{_sth_token_similar}->execute($token_text);
    return $self->{_sth_token_similar}->fetchrow_arrayref;
}

# add a new token and return its id
sub _add_token {
    my ($self, $token_info) = @_;
    $self->{_sth_add_token}->execute(@$token_info);
    return $self->storage->dbh->last_insert_id(undef, undef, "token", undef);
}

# return a random expression containing the given token
sub _random_expr {
    my ($self, $token_id) = @_;

    my $expr;

    if (!defined $token_id) {
        $self->{_sth_random_expr}->execute();
        $expr = $self->{_sth_random_expr}->fetchrow_arrayref();
    }
    else {
        # try the positions in a random order
        for my $pos (shuffle 0 .. $self->order-1) {
            my $column = "token${pos}_id";

            # get a random expression which includes the token at this position
            $self->{"_sth_expr_by_$column"}->execute($token_id);
            $expr = $self->{"_sth_expr_by_$column"}->fetchrow_arrayref();
            last if defined $expr;
        }
    }

    return unless defined $expr;
    return @$expr;
}

# return a new next/previous token
sub _pos_token {
    my ($self, $pos, $expr_id, $key_tokens) = @_;

    $self->{"_sth_${pos}_token_get"}->execute($expr_id);
    my $pos_tokens = $self->{"_sth_${pos}_token_get"}->fetchall_arrayref();

    if (defined $key_tokens) {
        for my $i (0 .. $#{ $key_tokens }) {
            my $want_id = $key_tokens->[$i];
            my @ids     = map { $_->[0] } @$pos_tokens;
            my $has_id  = grep { $_ == $want_id } @ids;
            next unless $has_id;
            return splice @$key_tokens, $i, 1;
        }
    }

    my @novel_tokens;
    for my $token (@$pos_tokens) {
        push @novel_tokens, ($token->[0]) x $token->[1];
    }
    return $novel_tokens[rand @novel_tokens];
}

sub _construct_reply {
    my ($self, $what, $expr_id, $token_ids, $key_ids) = @_;
    my $order          = $self->order;
    my $repeat_limit   = $self->repeat_limit;
    my $boundary_token = $self->storage->_boundary_token_id;

    my $i = 0;
    while (1) {
        if (($i % $order) == 0 and
            (($i >= $repeat_limit * 3) ||
             ($i >= $repeat_limit and uniq(@$token_ids) <= $order))) {
            last;
        }

        my $id = $self->_pos_token($what, $expr_id, $key_ids);
        last if $id eq $boundary_token;

        given ($what) {
            when ('next') {
                push @$token_ids, $id;
                $expr_id = $self->_expr_id([@$token_ids[-$order..-1]]);
            }
            when ('prev') {
                unshift @$token_ids, $id;
                $expr_id = $self->_expr_id([@$token_ids[0..$order-1]]);
            }
        }
    } continue {
        $i++;
    }

    return;
}

__PACKAGE__->meta->make_immutable;

=encoding utf8

=head1 NAME

Hailo::Engine::Default - The default engine backend for L<Hailo|Hailo>

=head1 DESCRIPTION

This backend implements the logic of replying to and learning from
input using the resources given to the L<engine
roles|Hailo::Role::Engine>.

=head1 AUTHORS

Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com

E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>

=head1 LICENSE AND COPYRIGHT

Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and
E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>

This program is free software, you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut