The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package App::LogStats;
use strict;
use warnings;
use File::Spec;
use Getopt::Long qw/GetOptionsFromArray/;
use IO::Interactive::Tiny;

our $VERSION = '0.09';

our $DEFAULT_RCFILE_NAME = '.statsrc';

use Class::Accessor::Lite (
    new => 1,
    rw  => [qw/
        result
        config
    /],
);

our @RESULT_LIST = (qw/
    count sum _line_
    average median mode _line_
    max min range variance stddev
/);
our %MORE_RESULT = (
    median   => 1,
    mode     => 1,
    variance => 1,
    stddev   => 1,
);

our @DRAW_TABLE = (
    [' ',' ','-',' '],
    [' ',' ',' '],
    [' ',' ','-',' '],
    [' ',' ',' '],
    [' ',' ','-',' '],
    [' ',' ','-',' '],
);

sub run {
    my $self = shift;
    $self->_prepare(\@_)->_main->_finalize;
}

sub _prepare {
    my ($self, $argv) = @_;

    my $config = $self->_read_rc( $self->_rc_file($argv) );
    $self->_merge_opt($config, $argv);

    $self->config($config);

    $self;
}

sub _rc_file {
    my ($self, $argv) = @_;

    my $rc = 0;
    for my $opt (@{$argv}) {
        if ($opt =~ m!--rc=([^\s]+)!) {
            return $1;
        }
        return $opt if $rc == 1;
        $rc = 1 if $opt eq '--rc';
    }
    return $DEFAULT_RCFILE_NAME;
}

sub _read_rc {
    my ($self, $rc_file) = @_;

    my %config;

    for my $dir ('/etc/', $ENV{STATSRC_DIR}, $ENV{HOME}, '.') {
        next unless $dir;
        my $file = File::Spec->catfile($dir, $rc_file);
        next unless -e $file;
        $self->_parse_rc($file => \%config);
    }

    return \%config;
}

sub _parse_rc {
    my ($self, $file, $config) = @_;

    open my $fh, '<', $file or die "Could not open file: $file";
    while (<$fh>) {
        chomp;
        next if /\A\s*\Z/sm;
        if (/\A(\w+):\s*(.+)\Z/sm) {
            my ($key, $value) = ($1, $2);
            if ($key eq 'file') {
                push @{$config->{$key}}, $value;
            }
            else {
                $config->{$key} = $value;
            }
        }
    }
    close $fh;
}

sub _merge_opt {
    my ($self, $config, $argv) = @_;

    Getopt::Long::Configure('bundling');
    GetOptionsFromArray(
        $argv,
        'file=s@'       => \$config->{file},
        'd|delimiter=s' => \$config->{delimiter},
        'f|fields=s'    => \$config->{fields},
        't|through'     => \$config->{through},
        'di|digit=i'    => \$config->{digit},
        's|strict'      => \$config->{strict},
        'no-comma'      => \$config->{no_comma},
        'tsv'           => \$config->{tsv},
        'csv'           => \$config->{csv},
        'more'          => \$config->{more},
        'cr'            => \$config->{cr},
        'crlf'          => \$config->{crlf},
        'rc=s'          => \$config->{rc},
        'h|help'        => sub {
            $self->_show_usage(1);
        },
        'v|version' => sub {
            print "stats v$App::LogStats::VERSION\n";
            exit 1;
        },
    ) or $self->_show_usage(2);

    push @{$config->{file}}, @{$argv};

    $self->_validate_config($config);
}

sub _show_usage {
    my ($self, $exitval) = @_;

    require Pod::Usage;
    Pod::Usage::pod2usage($exitval);
}

sub _validate_config {
    my ($self, $config) = @_;

    if (!$config->{digit} || $config->{digit} !~ m!^\d+$!) {
        $config->{digit} = 2;
    }

    $config->{delimiter} = "\t" unless defined $config->{delimiter};

    if ($config->{fields}) {
        for my $f ( split ',', $config->{fields} ) {
            $config->{field}->{$f} = 1;
        }
        delete $config->{fields};
    }
    else {
        $config->{field}->{1} = 1;
    }
}

sub _main {
    my $self = shift;

    my $r = +{};

    if ( ! IO::Interactive::Tiny::is_interactive(*STDIN) ) {

        while ( my $line = <STDIN> ) {
            $self->_loop(\$line => $r);
        }

    }
    elsif ( scalar @{ $self->config->{file} } ) {

        for my $file (@{$self->config->{file}}) {
            open my $fh, '<', $file or die "$file: No such file";
            while ( my $line = <$fh> ) {
                $self->_loop(\$line => $r);
            }
            close $fh;
        }

    }

    $self->_after_calc($r);

    $self->result($r);
    $self;
}

sub _loop {
    my ($self, $line_ref, $r) = @_;

    my $line = $$line_ref;

    print $line if $self->config->{through};
    chomp $line;
    return unless $line;
    $self->_calc_line($r, [ split $self->config->{delimiter}, $line ]);
}

sub _calc_line {
    my ($self, $r, $elements) = @_;

    my $strict = $self->config->{strict};
    my $i = 0;
    for my $element (@{$elements}) {
        $i++;
        next unless $self->config->{field}{$i};
        if ( (!$strict && $element =~ m!\d!)
                || ($strict && $element =~ m!^(\d+\.?(:?\d+)?)$!) ) {
            my ($num) = ($element =~ m!^(\d+\.?(:?\d+)?)!);
            $num ||= 0; # FIXME
            $r->{$i}{count}++;
            $r->{$i}{sum} += $num;
            $r->{$i}{max}  = $num
                if !defined $r->{$i}{max} || $num > $r->{$i}{max};
            $r->{$i}{min}  = $num
                if !defined $r->{$i}{min} || $num < $r->{$i}{min};
            push @{$r->{$i}{list}}, $num if $self->config->{more};
        }
    }
}

sub _after_calc {
    my ($self, $r) = @_;

    for my $i (keys %{$r}) {
        next unless $r->{$i}{count};
        $r->{show_result} ||= 1;
        $r->{$i}{average} = $r->{$i}{sum} / $r->{$i}{count};
        $r->{$i}{range}   = $r->{$i}{max} - $r->{$i}{min};
        if ($self->config->{more}) {
            $r->{$i}{median}   = $self->_calc_median($i, $r);
            $r->{$i}{mode}     = $self->_calc_mode($i, $r);
            $r->{$i}{variance} = $self->_calc_variance($i, $r);
            $r->{$i}{stddev}   = $self->_calc_stddev($i, $r);
        }
    }
}

sub _calc_median {
    my ($self, $i, $r) = @_;

    my $list = $r->{$i}{list};

    return unless ref $list eq 'ARRAY';
    return $list->[0] unless @{$list} > 1;
    @{$list} = sort { $a <=> $b } @{$list};
    my $element_count = scalar(@{$list});
    return $list->[ $#{$list} / 2 ] if $element_count & 1;
    my $mid = $element_count / 2;
    return ( $list->[ $mid - 1 ] + $list->[ $mid ] ) / 2;
}

sub _calc_mode {
    my ($self, $i, $r) = @_;

    my $list = $r->{$i}{list};

    return unless ref $list eq 'ARRAY';
    return $list->[0] unless @{$list} > 1;
    my %hash;
    $hash{$_}++ for @{$list};
    my $max_val = ( sort { $b <=> $a } values %hash )[0];
    for my $key (keys %hash) {
        delete $hash{$key} unless $hash{$key} == $max_val;
    }
    return _calc_average([keys %hash]);
}

sub _calc_variance {
    my ($self, $i, $r) = @_;

    my $list = $r->{$i}{list};

    return 0 unless @{$list} > 1;
    my $average = $r->{$i}{average};
    return _calc_sum([ map { ($_ - $average) ** 2 } @{$list} ]) / $#{$list};
}

sub _calc_stddev {
    my ($self, $i, $r) = @_;

    return 0 unless @{$r->{$i}{list}} > 1;
    my $variance = defined $r->{$i}{variance}
        ? $r->{$i}{variance} : $self->_calc_variance($i, $r);
    return sqrt($variance);
}

sub _calc_average {
    my $list = shift;

    my $sum = 0;
    for my $i (@{$list}) {
        $sum += $i;
    }
    return $sum / scalar(@{$list});
}

sub _calc_sum {
    my $list = shift;

    my $sum = 0;
    $sum += $_ for (@{$list});

    return $sum;
}

sub _finalize {
    my $self = shift;

    return unless $self->result->{show_result};

    my $output_lines;
    if ($self->config->{tsv}) {
        $output_lines = $self->_view_delimited_line("\t");
    }
    elsif ($self->config->{csv}) {
        $output_lines = $self->_view_delimited_line(',');
    }
    else {
        $output_lines = $self->_view_table;
    }

    my $lf = $self->config->{cr} ? "\r" : $self->config->{crlf} ? "\r\n" : "\n";

    print $lf unless $self->config->{quiet};
    for my $line ( @{$output_lines} ) {
        print $line, $lf;
    }
}

sub _view_delimited_line {
    my ($self, $delimiter) = @_;

    my @fields = sort keys %{$self->config->{field}};
    my @output;
    push @output, join($delimiter, '', map { $self->_quote($_) } @fields);
    for my $col (@RESULT_LIST) {
        next if !$self->config->{more} && $MORE_RESULT{$col};
        next if $col eq '_line_';
        my @rows = ( $self->_quote($col) );
        for my $i (@fields) {
            push @rows, $self->_quote( $self->_facing($self->result->{$i}{$col}) );
        }
        push @output, join($delimiter, @rows);
    }
    return \@output;
}

sub _view_table {
    my $self = shift;

    my @fields = sort keys %{$self->config->{field}};

    require Text::ASCIITable;
    my $t = Text::ASCIITable->new;
    $t->setCols('', @fields);
    for my $col (@RESULT_LIST) {
        next if !$self->config->{more} && $MORE_RESULT{$col};
        if ($col eq '_line_') {
            $t->addRowLine;
            next;
        }
        my @rows;
        for my $i (@fields) {
            push @rows, $self->_facing($self->result->{$i}{$col});
        }
        $t->addRow($col, @rows);
    }
    return [ split( "\n", $t->draw(@DRAW_TABLE) ) ];
}

sub _quote {
    my ($self, $value, $quote) = @_;

    return $value unless $self->config->{csv};
    $quote ||= '"';
    return "$quote$value$quote";
}

sub _facing {
    my ($self, $value) = @_;

    return '-' unless defined $value;

    if ($value =~ m!\.!) {
        $value = sprintf("%.". $self->config->{digit}. 'f',  $value);
    }

    unless ($self->config->{no_comma}) {
        my ($n, $d) = split /\./, $value;
        while ( $n =~ s!(.*\d)(\d\d\d)!$1,$2! ){};
        $value = $d ? "$n\.$d" : $n;
    }

    return $value;
}

1;

__END__

=head1 NAME

App::LogStats - calculate lines


=head1 SYNOPSIS

    use App::LogStats;

    my $stats = App::LogStats->new->run(@ARGV);


=head1 DESCRIPTION

App::LogStats helps you to calculate data from lines.

See: L<stats> command


=head1 METHODS

=head2 run

to run command


=head1 REPOSITORY

App::LogStats is hosted on github
<http://github.com/bayashi/App-LogStats>


=head1 AUTHOR

Dai Okabayashi E<lt>bayashi@cpan.orgE<gt>


=head1 SEE ALSO

L<stats>

few stats codes were copied from L<Statistics::Lite>


=head1 LICENSE

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.

=cut