The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Message::Passing::Filter::Regexp;
use 5.014002;
use strict;
use warnings;
use Moo;
use MooX::Types::MooseLike::Base qw/ HashRef ArrayRef Str /;
use namespace::clean -except => 'meta';
use DateTime;
use JSON::Types;
use Message::Passing::Filter::Regexp::Log;

with qw/ Message::Passing::Role::Filter /;
use vars qw( $VERSION );

$VERSION = 0.02;

has format => (
    is => 'ro',
    isa => Str,
    default => sub { ':default' },
);

has regexfile => (
    is => 'ro',
    isa => Str,
    default => sub { '/etc/message-passing/regexfile' },
);

has capture => (
    is => 'ro',
    isa => ArrayRef,
    default => sub { [] }, 
);

has mutate => (
    is => 'ro',
    isa => HashRef,
    default => sub { {} },
);

has _regex => (
    is => 'ro',
    lazy => 1,
    builder => '_build_regex',
);

sub _build_regex {
    my $self = shift;
    Message::Passing::Filter::Regexp::Log->new(
        format    => $self->format,
        capture   => $self->capture,
        regexfile => $self->regexfile,
    );
};

sub filter {
    my ($self, $message) = @_;
    my $log_line = $message->{'@message'};
    my %data;
    my @fields = $self->_regex->capture;
    my $re = $self->_regex->regexp;
    @data{@fields} = $log_line =~ /$re/;
    for ( keys %{ $self->mutate } ) {
        my $type = $self->mutate->{$_};
        $data{$_} = eval "$type $data{$_}";
    };
    $message->{'@fields'} = {
        %data,
    };
    return $message;
}

1;


1;
__END__
=head1 NAME

Message::Passing::Filter::Regexp - Regexp Capture Filter For Message::Passing

=head1 SYNOPSIS

    # regexfile
    [FORMAT]
    :default = %date %status %remotehost %domain %request %originhost %responsetime %upstreamtime %bytes %referer %ua %xff
    :nginxaccesslog = %date %status %remotehost %bytes %responsetime
    [REGEXP]
    %date = (?#=date)\[(?#=ts)\d{2}\/\w{3}\/\d{4}(?::\d{2}){3}(?#!ts) [-+]\d{4}\](?#!date)
    %status = (?#=status)\d+(?#!status)
    %remotehost = (?#=remotehost)\S+(?#!remotehost)
    %domain = (?#=domain).*?(?#!domain)
    %request = (?#=request)-|(?#=method)\w+(?#!method) (?#=url).*?(?#!url) (?#=version)HTTP/\d\.\d(?#!version)(?#!request)
    %originhost = (?#=originhost)-|(?#=oh).*?(?#!oh):\d+(?#!originhost)
    %responsetime = (?#=responsetime)-|.*?(?#!responsetime)
    %upstreamtime = (?#=upstreamtime).*?(?#!upstreamtime)
    %bytes = (?#=bytes)\d+(?#!bytes)
    %referer = (?#=referer)\"(?#=ref).*?(?#!ref)\"(?#!referer)
    %useragent = (?#=useragent)\"(?#=ua).*?(?#!ua)\"(?#!useragent)
    %xforwarderfor = (?#=xforwarderfor)\"(?#=xff).*?(?#!xff)\"(?#!xforwarderfor)

    # message-passing-cli
    use Message::Passing::DSL;
    run_message_server message_chain {
        input file => (
            class => 'FileTail',
            output_to => 'decoder',
        );
        decoder decoder => (
            class => 'JSON',
            output_to => 'logstash',
        );
        filter logstash => (
            class => 'ToLogstash',
            output_to => 'regexp',
        );
        filter regexp => (
            class => 'Regexp',
            format => ':nginxaccesslog',
            capture => [qw( ts status remotehost url oh responsetime upstreamtime bytes )]
            output_to => 'encoder',
        );
        encoder("encoder",
            class => 'JSON',
            output_to => 'stdout',
            output_to => 'es',
        );
        output stdout => (
            class => 'STDOUT',
        );
        output elasticsearch => (
            class => 'ElasticSearch',
            elasticsearch_servers => ['127.0.0.1:9200'],
        );
    };

=head1 DESCRIPTION

This filter passes all incoming messages through with regexp captures.

Note it must be running after Message::Passing::Filter::ToLogstash because it don't process with json format but directly capture C<< $message->{'@message'} >> data lines into C<< %{ $message->{'@fields'} } >>

=head1 ATTRIBUTES

=head2 regexfile

Path of your regexfile. Default is /etc/message-passing/regexfile.

=head2 format

Name of a defined format in your regexfile.

=head2 capture

ArrayRef of regex names which you want to capture and has been defined in your regexfile. note delete the prefix C<%>.

=head1 TODO

Need to keep numberic type for json. C<to_json> will make all element to be "string". That's bad for elasticsearch term_stats API.

=head1 SEE ALSO

Idea steal from <http://logstash.net> Grok filter

Config Format see L<Config::Tiny>

=head1 AUTHOR

chenryn, E<lt>rao.chenlin@gmail.com<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2012 by chenryn

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.14.2 or,
at your option, any later version of Perl 5 you may have available.


=cut