The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /usr/bin/perl

use strict;
use warnings;

use Getopt::Long;
use File::Tail::Scribe;
use POSIX ();
use FindBin ();
use File::Basename ();
use File::Spec::Functions;
use Pod::Usage;
use Sys::Hostname;
use YAML::Any;
use Proc::ProcessTable;

my $script = File::Basename::basename($0);
my $SELF = catfile $FindBin::Bin, $script;
my @saved_argv = @ARGV;

my $sigset = POSIX::SigSet->new();
my $hup = POSIX::SigAction->new('sigHUP_handler',
				   $sigset,
				   &POSIX::SA_NODEFER);
POSIX::sigaction(&POSIX::SIGHUP, $hup);
my $term = POSIX::SigAction->new('sigTERM_handler',
				 $sigset,
				 &POSIX::SA_NODEFER);
POSIX::sigaction(&POSIX::SIGTERM, $term);
POSIX::sigaction(&POSIX::SIGINT, $term);
POSIX::sigaction(&POSIX::SIGQUIT, $term);

my @cat_re;
my %args = (
    config => '/etc/tail_to_scribe.conf',
    dirs => [ '/var/log/httpd' ],
    filter => '[._]log$',
    'exclude-dir' => [],
    'exclude-re' => [],
    'follow-symlinks' => 0,
    'sleep-interval' => 2,
    host => 'localhost',
    port => 1463,
    level => 'info',
    'retry-plan-a' => 'buffer',
    'retry-plan-b' => 'discard',
    'retry-buffer-size' => 100000,
    'retry-count' => 100,
    'retry-delay' => 10,
    'state-file-name' => '.tailtoscribe',
    'no-init' => 0,
);

GetOptions(\%args,
	   'category=s',
	   'config=s',
	   'dirs=s{1,}',
	   'excluded-dir=s{1,}',
	   'excluded-re=s{1,}',
	   'follow-symlinks',
	   'sleep-interval=i',
	   'filter=s',
	   'port=i',
	   'host=s',
	   'level=s',
	   'no-init',
	   'retry-plan-a=s',
	   'retry-plan-b=s',
	   'retry-buffer-size=i',
	   'retry-count=i',
	   'retry-delay=i',
	   'state-file-name=s',
	   'debug:s',
	   'daemon',
	   'pidfile=s',
	   "help|?",
           ) or pod2usage(-exitval => 2, -verbose => 0);

pod2usage(-exitval => 0, -verbose => 2) if $args{'help'};


my $dbg_file;
my $debug;
if (defined $args{debug}) {
    $debug++;
    if ($args{debug}) {
	open($dbg_file, '>', $args{debug}) or die "Failed to open debug file $args{debug}: $!";
    }
    else {
	$dbg_file = \*STDERR;
    }
    select($dbg_file);
    $| = 1;
}

my @excludes = @{$args{'exclude-dir'}};
push(@excludes, map { qr/$_/ } @{$args{'exclude-re'}});

my $hostname = hostname();
my $msg_filter = sub {
    my $self = shift;
    my $filename = shift;
    my $line = shift;
    $filename =~ s{^.*/}{};		      # remove leading dirs
    $filename =~ s{(?:[._-]access)?[._-][^._-]*$}{}; # remove extension
    $filename ||= 'default';                  # in case everything gets removed

    return ('info', 'httpd', "$hostname\t$filename\t$line");
};

if ( -f $args{config} ) {
    eval `cat $args{config}`;
    die "Failed to load \"$args{config}\": $@" if $@;
}

check_pid($args{pidfile}) if $args{pidfile};

if ($args{daemon}) {
    open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
    open STDOUT, '>/dev/null' or die "Can't write to /dev/null: $!";
    defined(my $pid = fork) or die "Can't fork: $!";
    exit if $pid;
    POSIX::setsid() or die "Can't start a new session: $!";
    open STDERR, '>&STDOUT' or die "Can't dup stdout: $!";
}

write_pid($args{pidfile}) if $args{pidfile};

END {
    cleanup_pid($args{pidfile});
}

if ($debug) {
    print "Command line arguments\n " . Dump(\%args) . "===\n";
}

my $log = File::Tail::Scribe->new(
    directories => $args{dirs},
    filter => qr/$args{filter}/,
    exclude => \@excludes,
    follow_symlinks => $args{'follow-symlinks'},
    sleep_interval => $args{'sleep-interval'},
    scribe_options => {
	name       => 'scribe',
	min_level  => $args{level},
	host       => $args{host},
	port       => $args{port},
	default_category => $args{category},
	retry_plan_a => $args{'retry-plan-a'},
	retry_plan_b => $args{'retry-plan-b'},
	retry_buffer_size => $args{'retry-buffer-size'},
	retry_count => $args{'retry-count'},
	retry_delay => $args{'retry-delay'},
    },
    msg_filter => $msg_filter,
    default_level => $args{level},
    statefilename => $args{'state-file-name'},
    no_init => $args{'no-init'},
    );

$log->watch_files();

sub sigHUP_handler {
    $log->save_state();
    exec($SELF, @saved_argv) or die "Couldn't restart: $!\n";
}

sub sigTERM_handler {
    $log->save_state();
    cleanup_pid($args{pidfile});
    exit();
}

sub read_pid {
    my $pidfile = shift;
    open my $fh, '<', $pidfile or return;
    my $pid = <$fh>;
    close($fh);
    chomp $pid if $pid;
    return $pid;
}

sub write_pid {
    my $pidfile = shift;
    open my $fh, '>', $pidfile or die "Failed to open $pidfile for writing: $!";
    print $fh "$$\n";
    close($fh);
}

sub check_pid {
    my $pidfile = shift;
    my $pid = read_pid($pidfile) or return;
    my $t = Proc::ProcessTable->new();
    for my $p ( @{$t->table} ) {
	if ($p->pid == $pid && $p->cmndline =~ m/tail_to_scribe/) {
	    die "tail_to_scribe is already running, PID $pid, pidfile $pidfile\n";
	}
    }
}

sub cleanup_pid {
    my $pidfile = shift;
    if ($pidfile && (my $pid = read_pid($pidfile)) ) {
	unlink $pidfile if $pid == $$;
    }
}

__END__

=head1 NAME

tail_to_scribe.pl - Tail files and send to a Scribe logging system.


=head1 SYNOPSIS

  tail_to_scribe.pl [ --config=CONFIG_FILE ]
                    [ --daemon ]
                    [ --dirs DIR1 [DIR2 ...] ]
                    [ --excluded-dir XDIR1 [XDIR2 ...] ]
                    [ --excluded-re REGEXP1 [REGEXP2 ...] ]
                    [ --filter=REGEXP ]
                    [ --follow-symlinks ]
                    [ --no-init ]
                    [ --state-file-name=FILE ]
                    [ --sleep-interval=SECS ]
                    [ --port=PORT ] [ --host=HOST ]
                    [ --level=LEVEL ] [ --category=CATEGORY ]

=head1 DESCRIPTION

tail_to_scribe.pl monitors files in a given directory (or set of directories),
such as Apache log files in /var/log/httpd, and as the log files are written to,
takes the changes and sends them to a running instance of the Scribe logging
system.

=head1 OPTIONS

=head2 --daemon

Run in the background.

=head2 --dirs DIR1 [DIR2 ...]

The list of directories in which to monitor files for changes.  Defaults to /var/log/httpd.

=head2 --excluded-dir XDIR1 [XDIR2 ...]

A list of directories to exclude from monitoring. These must be full filesystem paths.  Defaults to empty (no exclusions).

=head2 --excluded-re REGEXP1 [REGEXP2 ...]

A list of exclude regular expressions; any directory paths that match will be excluded from monitoring.  Defaults to empty (no exclusions).

=head2 --filter=REGEXP

A file filter regular expression; only filenames that match will be monitored.  Defaults to '[._]log$' (files ending in .log or _log).  Set to '.*' to include all files.

=head2 --follow-symlinks

If set, follow symbolic links in the filesystem.

=head2 --no-init

If set, any existing state file will be ignored, and only changes from the
current file state will be sent.  Without --no-init, on the first run (before
any state file is created), any existing content in the monitored files will be
sent as well as changes (which could be a large amount of data if you have big
files).

=head2 --state-file-name=FILE

Name of file in which to store state between runs.  Defaults to '.tailtoscribe' in the working directory.

=head2 --sleep-interval=SECS

Where a kernel-based file change notification system is not available, this
specifies the number of seconds between scans for file changes.

B<To minimise CPU usage, installing L<Linux::Inotify2> is highly recommended.>

=head2 Scribe Options

=over 4

=item --host, --port

Host and port of Scribe server.  Defaults to localhost, port 1463.

=item --category=CATEGORY

Default Scribe logging category.  Defaults to 'httpd'.

=item --level=LEVEL

Default log level.  Defaults to 'info'.  May be set to any valid
L<Log::Dispatch> level (debug, info, notice, warning, error, critical, alert,
emergency).

=item --retry-plan-a=MODE, --retry-plan-b=MODE, --retry-buffer-size=SIZE, --retry-count=COUNT, --retry-delay=DELAY

See L<Log::Dispatch::Scribe> for full description of these options.

=back

=head2 --pidfile=FILE

Write process ID to file FILE.  tail_to_scribe.pl will use this file to check if
an instance is already running, and refuse to start if the PID in this file
corresponds to another tail_to_scribe.pl process.  Checks are skipped if no
pidfile is given.

=head2 --debug, --debug=FILE

Enable debugging to standard error or to file.

=head2 --config=CONFIG_FILE

Specify the location of the configuration file (an included perl script).
Defaults to /etc/tail_to_scribe.conf.  A typical configuration file might
look like this:

  # Set my arg values
  my %localargs = (
      dirs => [ '/var/log/httpd' ],
      filter => 'access[._]log$',
      'exclude-dir' => [ '/var/log/httpd/fastcgi' ],
      'state-file-name' => '/var/log/httpd/.tailtoscribe',
  );

  # Copy into args to override defaults
  $args{$_} = $localargs{$_} for keys %localargs;

  1; # Must return a true value

In addition to all of the options available on the command line, a custom
message filter may also be included, e.g.

  $msg_filter = sub {
    my ($self, $filename, $line) = @_;

    return ('info', 'httpd', "$filename\t$line");
  };

See L<File::Tail::Scribe/msg_filter> for more details on the msg_filter.

=head1 SIGNALS

HUP signal causes tail_to_scribe.pl to restart.  TERM/QUIT/INT cause it to save state and exit.

=head1 SEE ALSO

=over 4

=item * L<File::Tail::Scribe>

=item * L<File::Tail::Dir>

=item * L<http://notes.jschutz.net/109/perl/perl-client-for-facebooks-scribe-logging-software>

=item * L<http://github.com/facebook/scribe/>

=item * L<Log::Dispatch::Scribe>

=back

=head1 AUTHOR

Jon Schutz, C<< <jon at jschutz.net> >>  L<notes.jschutz.net>

=head1 BUGS

Please report any bugs or feature requests to C<bug-file-tail-scribe at
rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Tail-Scribe>.  I will be
notified, and then you'll automatically be notified of progress on your bug as I
make changes.


=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc File::Tail::Scribe


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Tail-Scribe>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/File-Tail-Scribe>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/File-Tail-Scribe>

=item * Search CPAN

L<http://search.cpan.org/dist/File-Tail-Scribe/>

=back


=head1 ACKNOWLEDGEMENTS


=head1 COPYRIGHT & LICENSE

Copyright 2010 Jon Schutz, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.


=cut