#!/usr/bin/env perl
###############################################################################
## ----------------------------------------------------------------------------
## Egrep script similar to the egrep binary.
##
## The logic below supports -c -e -h -i -m -n -q -v options. The main focus is
## demonstrating Many-core Engine for Perl.
##
## This script was created to show how order can be preserved even though there
## are only 4 shared socket pairs in MCE no matter the number of workers.
##
## The usage description was largely ripped off from the egrep man page.
##
###############################################################################
use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/../lib";
my $prog_name = $0; $prog_name =~ s{^.*[\\/]}{}g;
sub INIT {
## Provide file globbing support under Windows similar to Unix.
@ARGV = <@ARGV> if ($^O eq 'MSWin32');
}
use MCE;
###############################################################################
## ----------------------------------------------------------------------------
## Display usage and exit.
##
###############################################################################
sub usage {
print <<"::_USAGE_BLOCK_END_::";
NAME
$prog_name -- print lines matching a pattern
SYNOPSIS
$prog_name [options] PATTERN [FILE ...]
$prog_name [options] [-e PATTERN] [FILE ...]
DESCRIPTION
The $prog_name script searches the named input FILEs (or standard input
if no files are named, or the file name - is given) for lines containing
a match to the given PATTERN. By default, $prog_name prints the
matching lines.
The following options are available:
--chunk_size CHUNK_SIZE
Specify chunk size for MCE -- default: 220000
--max_workers MAX_WORKERS
Specify number of workers for MCE -- default: 8
-c Suppress normal output; instead print a count of matching lines
for each input file. With the -v option (see below), count
non-matching lines.
-e PATTERN
Use PATTERN as the pattern; useful to protect patterns beginning
with -.
-h Suppress the prefixing of filenames on output when multiple files
are searched.
-i Ignore case distinctions.
-m Stop reading a file after NUM matching lines.
-n Prefix each line of output with the line number within its input
file.
-q Quiet; do not write anything to standard output. Exit immediately
with zero status if any match is found, even if an error was
detected.
-v Invert the sense of matching, to select non-matching lines.
EXIT STATUS
The $prog_name utility exits 0 on success, and >0 if an error occurs or
no match was found.
::_USAGE_BLOCK_END_::
exit 1
}
###############################################################################
## ----------------------------------------------------------------------------
## Define defaults and process command-line arguments.
##
###############################################################################
my $flag = sub { 1; };
my $isOk = sub { (@ARGV == 0 or $ARGV[0] =~ /^-/) ? usage() : shift @ARGV; };
my $chunk_size = 220000;
my $max_workers = 8;
my $skip_args = 0;
my ($c_flag, $h_flag, $i_flag, $n_flag, $q_flag, $v_flag);
my ($multiple_files, $m_cnt);
my @files = (); my @patterns = (); my $re;
while ( my $arg = shift @ARGV ) {
unless ($skip_args) {
if ($arg eq '-') {
push @files, $arg;
next;
}
if ($arg =~ m/^-[chinqv]+$/) {
while ($arg) {
my $a = chop($arg);
$c_flag = $flag->() and next if ($a eq 'c');
$h_flag = $flag->() and next if ($a eq 'h');
$i_flag = $flag->() and next if ($a eq 'i');
$n_flag = $flag->() and next if ($a eq 'n');
$q_flag = $flag->() and next if ($a eq 'q');
$v_flag = $flag->() and next if ($a eq 'v');
}
next;
}
if ($arg eq '-e') {
my $pattern = shift;
push @patterns, $pattern if (defined $pattern);
next;
}
$m_cnt = $isOk->() and next if ($arg eq '-m');
$chunk_size = $isOk->() and next if ($arg eq '--chunk_size');
$max_workers = $isOk->() and next if ($arg eq '--max_workers');
$skip_args = $flag->() and next if ($arg eq '--');
usage() if ($arg =~ /^-/);
}
push @files, $arg;
}
push @patterns, shift @files if (@patterns == 0 && @files > 0);
usage() if (@patterns == 0);
$multiple_files = 1 if (!$h_flag && @files > 1);
if (@patterns > 1) {
$re = '(?:' . join('|', @patterns) . ')';
}
else {
$re = $patterns[0];
}
$re = ($i_flag) ? qr/$re/i : qr/$re/;
###############################################################################
## ----------------------------------------------------------------------------
## Launch Many-core Engine.
##
###############################################################################
## Defined user function to run in parallel.
sub user_func {
my ($self, $chunk_ref, $chunk_id) = @_;
my ($found_match, @matches, $line_count, @lines);
## Quickly determine if a match is found.
for (0 .. @patterns - 1) {
if ($v_flag) {
if ($$chunk_ref !~ /$patterns[$_]/) {
$found_match = 1; last;
}
}
else {
if ($$chunk_ref =~ /$patterns[$_]/) {
$found_match = 1; last;
}
}
}
## Obtain file handle to slurped data.
## Collect matched data if chunk (slurped) data contains a match.
open my $_MEM_FH, '<', $chunk_ref;
binmode $_MEM_FH;
unless ($found_match) {
1 while (<$_MEM_FH>);
}
else {
if ($v_flag) {
while (<$_MEM_FH>) {
if ($_ !~ /$re/) {
push @lines, $. if ($n_flag);
push @matches, $_;
}
}
}
else {
while (<$_MEM_FH>) {
if ($_ =~ /$re/) {
push @lines, $. if ($n_flag);
push @matches, $_;
}
}
}
}
$line_count = $.;
close $_MEM_FH;
## Send result to main thread.
my %wk_result = (
'found_match' => scalar @matches,
'line_count' => $line_count,
'matches' => \@matches,
'lines' => \@lines
);
$self->do('display_result', \%wk_result, $chunk_id);
return;
}
## Instantiate Many-core Engine and spawn workers.
my $mce = MCE->new(
chunk_size => $chunk_size,
max_workers => $max_workers,
user_func => \&user_func,
use_slurpio => 1
);
$mce->spawn();
###############################################################################
## ----------------------------------------------------------------------------
## Report line numbers containing null values.
##
###############################################################################
my ($file, %result, $abort_all, $abort_job, $found_match);
my $exit_status = 0;
my $total_matched = 0;
my $total_lines = 0;
my $order_id = 1;
keys(%result) = 4000;
## Callback function for displaying results. Output order is preserved.
sub display_result {
my ($wk_result, $chunk_id) = @_;
return if ($abort_job);
$result{$chunk_id} = $wk_result;
while (1) {
last unless exists $result{$order_id};
my $r = $result{$order_id};
if ($r->{found_match}) {
$found_match = 1;
if ($q_flag) {
$mce->abort(); $abort_all = $abort_job = 1;
last;
}
for my $i (0 .. @{ $r->{matches} } - 1) {
$total_matched++;
unless ($c_flag) {
printf "%s:", $file if ($multiple_files);
printf "%d:", $r->{lines}[$i] + $total_lines if ($n_flag);
print $r->{matches}[$i];
}
if ($m_cnt && $m_cnt == $total_matched) {
$mce->abort(); $abort_job = 1;
last;
}
}
}
$total_lines += $r->{line_count};
delete $result{$order_id};
$order_id++;
}
}
## Display total matched. Reset counters.
sub display_total_matched {
if ($c_flag) {
printf "%s:", $file if ($multiple_files);
print "$total_matched\n";
}
$total_matched = $total_lines = 0;
$abort_job = undef;
$order_id = 1;
}
## Process files, otherwise read from standard input.
if (@files > 0) {
foreach (@files) {
last if ($abort_all);
$file = $_;
if ($file eq '-') {
open(STDIN, ($^O eq 'MSWin32') ? 'CON' : '/dev/tty') or die $!;
$mce->process(\*STDIN);
display_total_matched();
}
elsif (! -e $file) {
print STDERR "$prog_name: $file: No such file or directory\n";
$exit_status = 2;
}
elsif (-d $file) {
print STDERR "$prog_name: $file: Is a directory\n";
$exit_status = 1;
}
else {
$mce->process($file);
display_total_matched();
}
}
}
else {
$file = "(STDIN)";
$mce->process(\*STDIN);
display_total_matched();
}
## Shutdown Many-core Engine and exit.
$mce->shutdown();
if (!$q_flag && $exit_status) {
exit($exit_status);
}
else {
exit(($found_match) ? 0 : 1);
}