The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
###############################################################################
## ----------------------------------------------------------------------------
## Cat script similar to the cat binary.
##
## The logic below only supports -n -u options. The focus is demonstrating
## Many-Core Engine for Perl.
##
## This script was created to show how order can be preserved even though there
## are only 4 shared socket pairs in MCE no matter the number of workers.
##
## Try running with -n option against a large file with long lines. This
## script will out-perform the cat binary in that case.
##
## The usage description was largely ripped off from the cat man page.
##
###############################################################################

use strict;
use warnings;

use Cwd 'abs_path'; ## Insert lib-path at the head of @INC.
use lib abs_path($0 =~ m{^(.*)[\\/]} && $1 || abs_path) . '/../lib';

my $prog_name = $0; $prog_name =~ s{^.*[\\/]}{}g;

sub INIT {
   ## Provide file globbing support under Windows similar to Unix.
   @ARGV = <@ARGV> if ($^O eq 'MSWin32');
}

use MCE;

###############################################################################
## ----------------------------------------------------------------------------
## Display usage and exit.
##
###############################################################################

sub usage {

   print <<"::_USAGE_BLOCK_END_::";

NAME
   $prog_name -- concatenate and print files

SYNOPSIS
   $prog_name [-nu] [file ...]

DESCRIPTION
   The $prog_name utility reads files sequentially, writing them to the
   standard output. The file operands are processed in command-line
   order. If file is a single dash ('-') or absent, $prog_name reads
   the standard input.

   The following options are available:

   --max-workers MAX_WORKERS
          Specify number of workers for MCE   -- default: auto

   --chunk-size CHUNK_SIZE
          Specify chunk size for MCE          -- default: 2 MiB

   -n     Number the output lines, starting at 1
   -u     Disable output buffering

EXIT STATUS
   The $prog_name utility exits 0 on success, and >0 if an error occurs.

EXAMPLES
   The command:

         $prog_name file1

   will print the contents of file1 to the standard output.

   The command:

         $prog_name file1 file2 > file3

   will sequentially print the contents of file1 and file2 to the file
   file3, truncating file3 if it already exists.

   The command:

         $prog_name file1 - file2 - file3

   will print the contents of file1, print data it receives from the stan-
   dard input until it receives an EOF (typing 'Ctrl/Z' in Windows, 'Ctrl/D'
   in UNIX), print the contents of file2, read and output contents of the
   standard input again, then finally output the contents of file3.

::_USAGE_BLOCK_END_::

   exit 1
}

###############################################################################
## ----------------------------------------------------------------------------
## Define defaults and process command-line arguments.
##
###############################################################################

my $flag = sub { 1; };
my $isOk = sub { (@ARGV == 0 or $ARGV[0] =~ /^-/) ? usage() : shift @ARGV; };

my $chunk_size  = '2m';
my $max_workers = 'auto';
my $skip_args   = 0;

my $n_flag = 0;
my $u_flag = 0;

my @files = ();

while ( my $arg = shift @ARGV ) {
   unless ($skip_args) {
      if ($arg eq '-') {
         push @files, $arg;
         next;
      }
      if ($arg =~ m/^-[nu]+$/) {
         while ($arg) {
            my $a = chop($arg);
            $n_flag = $flag->() and next if ($a eq 'n');
            $u_flag = $flag->() and next if ($a eq 'u');
         }
         next;
      }

      $skip_args   = $flag->() and next if ($arg eq '--');
      $max_workers = $isOk->() and next if ($arg =~ /^--max[-_]workers$/);
      $chunk_size  = $isOk->() and next if ($arg =~ /^--chunk[-_]size$/);

      if ($arg =~ /^--max[-_]workers=(.+)/) {
         $max_workers = $1;
         next;
      }
      if ($arg =~ /^--chunk[-_]size=(.+)/) {
         $chunk_size = $1;
         next;
      }

      usage() if ($arg =~ /^-/);
   }

   push @files, $arg;
}

if ($n_flag == 0 && $max_workers eq 'auto') {
   $max_workers = 2;
}

###############################################################################
## ----------------------------------------------------------------------------
## Launch Many-Core Engine.
##
###############################################################################

my $mce = MCE->new(

   chunk_size  => $chunk_size, max_workers => $max_workers,
   init_relay  => 0,
   use_slurpio => 1,

   user_func => sub {
      my ($mce, $chunk_ref, $chunk_id) = @_;

      if ($n_flag) {
         my $output = ''; my $line_count = ($$chunk_ref =~ tr/\n//);
         my $lines_read = MCE::relay { $_ += $line_count };

         open my $fh, '<', $chunk_ref;
         $output .= sprintf "%6d\t%s", ++$lines_read, $_ while (<$fh>);
         close $fh;

         $output .= ":$chunk_id";
         MCE->do('display_chunk', $output);
      }
      else {
         ## The following is another way to have ordered output. Workers
         ## write directly to STDOUT exclusively without any involvement
         ## from the manager process. The statements between relay_recv
         ## and relay run serially and most important orderly.
         ##
         ## (This is not recommended for sprintf above requiring extra
         ## CPU time. Thus, better to run in parallel and send the
         ## output to the manager process.)

         MCE->relay_recv;             ## my $val = MCE->relay_recv;
                                      ## 0, relay below simply forwards

         $| = 1; print $$chunk_ref;   ## exclusive access to STDOUT
                                      ## important, flush immediately

         MCE->relay;
      }

      return;
   }

)->spawn;

local $| = 1 if $u_flag;

###############################################################################
## ----------------------------------------------------------------------------
## Concatenate and print files
##
###############################################################################

my ($order_id, $lines, %tmp);
my $exit_status = 0;

sub display_chunk {

   ## One can have this receive 2 arguments; $chunk_id and $chunk_data.
   ## However, MCE->freeze is called when more than 1 argument is sent.
   ## For performance, $chunk_id is attached to the end of $_[0].

   my $chunk_id = substr($_[0], rindex($_[0], ':') + 1);
   my $chop_len = length($chunk_id) + 1;

   substr($_[0], -$chop_len, $chop_len, '');

   if ($chunk_id == $order_id && keys %tmp == 0) {
      ## no need to save in cache if orderly
      print $_[0];
      $order_id++;
   }
   else {
      ## hold temporarily otherwise
      $tmp{$chunk_id} = $_[0];
      while (1) {
         last unless exists $tmp{$order_id};
         print delete $tmp{$order_id++};
      }
   }

   return;
}

## Process files, otherwise read from standard input.

if (@files > 0) {
   foreach my $file (@files) {
      $order_id = 1; $lines = 0;
      if ($file eq '-') {
         open(STDIN, '<', ($^O eq 'MSWin32') ? 'CON' : '/dev/tty') or die $!;
         $mce->process(\*STDIN);
      }
      elsif (! -e $file) {
         print {*STDERR} "$prog_name: $file: No such file or directory\n";
         $exit_status = 2;
      }
      elsif (-d $file) {
         print {*STDERR} "$prog_name: $file: Is a directory\n";
         $exit_status = 1;
      }
      else {
         $mce->process($file);
      }
   }
}
else {
   $order_id = 1; $lines = 0;
   $mce->process(\*STDIN);
}

## Shutdown Many-Core Engine and exit.

$mce->shutdown;
exit $exit_status;