The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
#   
# cut -- remove sections from each line of files
#
#        Rich Lafferty <rich@alcor.concordia.ca> 
#        Thu Mar  4 02:33:30 EST 1999
#
#   Perl Power Tools -- http://language.perl.com/ppt/
# 

$^W = 1;    # -w
use strict;
use Getopt::Std;
use File::Basename;

## What's my name?
my $me = basename($0);

## Grab options
getopts ('b:c:d:f:ns', \my %opt);
   
# There's no difference between -b and -c on any unix I 
# use regularly -- it's for i18n. Thus, -n is a noop, too.
$opt{b} = $opt{c} if defined $opt{c}; 

## Byte operations
if (defined ($opt{b})) {

    my @list = split (/,/, $opt{b});

    while (<>) {
	chomp;

	foreach my $item (@list) {
	    my ($start,$end) = split (/-/, $item);
	    die "$me: invalid byte list\nType '$me' alone for usage.\n" 
		if ($start and $end and $start > $end);  # parameters overlap?

	    # change cut's list parameters to substr's parameters
	    $start--;			   # cut counts from 1, not 0
	    $start = 0 if $start < 0;          
	    $end = $start + 1 unless $item =~ /-/;
	    $start = length if $start > length;

	    if ($end) {  
		$end = length if $end > length; 
		printf ("%s", substr ($_, $start, $end - $start));
	    } else {     
		printf ("%s", substr ($_, $start));
	    }
	}
	print "\n";
    }
    exit 0;
} 

## Field operations
elsif (defined ($opt{f})) {

    my @list = split (/,/, $opt{f});
    
    my $delim = "\t";
    $delim = substr ($opt{d}, 0, 1) if defined $opt{d};

    while (<>) {
	chomp;

	# Only waste time on lines with delimiters
	if (/$delim/) {   
	    foreach my $item (@list) {
		my ($start,$end) = split (/-/, $item);
		die "$me: invalid byte list\nType '$me' alone for usage.\n" 
		    if ($start and $end and $start > $end);   # parameters overlap?
		
		# change cut's list parameters to substr's parameters
		$start--;			   # cut counts from 1, not 0
		$start = 0 if $start < 0;          
		$end = $start + 1 unless $item =~ /-/;

		my @hunk = split (/$delim/, $_);
	    
		# don't let parameters exceed number of fields
		$end = @hunk if (! $end or $end > @hunk);
		$start = @hunk if $start > @hunk;

		# If start of field is bigger than number of items, cut(1)
		# still outputs a newline -- but we won't enter the for()
		print "\n" if ($start == $end and $item eq $list[$#list]);

		for (my $i = $start; $i < $end; $i++) {
		    print $hunk[$i];
		    if ($item eq $list[$#list] and $i == $end - 1) {  # if done
			print "\n";
		    } else {
			print $delim;
		    }
		}
	    }
	} else {  # no delimiter in line
	    print "$_\n" unless $opt{"s"};
	}
    }

    exit 0;
}

## $SIG{__CLUE__}
print <<EOT;
usage:  $me -b list [-n] [file ...]
        $me -c list [file ...]
        $me -f list [-d delim] [-s] [file ...]

Each LIST is made up of one range, or many ranges separated by commas.
Each range is one of:

  N     Nth byte, character or field, counted from 1
  N-    from Nth byte, character or field, to end of line
  N-M   from Nth to Mth (included) byte, character or field
  -M    from first to Mth (included) byte, character or field

EOT

exit 1;

# (Thanks to Abigail for the pod template.)

__END__

=pod

=head1 NAME

cut - select portions of each line of a file

=head1 SYNOPSIS

cut C<-b> list [C<-n>] [file ...]

cut C<-c> list [file ...]

cut C<-f> list [C<-d> delim] [C<-s>] [file ...]

=head1 DESCRIPTION

The B<cut> utility selects portions of each line (as specified by I<list>)
from each I<file> (or the standard input by default), and writes them to
the standard output.  The items specified by I<list> can be in terms of
column position or in terms of fields delimited by a special
character. Column numbering starts from 1.

I<list> is a comma- or whitespace-separated set of increasing numbers
and/or number ranges.  Number ranges consist of a number, a dash
('-'), and a second number and select the fields or columns from the
first number to the second, inclusive.  Numbers or number ranges may
be preceded by a dash, which selects all fields or columns from 1 to
the first number.  Numbers or number ranges may be followed by a dash,
which selects all fields or columns from the last number to the end of
the line.  Numbers and number ranges may be repeated, overlapping, and
in any order.  It is not an error to select fields or columns not
present in the input line.

=head1 OPTIONS

B<cut> accepts the following options:

=over 4

=item -b list     

The I<list> specifies byte positions.

=item -c list     

The I<list> specifies character positions.

=item -d string   

Use the first character of I<string> as the field delimiter character
instead of the tab character.

=item -f list

The I<list> specifies fields, delimited in the input by a single tab
character.  Output fields are separated by a single tab character.

=item -n

Do not split multi-byte characters.

=item -s

Suppresses lines with no field delimiter characters.  Unless
specified, lines with no delimiters are passed through unmodified.

=back

=head1 BUGS

B<cut> does not understand multibyte characters; the C<-c> and C<-b>
options function identically, and C<-n> does nothing.

=head1 STANDARDS

This B<cut> implementation is compatible with the I<OpenBSD>
implementation.

=head1 AUTHOR

The Perl implementation of B<cut> was written by Rich Lafferty,
I<rich@alcor.concordia.ca>.

=head1 COPYRIGHT and LICENSE

This program is free and open software. You may use, copy, modify,
distribute and sell this program (and any modified variants) in any
way you wish, provided you do not restrict others to do the same.

=cut