#!/usr/bin/env perl
#
# cut -- remove sections from each line of files
#
# Rich Lafferty <rich@alcor.concordia.ca>
# Thu Mar 4 02:33:30 EST 1999
#
# Perl Power Tools -- http://language.perl.com/ppt/
#
$^W = 1; # -w
use strict;
use Getopt::Std;
use File::Basename;
## What's my name?
my $me = basename($0);
## Grab options
getopts ('b:c:d:f:ns', \my %opt);
# There's no difference between -b and -c on any unix I
# use regularly -- it's for i18n. Thus, -n is a noop, too.
$opt{b} = $opt{c} if defined $opt{c};
## Byte operations
if (defined ($opt{b})) {
my @list = split (/,/, $opt{b});
while (<>) {
chomp;
foreach my $item (@list) {
my ($start,$end) = split (/-/, $item);
die "$me: invalid byte list\nType '$me' alone for usage.\n"
if ($start and $end and $start > $end); # parameters overlap?
# change cut's list parameters to substr's parameters
$start--; # cut counts from 1, not 0
$start = 0 if $start < 0;
$end = $start + 1 unless $item =~ /-/;
$start = length if $start > length;
if ($end) {
$end = length if $end > length;
printf ("%s", substr ($_, $start, $end - $start));
} else {
printf ("%s", substr ($_, $start));
}
}
print "\n";
}
exit 0;
}
## Field operations
elsif (defined ($opt{f})) {
my @list = split (/,/, $opt{f});
my $delim = "\t";
$delim = substr ($opt{d}, 0, 1) if defined $opt{d};
while (<>) {
chomp;
# Only waste time on lines with delimiters
if (/$delim/) {
foreach my $item (@list) {
my ($start,$end) = split (/-/, $item);
die "$me: invalid byte list\nType '$me' alone for usage.\n"
if ($start and $end and $start > $end); # parameters overlap?
# change cut's list parameters to substr's parameters
$start--; # cut counts from 1, not 0
$start = 0 if $start < 0;
$end = $start + 1 unless $item =~ /-/;
my @hunk = split (/$delim/, $_);
# don't let parameters exceed number of fields
$end = @hunk if (! $end or $end > @hunk);
$start = @hunk if $start > @hunk;
# If start of field is bigger than number of items, cut(1)
# still outputs a newline -- but we won't enter the for()
print "\n" if ($start == $end and $item eq $list[$#list]);
for (my $i = $start; $i < $end; $i++) {
print $hunk[$i];
if ($item eq $list[$#list] and $i == $end - 1) { # if done
print "\n";
} else {
print $delim;
}
}
}
} else { # no delimiter in line
print "$_\n" unless $opt{"s"};
}
}
exit 0;
}
## $SIG{__CLUE__}
print <<EOT;
usage: $me -b list [-n] [file ...]
$me -c list [file ...]
$me -f list [-d delim] [-s] [file ...]
Each LIST is made up of one range, or many ranges separated by commas.
Each range is one of:
N Nth byte, character or field, counted from 1
N- from Nth byte, character or field, to end of line
N-M from Nth to Mth (included) byte, character or field
-M from first to Mth (included) byte, character or field
EOT
exit 1;
# (Thanks to Abigail for the pod template.)
__END__
=pod
=head1 NAME
cut - select portions of each line of a file
=head1 SYNOPSIS
cut C<-b> list [C<-n>] [file ...]
cut C<-c> list [file ...]
cut C<-f> list [C<-d> delim] [C<-s>] [file ...]
=head1 DESCRIPTION
The B<cut> utility selects portions of each line (as specified by I<list>)
from each I<file> (or the standard input by default), and writes them to
the standard output. The items specified by I<list> can be in terms of
column position or in terms of fields delimited by a special
character. Column numbering starts from 1.
I<list> is a comma- or whitespace-separated set of increasing numbers
and/or number ranges. Number ranges consist of a number, a dash
('-'), and a second number and select the fields or columns from the
first number to the second, inclusive. Numbers or number ranges may
be preceded by a dash, which selects all fields or columns from 1 to
the first number. Numbers or number ranges may be followed by a dash,
which selects all fields or columns from the last number to the end of
the line. Numbers and number ranges may be repeated, overlapping, and
in any order. It is not an error to select fields or columns not
present in the input line.
=head1 OPTIONS
B<cut> accepts the following options:
=over 4
=item -b list
The I<list> specifies byte positions.
=item -c list
The I<list> specifies character positions.
=item -d string
Use the first character of I<string> as the field delimiter character
instead of the tab character.
=item -f list
The I<list> specifies fields, delimited in the input by a single tab
character. Output fields are separated by a single tab character.
=item -n
Do not split multi-byte characters.
=item -s
Suppresses lines with no field delimiter characters. Unless
specified, lines with no delimiters are passed through unmodified.
=back
=head1 BUGS
B<cut> does not understand multibyte characters; the C<-c> and C<-b>
options function identically, and C<-n> does nothing.
=head1 STANDARDS
This B<cut> implementation is compatible with the I<OpenBSD>
implementation.
=head1 AUTHOR
The Perl implementation of B<cut> was written by Rich Lafferty,
I<rich@alcor.concordia.ca>.
=head1 COPYRIGHT and LICENSE
This program is free and open software. You may use, copy, modify,
distribute and sell this program (and any modified variants) in any
way you wish, provided you do not restrict others to do the same.
=cut