The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

package TSV::Utility;

use Getopt::Long;
use Pod::Usage;
use List::Util qw(sum);
use strict;
use warnings;

our $VERSION = 0.3;

my $match;
my $rotate;
my $validate;
my $show;
my $help;
my $cut;
my $head;
my $default;
my @grep;
my $select;

pod2usage() unless GetOptions(
	'matchup=s'	=> \$match,
	rotate		=> \$rotate,
	validate	=> \$validate,
	show		=> \$show,
	help		=> \$help,
	'cut|f=s'	=> \$cut,
	'head|n=i'	=> \$head,
	'default=s'	=> \$default,
	'grep=s{2}'	=> \@grep,
	'select=s'	=> \$select,
);
pod2usage( '-verbose' => 2) if $help;
pod2usage('Must say what to do, try --help') unless $match || $rotate || $validate || $show || $cut || $head || $default || @grep || $select;
pod2usage('Only one option allowed') if sum(map { $_ ? 1 : 0 } $match, $rotate, $validate, $show, $cut, $default, (@grep ? 1 : 0), $select) > 1;

if ($rotate || $match) {
	my @cdata;
	my $count = -1;
	if ($match) {
		die unless -e $match;
		my $tsv;
		open $tsv, "<", $match || die;
		my $header = <$tsv>;
		chomp($header);
		my @h = split(/\t/, $header, -1);
		for my $i (0..$#h) {
			push(@{$cdata[$i]}, $h[$i]);
		}
		$count = 0;
	}
	while (<>) {
		chomp;
		my (@d) = split(/\t/, $_, -1);
		for my $i (0..$#d) {
			push(@{$cdata[$i]}, $d[$i]);
		}
		last if $head && $count++ >= $head;
	}
	for my $cd (@cdata) {
		print join("\t", @$cd) . "\n";
	}
} elsif ($cut) {
	my @cut = split(/,/, $cut);
	my $header = <>;
	chomp($header);
	$header =~ s/^#DSTSV# //;
	my @cols = split(/\t/, $header, -1);
	my $c = 1;
	my %cols = map { $c => $c, $_ => $c++ } @cols;
	for my $cut (@cut) {
		die "No column $cut" unless $cols{$cut};
	}
	my @cutnum = map { $cols{$_} - 1 } @cut;
	my $count = 0;
	print join("\t", @cols[@cutnum])."\n";
	while (<>) {
		chomp;
		my @data = split(/\t/, $_, -1);
		print join("\t", @data[@cutnum])."\n";
		last if $head && $count++ >= $head;
	}
} elsif ($show) {
	my $header = <>;
	chomp($header);
	$header =~ s/^#[A-Z]+# //;
	my @cols = split(/\t/, $header, -1);
	my $c = 1;
	printf "% 6d %s\n", $c++, $_ for @cols;
} elsif ($validate) {
	my $header = <>;
	print $header;
	my (@reference) = split(/\t/, $header);
	my $skipped = 0;
	my $okay = 0;
	my $count = 0;
	while (<>) {
		my @count = split(/\t/, $_, -1);
		if (@count != @reference) {
			$skipped++;
		} else {
			$okay++;
			if ($default) { 
				1 while s/(^|\t)(\n|\t)/$1$default$2/g;
			}
			print;
		}
		last if $head && $count++ >= $head;
	}
	if ($skipped) {
		print STDERR "Discarded $skipped records (kept $okay records)\n";
	}
} elsif ($select) {
	my $header = <>;
	print $header;
	chomp($header);
	my @header = split(/\t/, $header);

	my @my;
	my @assign;

	for my $col (@header) {
		if ($col =~ /^[A-Za-z_]\w+$/) {
			push(@my, "\$$col");
			push(@assign, "\$$col");
		} else {
			push(@assign, 'undef');
		}
	}

	my $assign = join(', ', @assign);
	my $my = join(', ', @my);

	my $_count;
	my $_head = $head;

	my $e = <<END;
		my ($my);
		while (<>) {
			chomp;
			($assign) = split(/\t/, \$_, -1);
			next unless do {
				no warnings;
				$select
			};
			print "\$_\\n";
			last if \$_head && \$_count++ >= \$_head;
		}
END
	# print STDERR $e;
	eval $e;
	die $@ if $@;
} elsif (@grep) {
	my ($cols, $pattern) = @grep;
	my $header = <>;
	chomp($header);
	my @header = split(/\t/, $header);
	my $c = 1;
	my @cols = split(/,/, $cols);
	my %cols = map { ($c => $c, $_ => $c++) } @header;
	for my $col (@cols) {
		die "No column $col" unless $cols{$col};
	}
	my @colnum = map { $cols{$_} - 1 } @cols;
	my $count = 0;
	while (<>) {
		chomp;
		my @data = split(/\t/, $_, -1);
		my $s = join("\t", @data[@colnum]);
		next unless $s =~ /$pattern/o;
		print "$_\n";
		last if $head && $count++ >= $head;
	}
} elsif ($default) {
	my $count = 0;
	while (<>) {
		1 while s/(^|\t)(\n|\t)/$1$default$2/g;
		print;
		last if $head && $count++ >= $head;
	}
} elsif ($head) {
	my $count = 0;
	while (<>) {
		print;
		last if $count++ >= $head;
	}
} else {
	die;
}

1;

__END__

=head1 NAME

 tsv - general tsv file mangler

=head1 USAGE

 tsv [options] file(s)

=head1 OPTIONS

 --show			Display column names
 -f --cut COL,COL,COL	like cut(1) but uses column names
 --rotate		Turn columsn to rows
 --matchup FILE		Rotate, grabbing header from FILE
 --validate		Discards rows that have wrong column count
 -n --head COUNT	Only process COUNT lines
 --default VALUE	Replace empty values with VALUE
 --grep COLS PATTERN	Search for pattern in cols
 --select COLS CODE	Eval code on cols, output if returns true
 --help			Display man page

=head1 DESCRIPTION

tsv is a collection of small tools for manipulating and reporting
on TSV (tab separated values) files.

=over

=item --show

Displays a numbered list of the columns

=item --cut COLUMN_NAME(S)

Outputs a new TSV with just the named columns.
Columns must be named; separate column names with
comma C<,>.

=item --rotate

Turn the columns into rows and the rows into columns.  

=item --matchup FILE

Grab the first row from FILE and then rotate.  Usually this
is used when with grep:

 grep stuff file.tsv | tsv --match file.tsv 

=item --grep COLUMNS PATTERN

Look for PATTERN in COLUMNS (comma separate list, names or numbers)

=item --select CODE

Evaluate CODE.  Each column will be in the variable C<$colname>.
A true value from the code will cause the line to print.  Do not use
C<return>: the code is inside a while loop, not a sub.

=item --validate

Only output rows that have the same number of columns as the
header line.

=item --default VALUE

Replace empty values with VALUE.  This can be combined with --validate.

=item -n --head COUNT

Only process COUNT lines of input.  This can be combined with
all the other options except --show.

=back

=head1 LICENSE

This package may be used and redistributed under the terms of either
the Artistic 2.0 or LGPL 2.1 license.