The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl


use strict;
use warnings;
use Config::YAMLMacros::YAML;
use File::Slurp::Remote::SmartOpen;

# The only command-line argument is the path of the YAML metadata file.
my $metafile = shift;
die "usage: $0 metafile\n" unless defined $metafile;
die "$0: metadata file '$metafile' does not exist\n" unless -e $metafile;

my $meta = LoadFile($metafile);

# Fail early with a readable message when the metadata lists no files,
# rather than carrying an undefined $file into the code that opens it.
my @candidates = @{ $meta->{FILES} || [] };
die "$0: no FILES entries found in '$metafile'\n" unless @candidates;

# Analyze the single file whose 'items' count is the largest.
my ($file) = sort { $b->{items} <=> $a->{items} } @candidates;

# Column names and per-column tallies, both indexed by zero-based column
# position.  Each $values[$i] is a hash keyed by the distinct values seen
# in column $i; the tally loop increments the counts.
my @colnames;
my @values;

# The companion "<filename>.header" file is expected to hold one line per
# column in the form "<1-based column number><TAB><column name>".
my $header;
smartopen("$file->{host}:$file->{filename}.header", $header, "r");
while (my $line = <$header>) {
	chomp $line;
	die "$0: malformed header line $.: '$line'\n"
		unless $line =~ /^(\d+)\t(\S+)$/;
	my ($colnum, $name) = ($1, $2);
	# A column number of 0 would become index -1, which silently overwrites
	# the last slot (or is fatal on an empty array); reject it explicitly.
	die "$0: header line $.: column numbers start at 1\n" if $colnum < 1;
	$colnames[$colnum-1] = $name;
	$values[$colnum-1] = {};
}

# Maps a field count to the number of rows that had that many fields.
my %ncol;

# Walk the data file row by row, counting how often each distinct value
# shows up in each column position.
my $tsv;
smartopen("$file->{host}:$file->{filename}", $tsv, "r");
while (defined(my $row = <$tsv>)) {
	chomp $row;
	# The -1 limit keeps trailing empty fields so the field count is exact.
	my @fields = split(/\t/, $row, -1);
	my $col = 0;
	for my $field (@fields) {
		$values[$col]{$field}++;
		$col++;
	}
	$ncol{scalar(@fields)}++;
}

# Data rows can carry more fields than the header declared, which leaves
# @values longer than @colnames, and the header may skip column numbers.
# Fill every such gap first so that both arrays have the same length;
# otherwise the synthetic entry pushed below would end up at different
# indices in the two arrays and be reported against the wrong tallies.
my $last = $#values > $#colnames ? $#values : $#colnames;
for my $i (0..$last) {
	$colnames[$i] = sprintf("(unnamed column %d)", $i + 1)
		unless defined $colnames[$i];
	$values[$i] = {} unless defined $values[$i];
}

# Report the distribution of per-row field counts as one extra pseudo-column.
push(@colnames, 'number of columns');
push(@values, \%ncol);

# One line per column: how many values occurred exactly once ("uniques")
# and how many occurred more than once ("repeaters").  A column whose only
# content is a single repeated value is flagged with "***".
for my $i (0..$#colnames) {
	my $cn = $colnames[$i];
	my @counts = values %{$values[$i]};
	my $u = scalar(grep { $_ == 1 } @counts);
	my $r = scalar(grep { $_ > 1 } @counts);
	printf "%50s %7d uniques, %7d repeaters %s\n", $cn, $u, $r, ($u == 0 && $r == 1 ? "***" : "");
}

__END__

=head1 LICENSE

This package may be used and redistributed under the terms of either
the Artistic 2.0 or LGPL 2.1 license.