#!/pro/bin/perl
# xlscat: show XLS/SXC file as Text
# xlsgrep: grep pattern
# (m)'15 [24-08-2015] Copyright H.M.Brand 2005-2016
use strict;
use warnings;
our $VERSION = "3.2";
my $is_grep = $0 =~ m/grep$/;
sub usage
{
my $err = shift and select STDERR;
(my $scrpt = $0) =~ s{.*[\/]}{};
my $p = $is_grep ? " pattern" : "";
print
"usage: $scrpt\t[-s <sep>] [-L] [-n] [-A] [-u] [Selection]$p file.xls\n",
" \t[-c | -m] [-u] [Selection]$p file.xls\n",
" \t -i [-S sheets]$p file.xls\n",
" Generic options:\n",
" -v[#] Set verbose level (xlscat/xlsgrep)\n",
" -d[#] Set debug level (Spreadsheet::Read)\n",
" -u Use unformatted values\n",
" --noclip Do not strip empty sheets and\n",
" trailing empty rows and columns\n",
" -e <enc> Set encoding for input and output\n",
" -b <enc> Set encoding for input\n",
" -a <enc> Set encoding for output\n",
" Input CSV:\n",
" --in-sep=c Set input sep_char for CSV\n",
" Input XLS:\n",
" --dtfmt=fmt Specify the default date format to replace 'm-d-yy'\n",
" the default replacement is 'yyyy-mm-dd'\n",
" Output Text (default):\n",
" -s <sep> Use separator <sep>. Default '|', \\n allowed\n",
" -L Line up the columns\n",
" -n [skip] Number lines (prefix with column number)\n",
" optionally skip <skip> (header) lines\n",
" -A Show field attributes in ANSI escapes\n",
" -h[#] Show # header lines\n",
$is_grep ? (
" Grep options:\n",
" -i Ignore case\n",
" -w Match whole words only\n") : (
" Output Index only:\n",
" -i Show sheet names and size only\n"),
" Output CSV:\n",
" -c Output CSV, separator = ','\n",
" -m Output CSV, separator = ';'\n",
" Output HTML:\n",
" -H Output HTML\n",
" Selection:\n",
" -S <sheets> Only print sheets <sheets>. 'all' is a valid set\n",
" Default only prints the first sheet\n",
" -R <rows> Only print rows <rows>. Default is 'all'\n",
" -C <cols> Only print columns <cols>. Default is 'all'\n",
" -F <flds> Only fields <flds> e.g. -FA3,B16\n",
" Ordering (column numbers in result set *after* selection):\n",
" --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)\n",
" +# - first # lines do not sort (header)\n",
" # - order on column # lexical ascending\n",
" #n - order on column # numeric ascending\n",
" #r - order on column # lexical descending\n",
" #rn - order on column # numeric descending\n",
"\n",
"Examples:\n",
" xlscat -i foo.xls\n",
" xlscat --in-sep=: --sort=3n -L /etc/passwd\n";
@_ and print join "\n", @_, "";
exit $err;
} # usage
use Getopt::Long qw(:config bundling noignorecase);
my $opt_c; # Generate CSV
my $opt_s; # Text separator
my $opt_S; # Sheets to print
my $opt_R; # Rows to print
my $opt_C; # Columns to print
my $dtfmt; # Default date-format for Excel
my $opt_F = ""; # Fields to print
my $opt_i = 0; # Index (cat) | ignore_case (grep)
my $opt_L = 0; # Auto-size/align columns
my $opt_n; # Prefix lines with column number
my $opt_u = 0; # Show unformatted values
my $opt_v = 0; # Verbosity for xlscat
my $opt_d = 0; # Debug level for Spreadsheet::Read
my $opt_A = 0; # Show field colors in ANSI escapes
my $opt_H = 0; # Output in HTML
my $opt_h = 0; # Number of header lines for grep or -L
my $opt_w = 0; # Grep words
my $clip = 1;
my $enc_i; # Input encoding
my $enc_o; # Output encoding
my $sep; # Input field sep for CSV
GetOptions (
"help|?" => sub { usage (0); },
# Input CSV
"c|csv" => sub { $opt_c = "," },
"m|ms" => sub { $opt_c = ";" },
"insepchar".
"|in-sep".
"|in-sep-char=s" => \$sep,
# Input XLS
"dtfmt".
"|date-format=s" => \$dtfmt,
# Output
"i|index".
"|ignore-case" => \$opt_i,
"s|separator".
"|outsepchar".
"|out-sep".
"|out-sep-char=s" => \$opt_s,
"S|sheets=s" => \$opt_S,
"R|rows=s" => \$opt_R,
"C|columns=s" => \$opt_C,
"F|fields=s" => \$opt_F,
"L|fit|align!" => \$opt_L,
"n|number:0" => \$opt_n,
"A|ansi|color!" => \$opt_A,
"u|unformatted!" => \$opt_u,
"v|verbose:1" => \$opt_v,
"d|debug:1" => \$opt_d,
"H|html:1" => \$opt_H,
"noclip" => sub { $clip = 0 },
"sort=s" => \my $sort_order,
# Encoding
"e|encoding=s" => sub { $enc_i = $enc_o = $_[1] },
"b|encoding-in=s" => \$enc_i,
"a|encoding-out=s" => \$enc_o,
# Grep
"w|word!" => \$opt_w,
"h|header:1" => \$opt_h,
) or usage 1, "GetOpt: $@";
unless ($is_grep) {
$opt_i && $opt_L and usage 1, "Options i and L are mutually exclusive";
$opt_i && $opt_s and usage 1, "Options i and s are mutually exclusive";
$opt_i && $opt_c and usage 1, "Options i and c are mutually exclusive";
$opt_i && $opt_u and usage 1, "Options i and u are mutually exclusive";
$opt_i && $opt_S and usage 1, "Options i and S are mutually exclusive";
$opt_i && $opt_R and usage 1, "Options i and R are mutually exclusive";
$opt_i && $opt_C and usage 1, "Options i and C are mutually exclusive";
$opt_i && $opt_F and usage 1, "Options i and F are mutually exclusive";
$opt_i && $opt_H and usage 1, "Options i and H are mutually exclusive";
}
$opt_c && $opt_s and usage 1, "Options c and s are mutually exclusive";
$opt_c && $opt_H and usage 1, "Options c and H are mutually exclusive";
$opt_s && $opt_H and usage 1, "Options s and H are mutually exclusive";
defined $opt_s or $opt_s = "|"; eval "\$opt_s = qq{$opt_s}";
defined $opt_S or $opt_S = $opt_i || $is_grep ? "all" : "1";
$opt_i && !$is_grep && $opt_v < 1 and $opt_v = 1;
if ($opt_c) {
$opt_L = 0; # Cannot align CSV
$opt_c =~ m/^1?$/ and $opt_c = ",";
$opt_c = Text::CSV_XS->new ({
binary => 1,
sep_char => $opt_c,
eol => "\r\n",
});
}
# Debugging. Prefer Data::Peek over Data::Dumper if available
{ use Data::Dumper;
my $dp = 0;
eval q{
use Data::Peek;
$dp = 1;
};
sub ddumper
{
$dp ? DDumper (@_)
: print STDERR Dumper (@_);
} # ddumper
}
my $pattern;
if ($is_grep) {
$pattern = shift or usage 1;
$opt_w and $pattern = "\\b$pattern\\b";
$opt_i and $pattern = "(?i:$pattern)";
$pattern = qr{$pattern};
$opt_v > 1 and warn "Matching on $pattern\n";
}
@ARGV or usage 1;
my $file = shift;
-f $file or usage 1, "the file argument is not a regular file";
-s $file or usage 1, "the file is empty";
use Encode qw( encode decode );
use Spreadsheet::Read;
if ($opt_c) {
Spreadsheet::Read::parses ("csv") or die "No CSV module found\n";
eval q{use Text::CSV_XS};
}
if ($opt_H) {
$enc_o = "utf-8";
$opt_H = sub { $_[0]; };
eval q{
use HTML::Entities;
$opt_H = sub {
encode_entities (Encode::is_utf8 ($_[0]) ? $_[0] :
decode ("utf-8", $_[0]));
};
};
}
my @RDarg = (debug => $opt_d, clip => $clip);
$opt_A and push @RDarg, "attr" => 1;
defined $sep and push @RDarg, "sep" => $sep, parser => "csv";
defined $dtfmt and push @RDarg, "dtfmt" => $dtfmt;
$opt_v > 4 and warn "ReadData ($file, @RDarg);\n";
my $xls = ReadData ($file, @RDarg) or die "cannot read $file\n";
$opt_v > 7 and ddumper ($xls);
my $sc = $xls->[0]{sheets} or die "No sheets in $file\n";
$opt_v > 1 and warn "Opened $file with $sc sheets\n";
$opt_S eq "all" and $opt_S = "1..$sc"; # all
$opt_S =~ s/-$/-$sc/; # 3,6-
$opt_S =~ s/-/../g;
my %print;
eval "%{\$print{sheet}} = map { \$_ => 1 } $opt_S";
my $v_fmt = $opt_C || $opt_R || $opt_F ? "" : "%6d x %6d%s";
# New style xterm (based on ANSI colors):
# 30 Black
# 31 Red
# 32 Green
# 33 Yellow
# 34 Blue
# 35 Magenta
# 36 Cyan
# 37 White
sub color_reduce
{
my ($rgb, $base) = @_;
defined $rgb or return "";
my ($r, $g, $b) = map { hex >> 7 }
($rgb =~ m/^\s*#?([\da-f]{2})([\da-f]{2})([\da-f]{2})/);
$base + 4 * $b + 2 * $g + $r;
} # color_reduce
sub ansi_color
{
my ($fg, $bg, $bold, $ul) = @_;
# warn "$fg on $bg $bold $ul\n";
my $attr = join ";", 0, grep { /\S/ }
$bold ? 1 : "",
$ul ? 4 : "",
color_reduce ($fg, 30),
color_reduce ($bg, 40);
"\e[${attr}m";
} # ansi_color
sub css_color
{
my ($fg, $bg, $bold, $ul, $ha) = @_;
my @css;
$bold and push @css, "font-weight: bold";
$ul and push @css, "text-decoration: underline";
$fg and push @css, "color: $fg";
$bg and push @css, "background: $bg";
$ha and push @css, "text-align: $ha";
local $" = "; ";
@css ? qq{ style="@css"} : "";
} # css_color
binmode STDERR, ":utf8";
$enc_o and binmode STDOUT, ":encoding($enc_o)";
if ($opt_H) {
print <<EOH;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>$file</title>
<meta name="Author" content="xlscat $VERSION" />
<style type="text/css">
body, h2,
td, th { font-family: "Nimbus Sans L", "DejaVu Sans",
Helvetica, Arial, sans; }
table { border-spacing: 2px;
border-collapse: collapse; }
td, th { vertical-align: top;
padding: 4px; }
table > tbody > tr > th,
table > tr > th {
background: #e0e0e0; }
table > tbody > tr > td:not([class]),
table > tr > td:not([class]) {
background: #f0f0f0; }
.odd { background: #e0e0e0; }
</style>
</head>
<body>
EOH
}
my $name_len = 30;
if ($opt_i) {
my $nl = 0;
foreach my $sn (keys %{$xls->[0]{sheet}}) {
length ($sn) > $nl and $nl = length $sn;
}
$nl and $name_len = $nl;
}
my @opt_F = split m/[^A-Z\d]+/ => $opt_F;
foreach my $si (1 .. $sc) {
my @data;
exists $print{sheet}{$si} or next;
$opt_v > 1 and warn "Opening sheet $si ...\n";
my $s = $xls->[$si] or next;
$opt_v > 5 and ddumper ($s);
my @r = (1, $s->{maxrow});
my @c = (1, $s->{maxcol});
my ($sn, $nr, $nc) = ($s->{label}, $r[-1], $c[-1]);
$opt_v and printf STDERR "%s - %02d: [ %-*s ] %3d Cols, %5d Rows\n",
$file, $si, $name_len, $sn, $nc, $nr;
$opt_i && !$is_grep and next;
if (@opt_F) {
foreach my $fld (@opt_F) {
$is_grep && defined $s->{$fld} && $s->{$fld} !~ $pattern and next;
print "$fld:",$s->{$fld},"\n";
}
next;
}
if (my $rows = $opt_R) {
$rows eq "all" and $rows = "1..$nr"; # all
$rows =~ s/-$/-$nr/; # 3,6-
$rows =~ s/-/../g;
eval "%{\$print{row}} = map { \$_ => 1 } $rows";
}
if (my $cols = $opt_C) {
$cols eq "all" and $cols = "1..$nc"; # all
if ($cols =~ m/[A-Za-z]/) { # -C B,D => -C 2,4
my %ct = map {
my ($cc, $rr) = cell2cr (uc "$_".1);
($_ => $cc)
} ($cols =~ m/([a-zA-Z]+)/g);
$cols =~ s/([A-Za-z]+)/$ct{$1}/g;
}
$cols =~ s/-$/-$nc/; # 3,6-
$cols =~ s/-/../g;
eval "\$print{col} = [ map { \$_ - 1 } $cols ]";
$nc = @{$print{col}};
}
$opt_v >= 8 and ddumper (\%print);
$opt_H and print qq{<h2>$sn</h2>\n\n<table border="1">\n};
my $undef = $opt_v > 2 ? "-- undef --" : "";
my ($h, @w) = (0, (0) x $nc); # data height, -width, and default column widths
my @align = ("") x $nc;
foreach my $r ($r[0] .. $r[1]) {
exists $print{row} && !exists $print{row}{$r} and next;
my @att;
my @row = map {
my $cell = cr2cell ($_, $r);
my ($uval, $fval) = map {
defined $_ ? $enc_i ? decode ($enc_i, $_) : $_ : $undef
} $s->{cell}[$_][$r], $s->{$cell};
$opt_v > 2 and warn "$_:$r '$uval' / '$fval'\n";
$opt_A and
push @att, [ @{$s->{attr}[$_][$r]}{qw( fgcolor bgcolor bold uline halign )} ];
defined $s->{cell}[$_][$r] ? $opt_u ? $uval : $fval : "";
} $c[0] .. $c[1];
exists $print{col} and @row = @row[grep{$_<@row}@{$print{col}}];
$is_grep && $r > $opt_h &&
! grep { defined $_ && $_ =~ $pattern } @row and next;
if ($opt_L) {
foreach my $c (0 .. $#row) {
my $l = length $row[$c];
$l > $w[$c] and $w[$c] = $l;
$row[$c] =~ m/\D/ and $align[$c] = "-";
}
}
if ($opt_H) { # HTML
print " <tr>";
if (defined $opt_n) {
my $x = $r - $opt_n;
$x <= 0 and $x = "";
my $c = $r % 2 ? qq{ class="odd"} : "";
print qq{<td style="text-align: right" $c>$x</td>};
}
foreach my $c (0 .. $#row) {
my $css = css_color (@{$att[$c]});
$r % 2 and $css .= qq{ class="odd"};
my $td = $opt_H->($row[$c]);
print "<td$css>$td</td>";
}
print "</tr>\n";
next;
}
if ($opt_c) { # CSV
$opt_c->print (*STDOUT, \@row) or die $opt_c->error_diag;
next;
}
if (defined $opt_n) {
unshift @row, $r;
unshift @att, [ "#ffffff", "#000000", 0, 0 ];
}
if ($opt_L || $sort_order) { # Autofit / Align / order
push @data, [ [ @row ], [ @att ] ];
next;
}
if ($opt_A) {
foreach my $c (0 .. $#row) {
$row[$c] =
ansi_color (@{$att[$c]}).
$row[$c] .
"\e[0m";
}
}
line ($opt_s => @row);
} continue {
++$h % 100 == 0 && $opt_v and printf STDERR $v_fmt, $nc, $h, "\r";
}
$opt_H and print " </table>\n\n";
$v_fmt and printf STDERR $v_fmt, $nc, $h, "\n";
if ($sort_order) {
my @o;
my @h;
$sort_order =~ s/\+([0-9]+)\b// and @h = splice @data, 0, $1;
for ($sort_order =~ m/([0-9]+[rn]*)/g) {
m/^([0-9]+)(.*)/;
push @o, { col => $1 - 1, map { $_ => 1 } split m// => $2 };
}
my $sort = sub {
my $d = 0;
foreach my $o (@o) {
my ($A, $B) = map { $_->[0][$o->{col}] || 0 } $a, $b;
$d = $o->{n}
? $o->{r} ? $B <=> $A : $A <=> $B
: $o->{r} ? $B cmp $A : $A cmp $B
and return $d;
}
return $d;
};
@data = (@h, sort $sort @data);
}
$opt_L || $sort_order or next;
if (defined $opt_n) {
unshift @w, length $data[-1][0][0];
unshift @align, "";
}
$opt_n = 0;
for (@data) {
my ($row, $att) = @$_;
my @row = @$row;
for (0 .. $#row) {
my $w = " " x ($w[$_] - length $row[$_]);
if ($align[$_]) {
$row[$_] .= $w;
}
else {
substr $row[$_], 0, 0, $w;
}
if ($opt_A) {
substr $row[$_], 0, 0, ansi_color (@{$att->[$_]});
$row[$_] .= "\e[0m";
}
}
line ("|" => @row);
++$opt_n == $opt_h and line ("+", map {"-"x$w[$_]} 0..$#row);
}
}
$opt_H and print "</body>\n</html>\n";
sub line
{
my $sep = shift;
my $line = join $sep => @_;
!$enc_o && Encode::is_utf8 ($line) and $line = encode ("utf-8", $line);
print "$line\n";
} # show_line