examples/xlscat - metacpan.org

#!/pro/bin/perl

# xlscat:  show XLS/SXC file as Text
# xlsgrep: grep pattern
#	   (m)'17 [2017-02-02] Copyright H.M.Brand 2005-2017

use strict;
use warnings;

our $VERSION = "3.6";

my $is_grep = $0 =~ m/grep$/;

sub usage {
    my $err = shift and select STDERR;
    (my $scrpt = $0) =~ s{.*[\/]}{};
    my $p = $is_grep ? " pattern" : "";
    print
	"usage: $scrpt\t[-s <sep>] [-L] [-n] [-A] [-u] [Selection]$p file.xls\n",
	"             \t[-c | -m]                 [-u] [Selection]$p file.xls\n",
	"             \t -i                            [-S sheets]$p file.xls\n",
	"    Generic options:\n",
	"       -v[#]       Set verbose level (xlscat/xlsgrep)\n",
	"       -d[#]       Set debug   level (Spreadsheet::Read)\n",
	"       -u          Use unformatted values\n",
	"       --strip[=#] Strip leading and/or traing spaces of all cells\n",
	"       --noclip    Do not strip empty sheets and\n",
	"                   trailing empty rows and columns\n",
	"       -e <enc>    Set encoding for input and output\n",
	"       -b <enc>    Set encoding for input\n",
	"       -a <enc>    Set encoding for output\n",
	"    Input CSV:\n",
	"       --in-sep=c  Set input sep_char for CSV\n",
	"    Input XLS:\n",
	"       --dtfmt=fmt Specify the default date format to replace 'm-d-yy'\n",
	"                   the default replacement is 'yyyy-mm-dd'\n",
	"    Output Text (default):\n",
	"       -s <sep>    Use separator <sep>. Default '|', \\n allowed\n",
	"       -L          Line up the columns\n",
	"       -n [skip]   Number lines (prefix with column number)\n",
	"                   optionally skip <skip> (header) lines\n",
	"       -A          Show field attributes in ANSI escapes\n",
	"       -h[#]       Show # header lines\n",
	$is_grep ? (
	"    Grep options:\n",
	"       -i          Ignore case\n",
	"       -w          Match whole words only\n") : (
	"    Output Index only:\n",
	"       -i          Show sheet names and size only\n"),
	"    Output CSV:\n",
	"       -c          Output CSV, separator = ','\n",
	"       -m          Output CSV, separator = ';'\n",
	"    Output HTML:\n",
	"       -H          Output HTML\n",
	"    Selection:\n",
	"       -S <sheets> Only print sheets <sheets>. 'all' is a valid set\n",
	"                   Default only prints the first sheet\n",
	"       -R <rows>   Only print rows    <rows>. Default is 'all'\n",
	"       -C <cols>   Only print columns <cols>. Default is 'all'\n",
	"       -F <flds>   Only fields <flds> e.g. -FA3,B16\n",
	"    Ordering (column numbers in result set *after* selection):\n",
	"       --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)\n",
	"                   +#   - first # lines do not sort (header)\n",
	"                   #    - order on column # lexical ascending\n",
	"                   #n   - order on column # numeric ascending\n",
	"                   #r   - order on column # lexical descending\n",
	"                   #rn  - order on column # numeric descending\n",
	"\n",
	"Examples:\n",
	"    xlscat -i foo.xls\n",
	"    xlscat --in-sep=: --sort=3n -L /etc/passwd\n";
    @_ and print join "\n", @_, "";
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling noignorecase);
my $opt_c;		# Generate CSV
my $opt_s;		# Text separator
my $opt_S;		# Sheets to print
my $opt_R;		# Rows to print
my $opt_C;		# Columns to print
my $dtfmt;		# Default date-format for Excel
my $opt_F = "";		# Fields to print
my $opt_i = 0;		# Index (cat) | ignore_case (grep)
my $opt_L = 0;		# Auto-size/align columns
my $opt_n;		# Prefix lines with column number
my $opt_u = 0;		# Show unformatted values
my $opt_f = 0;		# Show the formula instead of the value
my $opt_v = 0;		# Verbosity for xlscat
my $opt_d = 0;		# Debug level for Spreadsheet::Read
my $opt_A = 0;		# Show field colors in ANSI escapes
my $opt_H = 0;		# Output in HTML
my $opt_h = 0;		# Number of header lines for grep or -L
my $opt_w = 0;		# Grep words
my $clip  = 1;
my $enc_i;		# Input  encoding
my $enc_o;		# Output encoding
my $sep;		# Input field sep for CSV
GetOptions (
    "help|?"		=> sub { usage (0); },

    # Input CSV
    "c|csv"		=> sub { $opt_c = "," },
    "m|ms"		=> sub { $opt_c = ";" },
    "insepchar".
     "|in-sep".
     "|in-sep-char=s"	=> \$sep,

    # Input XLS
    "dtfmt".
     "|date-format=s"	=> \$dtfmt,
    "f|formulas!"	=> \$opt_f,

    # Output
    "i|index".
     "|ignore-case"	=> \$opt_i,
    "s|separator".
     "|outsepchar".
     "|out-sep".
     "|out-sep-char=s"	=> \$opt_s,
    "S|sheets=s"	=> \$opt_S,
    "R|rows=s"		=> \$opt_R,
    "C|columns=s"	=> \$opt_C,
    "F|fields=s"	=> \$opt_F,
    "L|fit|align!"	=> \$opt_L,
    "n|number:0"	=> \$opt_n,
    "A|ansi|color!"	=> \$opt_A,
    "u|unformatted!"	=> \$opt_u,
    "v|verbose:1"	=> \$opt_v,
    "d|debug:1"		=> \$opt_d,
    "H|html:1"		=> \$opt_H,
      "noclip"		=> sub { $clip = 0 },
      "strip:3"		=> \my $strip,
      "sort=s"		=> \my $sort_order,

    # Encoding
    "e|encoding=s"	=> sub { $enc_i = $enc_o = $_[1] },
    "b|encoding-in=s"	=> \$enc_i,
    "a|encoding-out=s"	=> \$enc_o,

    # Grep
    "w|word!"		=> \$opt_w,
    "h|header:1"	=> \$opt_h,
    ) or usage 1, "GetOpt: $@";

unless ($is_grep) {
$opt_i && $opt_L and usage 1, "Options i and L are mutually exclusive";
$opt_i && $opt_s and usage 1, "Options i and s are mutually exclusive";
$opt_i && $opt_c and usage 1, "Options i and c are mutually exclusive";
$opt_i && $opt_u and usage 1, "Options i and u are mutually exclusive";
$opt_i && $opt_S and usage 1, "Options i and S are mutually exclusive";
$opt_i && $opt_R and usage 1, "Options i and R are mutually exclusive";
$opt_i && $opt_C and usage 1, "Options i and C are mutually exclusive";
$opt_i && $opt_F and usage 1, "Options i and F are mutually exclusive";
$opt_i && $opt_H and usage 1, "Options i and H are mutually exclusive";
}
$opt_c && $opt_s and usage 1, "Options c and s are mutually exclusive";
$opt_c && $opt_H and usage 1, "Options c and H are mutually exclusive";
$opt_s && $opt_H and usage 1, "Options s and H are mutually exclusive";

defined $opt_s or $opt_s = "|"; eval "\$opt_s = qq{$opt_s}";
defined $opt_S or $opt_S = $opt_i || $is_grep ? "all" : "1";
$opt_i && !$is_grep && $opt_v < 1 and $opt_v = 1;
$opt_f and $opt_A++;

if ($opt_c) {
    $opt_L = 0;	# Cannot align CSV
    $opt_c =~ m/^1?$/ and $opt_c = ",";
    $opt_c = Text::CSV_XS->new ({
	binary   => 1,
	sep_char => $opt_c,
	eol      => "\r\n",
	});
    }

# Debugging. Prefer Data::Peek over Data::Dumper if available
{   use Data::Dumper;
    my $dp = 0;
    eval q{
	use Data::Peek;
	$dp = 1;
	};
    sub ddumper {
	$dp ? DDumper (@_)
	    : print STDERR Dumper (@_);
	} # ddumper
    }

my $pattern;
if ($is_grep) {
    $pattern = shift or usage 1;
    $opt_w and $pattern = "\\b$pattern\\b";
    $opt_i and $pattern = "(?i:$pattern)";
    $pattern = qr{$pattern};
    $opt_v > 1 and warn "Matching on $pattern\n";
    }

@ARGV or usage 1;
my $file = shift;
-f $file or usage 1, "the file argument is not a regular file";
-s $file or usage 1, "the file is empty";

use Encode qw( encode decode );
use Spreadsheet::Read;

if ($opt_c) {
    Spreadsheet::Read::parses ("csv") or die "No CSV module found\n";
    eval q{use Text::CSV_XS};
    }
if ($opt_H) {
    $enc_o = "utf-8";
    $opt_H = sub { $_[0]; };
    eval q{
	use HTML::Entities;
	$opt_H = sub {
	    encode_entities (Encode::is_utf8 ($_[0]) ? $_[0] :
		decode ("utf-8", $_[0]));
	    };
	};
    }

my @RDarg = (debug => $opt_d, clip => $clip);
$opt_A         and push @RDarg, attr  => 1;
defined $sep   and push @RDarg, sep   => $sep, parser => "csv";
defined $dtfmt and push @RDarg, dtfmt => $dtfmt;
$strip         and push @RDarg, strip => $strip;
$opt_v > 4 and warn "ReadData ($file, @RDarg);\n";
my $xls = ReadData ($file, @RDarg) or die "cannot read $file\n";
$opt_v > 7 and ddumper ($xls);
my $sc  = $xls->[0]{sheets}	or die "No sheets in $file\n";
$opt_v > 1 and warn "Opened $file with $sc sheets\n";

$opt_S eq "all" and $opt_S = "1..$sc";	# all
$opt_S =~ s/-$/-$sc/;			# 3,6-
$opt_S =~ s/-/../g;
my %print;
eval "%{\$print{sheet}} = map { \$_ => 1 } $opt_S";

my $v_fmt = $opt_C || $opt_R || $opt_F ? "" : "%6d x %6d%s";

# New style xterm (based on ANSI colors):
# 30 Black
# 31 Red
# 32 Green
# 33 Yellow
# 34 Blue
# 35 Magenta
# 36 Cyan
# 37 White
sub color_reduce {
    my ($rgb, $base) = @_;
    defined $rgb or return "";
    my ($r, $g, $b) = map { hex >> 7 }
	($rgb =~ m/^\s*#?([\da-f]{2})([\da-f]{2})([\da-f]{2})/);
    $base + 4 * $b + 2 * $g + $r;
    } # color_reduce

sub ansi_color {
    my ($fg, $bg, $bold, $ul) = @_;

    # warn "$fg on $bg $bold $ul\n";
    my $attr = join ";", 0, grep { /\S/ }
	$bold ? 1 : "",
	$ul   ? 4 : "",
	color_reduce ($fg, 30),
	color_reduce ($bg, 40);

    "\e[${attr}m";
    } # ansi_color

sub css_color {
    my ($fg, $bg, $bold, $ul, $ha) = @_;

    my @css;
    $bold and push @css, "font-weight: bold";
    $ul   and push @css, "text-decoration: underline";
    $fg   and push @css, "color: $fg";
    $bg   and push @css, "background: $bg";
    $ha   and push @css, "text-align: $ha";

    local $" = "; ";
    @css ? qq{ style="@css"} : "";
    } # css_color

	    binmode STDERR, ":utf8";
$enc_o and  binmode STDOUT, ":encoding($enc_o)";

if ($opt_H) {
    print <<EOH;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
  <title>$file</title>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta name="Author" content="xlscat $VERSION" />
  <style type="text/css">
    body, h2,
    td, th { font-family:     "Nimbus Sans L", "DejaVu Sans",
                              Helvetica, Arial, sans; }
    table  { border-spacing:  2px;
             border-collapse: collapse;               }
    td, th { vertical-align:  top;
             padding:         4px;                    }
    table  > tbody > tr > th,
    table  > tr > th {
             background:      #e0e0e0;                }
    table  > tbody > tr > td:not([class]),
    table  > tr > td:not([class]) {
             background:      #f0f0f0;                }
    .odd   { background:      #e0e0e0;                }
    </style>
  </head>
<body>
EOH
    }

my $name_len = 30;
if ($opt_i) {
    my $nl = 0;
    foreach my $sn (keys %{$xls->[0]{sheet}}) {
	length ($sn) > $nl and $nl = length $sn;
	}
    $nl and $name_len = $nl;
    }
my @opt_F = split m/[^A-Z\d]+/ => $opt_F;
foreach my $si (1 .. $sc) {
    my @data;
    exists $print{sheet}{$si} or next;
    $opt_v > 1 and warn "Opening sheet $si ...\n";
    my $s = $xls->[$si] or next;
    $opt_v > 5 and ddumper ($s);
    my @r = (1, $s->{maxrow});
    my @c = (1, $s->{maxcol});
    my ($sn, $nr, $nc) = ($s->{label}, $r[-1], $c[-1]);
    $opt_v and printf STDERR "%s - %02d: [ %-*s ] %3d Cols, %5d Rows\n",
	$file, $si, $name_len, $sn, $nc, $nr;
    $opt_i && !$is_grep and next;

    if (@opt_F) {
	foreach my $fld (@opt_F) {
	    $is_grep && defined $s->{$fld} && $s->{$fld} !~ $pattern and next;
	    print "$fld:",$s->{$fld},"\n";
	    }
	next;
	}

    if (my $rows = $opt_R) {
	$rows eq "all" and $rows = "1..$nr";	# all
	$rows =~ s/-$/-$nr/;			# 3,6-
	$rows =~ s/-/../g;
	eval "%{\$print{row}} = map { \$_ => 1 } $rows";
	}
    if (my $cols = $opt_C) {
	$cols eq "all" and $cols = "1..$nc";	# all
	if ($cols =~ m/[A-Za-z]/) {		# -C B,D => -C 2,4
	    my %ct = map {
		my ($cc, $rr) = cell2cr (uc "$_".1);
		($_ => $cc)
		} ($cols =~ m/([a-zA-Z]+)/g);
	    $cols =~ s/([A-Za-z]+)/$ct{$1}/g;
	    }
	$cols =~ s/-$/-$nc/;			# 3,6-
	$cols =~ s/-/../g;
	eval "\$print{col} = [ map { \$_ - 1  } $cols ]";
	$nc = @{$print{col}};
	}
    $opt_v >= 8 and ddumper (\%print);

    $opt_H and print qq{<h2>$sn</h2>\n\n<table border="1">\n};
    my $undef = $opt_v > 2 ? "-- undef --" : "";
    my ($h, @w) = (0, (0) x $nc); # data height, -width, and default column widths
    my @align = ("") x $nc;
    foreach my $r ($r[0] .. $r[1]) {
	exists $print{row} && !exists $print{row}{$r} and next;
	my @att;
	my @row = map {
	    my $cell = cr2cell ($_, $r);
	    my ($uval, $fval) = map {
		defined $_ ? $enc_i ? decode ($enc_i, $_) : $_ : $undef
		} $s->{cell}[$_][$r], $s->{$cell};
	    $opt_v > 2 and warn "$_:$r '$uval' / '$fval'\n";
	    $opt_A and 
		push @att, [ @{$s->{attr}[$_][$r]}{qw( fgcolor bgcolor bold uline halign )} ];
	    $opt_f && $s->{attr}[$_][$r]{formula}
		? "=".$s->{attr}[$_][$r]{formula}
		: defined $s->{cell}[$_][$r] ? $opt_u ? $uval : $fval : "";
	    } $c[0] .. $c[1];
	exists $print{col} and @row = @row[grep{$_<@row}@{$print{col}}];
	$is_grep && $r > $opt_h &&
	    ! grep { defined $_ && $_ =~ $pattern } @row and next;
	if ($opt_L) {
	    foreach my $c (0 .. $#row) {
		my $l = length $row[$c];
		$l > $w[$c] and $w[$c] = $l;
		$row[$c] =~ m/\D/ and $align[$c] = "-";
		}
	    }
	if ($opt_H) {	# HTML
	    print "  <tr>";
	    if (defined $opt_n) {
		my $x = $r - $opt_n;
		$x <= 0 and $x = "";
		my $c = $r % 2 ? qq{ class="odd"} : "";
		print qq{<td style="text-align: right" $c>$x</td>};
		}
	    foreach my $c (0 .. $#row) {
		my $css = css_color (@{$att[$c]});
		$r % 2 and $css .= qq{ class="odd"};
		my $td  = $opt_H->($row[$c]);
		print "<td$css>$td</td>";
		}
	    print "</tr>\n";
	    next;
	    }
	if ($opt_c) {	# CSV
	    $opt_c->print (*STDOUT, \@row) or die $opt_c->error_diag;
	    next;
	    }
	if (defined $opt_n) {
	    unshift @row, $r;
	    unshift @att, [ "#ffffff", "#000000", 0, 0 ];
	    }
	if ($opt_L || $sort_order) {	# Autofit / Align / order
	    push @data, [ [ @row ], [ @att ] ];
	    next;
	    }
	if ($opt_A) {
	    foreach my $c (0 .. $#row) {
		$row[$c] =
		    ansi_color (@{$att[$c]}).
		    $row[$c] .
		    "\e[0m";
		}
	    }
	line ($opt_s => @row);
	} continue {
	    ++$h % 100 == 0 && $opt_v and printf STDERR $v_fmt, $nc, $h, "\r";
	    }
    $opt_H and print "  </table>\n\n";
    $v_fmt and printf STDERR $v_fmt, $nc, $h, "\n";
    if ($sort_order) {
	my @o;
	my @h;
	$sort_order =~ s/\+([0-9]+)\b// and @h = splice @data, 0, $1;
	for ($sort_order =~ m/([0-9]+[rn]*)/g) {
	    m/^([0-9]+)(.*)/;
	    push @o, { col => $1 - 1, map { $_ => 1 } split m// => $2 };
	    }
	my $sort = sub {
	    my $d = 0;
	    foreach my $o (@o) {
		my ($A, $B) = map { $_->[0][$o->{col}] || 0 } $a, $b;
		$d = $o->{n}
		    ? $o->{r} ? $B <=> $A : $A <=> $B
		    : $o->{r} ? $B cmp $A : $A cmp $B
			and return $d;
		}
	    return $d;
	    };
	@data = (@h, sort $sort @data);
	}
    $opt_L || $sort_order or next;
    if (defined $opt_n) {
	unshift @w, length $data[-1][0][0];
	unshift @align, "";
	}
    $opt_n = 0;
    for (@data) {
	my ($row, $att) = @$_;
	my @row = @$row;
	for (0 .. $#row) {
	    my $l = length $row[$_];
	    my $w = $l < $w[$_] ? " " x ($w[$_] - $l) : "";
	    if ($align[$_]) {
		$row[$_] .= $w;
		}
	    else {
		substr $row[$_], 0, 0, $w;
		}
	    if ($opt_A) {
		substr $row[$_], 0, 0, ansi_color (@{$att->[$_]});
		$row[$_] .= "\e[0m";
		}
	    }
	line ("|" => @row);
	++$opt_n == $opt_h and line ("+", map {"-"x$w[$_]} 0..$#row);
	}
    }
$opt_H and print "</body>\n</html>\n";

sub line {
    my $sep  = shift;
    my $line = join $sep => @_;
    !$enc_o && Encode::is_utf8 ($line) and $line = encode ("utf-8", $line);
    print "$line\n";
    } # show_line

END {
    if ($opt_v >= 7) {
	my %seen;
	print "\nNon-CORE modules loaded:\n", "-" x 25, " ", "--------\n";
	foreach my $mod (sort keys %INC) {
	    my $path = $INC{$mod};
	    $mod =~ s{\.pm$}{} or next;
	    $mod =~ s{/}{::}g;
	    $path =~ s{.*/site_perl/}{} or next;
	    grep { $mod =~ m/^${_}::/ } keys %seen and next;
	    $seen{$mod}++;
	    my $v = $mod->VERSION () || eval "\$${mod}::VERSION" || "?";
	    printf "%-25s %s\n", $mod, $v;
	    }
	}
    }
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)