bin/tablify - metacpan.org

#!/usr/bin/env perl

use strict;
use warnings;
use version;
use File::Basename;
use Getopt::Long;
use List::Util 'max';
use Pod::Usage;
use Readonly;
use Text::Autoformat 'autoformat';
use Text::RecordParser;
use Text::TabularDisplay;

Readonly our $VERSION  => 1.15;
Readonly my $DASH      => q{-};
Readonly my $EMPTY_STR => q{};
Readonly my $TAB       => qq{\t};
Readonly my $NEWLINE   => qq{\n};
Readonly my $WIDTH     => 78;

my $comment_start   = '';
my $fs              = $TAB;
my $rs              = $NEWLINE;
my $headers         = '';
my $no_headers      = 0;
my $show_vertically = 0;
my $no_pager        = 0;
my $strip_quotes    = 0;

my ( $show_fields, $list, $limit, @where, $help, $man_page, $show_version );
GetOptions(
    'c|comment:s'  => \$comment_start,
    'fs:s'         => \$fs,
    'rs:s'         => \$rs,
    'f|fields:s'   => \$show_fields,
    'l|list'       => \$list,
    'i|limit:i'    => \$limit,
    'w|where:s'    => \@where,
    'v|vertical'   => \$show_vertically,
    'strip-quotes' => \$strip_quotes,
    'n|no-headers' => \$no_headers,
    'h|headers:s'  => \$headers,
    'no-pager'     => \$no_pager,
    'help'         => \$help,
    'man'          => \$man_page,
    'version'      => \$show_version,
) or pod2usage;

if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
};

if ( $show_version ) {
    my $prog = basename( $0 );
    print "$prog $VERSION\n";
    exit 0;
}

my $file = shift or pod2usage('No file');
my $p    = Text::RecordParser->new(
    field_separator  => $fs,
    record_separator => $rs,
    comment          => $comment_start ? qr/^$comment_start/ : undef,
);

if ( $strip_quotes ) {
    $p->field_filter( sub { s/^["']//; s/["']$//; $_ } );
}

if ( $file eq $DASH ) {
    $p->fh( \*STDIN );
}
else {
    $p->filename( $file );
}

my @fields;
if ( @fields = split( /\s*,\s*/, $headers ) ) {
    $p->bind_fields( @fields );
}
elsif ( !$no_headers ) {
    $p->bind_header;
    @fields = $p->field_list;
}

if ( $list ) {
    pod2usage(q[Can't list fields with --no-headers]) if $no_headers;
    my $tab = Text::TabularDisplay->new('Field No.', 'Field');
    my $i   = 1;
    $tab->add( $i++, $_ ) for @fields;
    print $tab->render, $NEWLINE;
    exit 0;
}

my %where;
for ( @where ) {
    if ( /([\w\d]+)\s*(==|eq|>=?|<=?|=~)\s*(.*)/ ) {
        my $field = $1;
        my $op    = $2;
        my $value = $3;

        unless ( $no_headers ) {
            my %available = map { $_, 1 } @fields;
            next unless $available{ $field };
        }

        $field-- if $field =~ /^\d+$/;
        $where{ $field } = [ $op, $value ];
    }
}

if ( $show_fields ) {
    my @show    = map  { $_ =~ m/^\d+$/ && @fields 
                         ? $_-1 < scalar @fields ? $_ : () : $_ }
                  map  { $_ =~ m/(\d+)-(\d+)/ ? ( $1..$2 ) : $_ }
                  split /,/, $show_fields;
    my @numbers = grep { /^(\d+)$/ } @show;

    if ( $no_headers ) {
        @fields = @show;
    }
    else {
        if ( scalar @show == scalar @numbers ) { # all numbers
            @numbers = map { $_ - 1 } @numbers;
            @fields  = @fields[ @numbers ];
        }
        elsif ( @show ) {
            my %available = map { $_, 1 } @fields;
            my @temp      = @fields;
            @fields       = map { $available{ $_ } ? $_ : () } @show;
        }

        if ( !@fields ) {
            die "No fields match in list '$show_fields'\n";
        }
    }
}

my $fh;
my $pager = $ENV{'PAGER'};
if ( !$no_pager && $pager ) {
    open $fh, "| $pager";
}
else {
    $fh = \*STDOUT;
}

my $tab = Text::TabularDisplay->new( @fields );

my $max_col_length = 0;
my $num_records    = 0;
my $separator      = "************ Record %s ************\n";
if ( $no_headers ) {
    my @field_names;
    RECORD:
    while ( my @data = $p->fetchrow_array ) {
        if ( !@fields ) {
            @fields         = ( 0..$#data );
            @field_names    = map { 'Field' . ($_+1) } @fields;
            $max_col_length = max( map { length $_ } @field_names );
            $tab->columns( @field_names );
        }

        for my $field ( keys %where ) {
            my ( $op, $value ) = @{ $where{ $field } };
            my $cmd = "'$data[ ($field - 1) ]' $op $value";
            next RECORD unless eval $cmd;
        }

        $num_records++;
        if ( $show_vertically ) {
            printf $fh $separator, $num_records;
            for my $i ( @fields ) {
                printf $fh "%${max_col_length}s: %s\n", 
                    $field_names[ $i ], 
                    defined $data[ $i ] ? $data[ $i ] : q{};
            }
        }
        else {
            $tab->add( map { $data[ $_ ] } @fields );
        }

        last if $limit && $num_records >= $limit;
    }
}
else {
    $max_col_length = max map { $_ ? length $_ : 0 } $p->field_list;

    RECORD:
    while ( my $data = $p->fetchrow_hashref ) {
        for my $field ( keys %where ) {
            my ( $op, $value ) = @{ $where{ $field } };
            my $cmd = "'$data->{ $field }' $op $value";
            next RECORD unless eval $cmd;
        }

        $num_records++;
        if ( $show_vertically ) {
            printf $fh $separator, $num_records;
            for my $field ( @fields ) {
                next unless $field;
                my $v = defined $data->{ $field } ? $data->{ $field } : q{};
                if ( length $v > $WIDTH ) {
                    ( $v = autoformat( $v, { left => $max_col_length + 3 } ) )
                        =~ s/^\s+|\s+$//g;
                }

                printf $fh "%${max_col_length}s: %s\n", $field, $v;
            }
        }
        else {
            $tab->add( map { $data->{ $_ } } @fields );
        }
        last if $limit && $num_records >= $limit;
    }
}

if ( !$show_vertically ) {
    print $fh $tab->render;
}

print $fh $num_records
    ? sprintf(
        "\n%s record%s returned\n", $num_records, $num_records > 1 
        ? 's' : $EMPTY_STR 
    )
    : "\nNo records returned\n";

close $fh;

__END__

# -------------------------------------------------------------------

# $Id: tablify,v 1.14 2006/03/07 17:20:00 kclark Exp $

=pod

=head1 NAME

tablify - turn a delimited text file into a text table

=head1 SYNOPSIS

  tablify [options] file

Options:

  -h|--help           Show help
  -c|--comment        Define the beginning of a (single-line) comment 
  -n|--no-headers     Assume first line is data, not headers
  --no-pager          Do not use $ENV{'PAGER'} even if defined
  --strip-quotes      Strip " or ' around fields
  -l|--list           List the fields in the file (for use with -f)
  -f|--fields=f1[,f2] Show only fields in comma-separated list;
                      when used in conjunction with "no-headers"
                      the list should be field numbers (starting at 1);
                      otherwise, should be field names
  -w|where=f<cmp>v    Apply the "cmp" Perl operator to restrict output 
                      where field "f" matches the value "v";  acceptable
                      operators include ==, eq, >, >=, <=, and =~
  -v|--vertical       Show records vertically
  -i|--limit=n        Limit to given number of records
  --fs=x              Use "x" as the field separator 
                      (default is tab "\t")
  --rs=x              Use "x" as the record separator 
                      (default is newline "\n")
  --as-html           Create an HTML table instead of plain text
  --headers           Comma-separated list of names matching 
                      the number of columns

=head1 DESCRIPTION

This script is essentially a quick way to parse a delimited text file
and view it as a nice ASCII table.  By selecting only certain B<fields>,
employing a B<where> clause to only select records where a field matches
some value, and using the B<limit> to only see some of the output, you 
almost have a mini-database front-end for a simple text file.

=head1 EXAMPLES

Given a data file like this:

  name,rank,serial_no,is_living,age
  George,General,190293,0,64
  Dwight,General,908348,0,75
  Attila,Hun,,0,56
  Tojo,Emporor,,0,87
  Tommy,General,998110,1,54

To find the fields you can reference, use the B<list> option:

  $ tablify --fs ',' -l people.dat 
  +-----------+-----------+
  | Field No. | Field     |
  +-----------+-----------+
  | 1         | name      |
  | 2         | rank      |
  | 3         | serial_no |
  | 4         | is_living |
  | 5         | age       |
  +-----------+-----------+

To extract just the name and serial numbers, use the B<fields> option:

  $ tablify --fs ',' -f name,serial_no people.dat 
  +--------+-----------+
  | name   | serial_no |
  +--------+-----------+
  | George | 190293    |
  | Dwight | 908348    |
  | Attila |           |
  | Tojo   |           |
  | Tommy  | 998110    |
  +--------+-----------+
  5 records returned

To extract the first through third fields and the fifth field (where
field numbers start at "1" -- tip: use the B<list> option to quickly 
determine field numbers), use this syntax for B<fields>:

  $ tablify --fs ',' -f 1-3,5 people.dat 
  +--------+---------+-----------+------+
  | name   | rank    | serial_no | age  |
  +--------+---------+-----------+------+
  | George | General | 190293    | 64   |
  | Dwight | General | 908348    | 75   |
  | Attila | Hun     |           | 56   |
  | Tojo   | Emporor |           | 87   |
  | Tommy  | General | 998110    | 54   |
  +--------+---------+-----------+------+
  5 records returned

To select only the ones with six serial numbers, use a B<where> 
clause:

  $ tablify --fs ',' -w 'serial_no=~/^\d{6}$/' people.dat
  +--------+---------+-----------+-----------+------+
  | name   | rank    | serial_no | is_living | age  |
  +--------+---------+-----------+-----------+------+
  | George | General | 190293    | 0         | 64   |
  | Dwight | General | 908348    | 0         | 75   |
  | Tommy  | General | 998110    | 1         | 54   |
  +--------+---------+-----------+-----------+------+
  3 records returned

To find Dwight's record, you would do this:

  $ tablify --fs ',' -w 'name eq "Dwight"' people.dat
  +--------+---------+-----------+-----------+------+
  | name   | rank    | serial_no | is_living | age  |
  +--------+---------+-----------+-----------+------+
  | Dwight | General | 908348    | 0         | 75   |
  +--------+---------+-----------+-----------+------+
  1 record returned

To find the name of all the people with a serial number who are living:

  $ tablify --fs ',' -f name -w 'is_living==1' -w 'serial_no>0' people.dat 
  +-------+
  | name  |
  +-------+
  | Tommy |
  +-------+
  1 record returned

To filter outside of program and simply format the results, use "-" as
the last argument to force reading of STDIN (and probably assume no 
headers):

  $ grep General people.dat | tablify --fs ',' -f 1-3 --no-headers -
  +---------+--------+--------+
  | Field1  | Field2 | Field3 |
  +---------+--------+--------+
  | General | 190293 | 0      |
  | General | 908348 | 0      |
  | General | 998110 | 1      |
  +---------+--------+--------+
  3 records returned

When dealing with data lacking field names, you can specify "no-headers" 
and then refer to fields by number (starting at one), e.g.:

  $ tail -5 people.dat | tablify --fs ',' --no-headers -w '3 eq "General"' -
  +--------+---------+--------+--------+--------+
  | Field1 | Field2  | Field3 | Field4 | Field5 |
  +--------+---------+--------+--------+--------+
  | George | General | 190293 | 0      | 64     |
  | Dwight | General | 908348 | 0      | 75     |
  | Tommy  | General | 998110 | 1      | 54     |
  +--------+---------+--------+--------+--------+
  3 records returned

If your file has many fields which are hard to see across the screen, 
consider using the vertical display with "-v" or "--vertical", e.g.:

  $ tablify --fs ',' -v --limit 1 people.dat
  ************ Record 1 ************
       name: George
       rank: General
  serial_no: 190293
  is_living: 0
       age : 64
  
  1 record returned

=head1 SEE ALSO

=over 4

=item * Text::RecordParser

=item * Text::TabularDisplay

=item * DBD::CSV

Although I don't DBD::CSV this module, the idea was much the inspiration 
for this.  I just didn't want to have to install DBI and DBD::CSV to
get this kind of functionality.  I think my interface is simpler.

=back

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cpan.orgE<gt>.

=head1 LICENSE AND COPYRIGHT

Copyright (C) 2006-10 Ken Youens-Clark.  All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)