use strict;
use warnings;
#use utf8;
use Encode qw( encode decode );
use Getopt::Long;
# Character encoding used to decode the input files; override it on the
# command line with --charset=NAME.
my $charset = 'utf8';
GetOptions('charset=s' => \$charset)
    or die "Usage: $0 --charset=utf8 *.txt\n";

# Fail fast on an encoding name that Encode does not recognise, so the
# problem is reported before any output file is created or truncated.
Encode::find_encoding($charset)
    or die "Unknown charset '$charset'.\nUsage: $0 --charset=utf8 *.txt\n";
#use Encode qw(decode encode);
use JSON::XS;
use File::Glob qw(bsd_glob);

# Shared JSON encoder; ->utf8 makes encode() return UTF-8 encoded bytes.
my $json_xs = JSON::XS->new->utf8;

# Expand wildcards ourselves (presumably for shells that pass patterns such
# as *.txt through unexpanded). An argument that names an existing file is
# taken literally; anything else goes through bsd_glob, which, unlike the
# builtin glob, does not split its pattern on whitespace.
my @files = map { (-e $_) ? $_ : bsd_glob($_) } @ARGV;
if (!@files) {
    die "No input file given.\n";
}

for my $file (@files) {
    # foo.txt -> foo.json; any other name simply gets ".json" appended.
    my $outfile = $file;
    if ($outfile !~ s/\.txt$/.json/) {
        $outfile .= '.json';
    }
    process_file($file, $outfile);
}

# Forward declaration (with prototype) so that split_by_tab can be called
# without parentheses by code that is compiled before its definition.
sub split_by_tab ($);
# Convert one tab-separated text file into a file of JSON objects, one
# object per line.
#
# The first line of $infile supplies the field names. Every following line
# becomes one JSON object that maps those names to the line's values by
# position; values beyond the last field name are dropped and missing
# values are passed to the encoder as undef. Input is decoded with the
# file-level $charset and records are serialised with the file-level
# $json_xs.
#
# Parameters:
#   $infile  - path of the tab-separated input file
#   $outfile - path of the JSON output file (created or overwritten)
#
# Dies if either file cannot be opened or if writing the output fails.
# Reports completion with a message on STDERR.
sub process_file {
    my ($infile, $outfile) = @_;

    # Three-argument open keeps characters such as '>' or '|' and leading or
    # trailing whitespace in a file name from being read as part of the mode.
    # The input is opened first so that an unreadable input does not leave a
    # freshly truncated output file behind.
    open my $in, '<', $infile or
        die "Cannot open input file $infile for reading: $!\n";
    open my $out, '>', $outfile or
        die "Cannot open output file $outfile for writing: $!\n";

    # Header line. An empty input file has none; it then simply produces an
    # empty output file instead of a run of "uninitialized" warnings.
    my @fields;
    my $header = <$in>;
    if (defined $header) {
        $header = decode($charset, $header);
        $header =~ s/[\n\r]+\z//;
        @fields = split_by_tab($header);
    }

    while (my $line = <$in>) {
        $line = decode($charset, $line);
        $line =~ s/[\n\r]+\z//;
        my @vals = split_by_tab($line);

        # Pair names with values by position. Slicing past the end of @vals
        # yields undef, so short rows still get every field name.
        my %data;
        @data{@fields} = @vals[0 .. $#fields];

        print {$out} $json_xs->encode(\%data), "\n"
            or die "Cannot write to output file $outfile: $!\n";
    }

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out
        or die "Cannot close output file $outfile: $!\n";
    warn "$outfile generated.\n";
}
# Split one line of tab-separated text into its fields.
#
# Every tab is a separator, so leading, trailing and consecutive tabs all
# produce fields. An empty field is returned as undef rather than "". An
# empty (or undefined) line yields a single undef field.
#
# Parameters:
#   $line - the text to split; callers in this file strip the line
#           terminator first
#
# Returns the list of field values (in scalar context, their count).
sub split_by_tab ($) {
    my ($line) = @_;

    # split returns an empty list for an empty string, so that case is
    # seeded with one empty field. The limit of -1 keeps trailing empty
    # fields, which split would otherwise discard.
    my @vals = (defined $line && $line ne '')
        ? split(/\t/, $line, -1)
        : ('');

    return map { $_ eq '' ? undef : $_ } @vals;
}