The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

use warnings;
use strict;
use Getopt::Long;
use Pod::Usage;

### Option variables
my $man  = 0;
my $help = 0;
my ($sl, $tl);          # source and target language names
our $verbose = 1;       # package variable read by _log(); on unless --no-verbose
my $header   = 1;       # first input line is a heading unless --no-header
my $columns  = "1,2";   # zero-based indexes of the source and target columns
#### ------

binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";

# --header and --verbose default to on, so they are declared negatable ("!");
# as plain flags nothing on the command line could ever switch them off.
# --sl and --tl are accepted as aliases of --source and --target.
GetOptions(
    'help|h'      => \$help,
    'man'         => \$man,
    'source|sl=s' => \$sl,
    'target|tl=s' => \$tl,
    'header!'     => \$header,
    'columns=s'   => \$columns,
    'verbose|v!'  => \$verbose,
) or pod2usage(2);

pod2usage(1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;

##

# Column indexes used by readLine() to pick the two fields of each row.
our ($c1, $c2);
if ($columns =~ /^(\d+),(\d+)$/) {
    ($c1, $c2) = ($1, $2);
}
else {
    die "Columns definition should be a pair of integers: 1,2\n";
}

# When a language name is missing, take it from the heading line (which is
# consumed here, hence $header is cleared afterwards).
if (!$sl || !$tl) {
    die "No header, and one of the source or target languages not defined!\n"
        unless $header;

    _log("No source language or target language defined. Guessing!");
    my ($l1, $l2) = readLine();
    $sl = $l1 unless $sl;
    $tl = $l2 unless $tl;

    # The heading may be absent (empty input) or lack the selected columns.
    die "Could not guess source and target languages from the header line!\n"
        unless $sl && $tl;

    $header = 0;
}

# Both languages were given explicitly: the heading line is just skipped.
readLine() if $header;

use XML::TMX::Writer;
my $tmx = XML::TMX::Writer->new();
$tmx->start_tmx(id => 'tsv2tmx');

# One translation unit per remaining input row.
my @r;
while (@r = readLine()) {
    $tmx->add_tu($sl => $r[0], $tl => $r[1]);
}

$tmx->end_tmx();

# Print a diagnostic message, followed by a newline, to STDERR.
# Does nothing unless the package variable $verbose is true.
# Uses print rather than say, since this file never enables the 'say' feature.
sub _log {
    print STDERR @_, "\n" if $verbose;
}

# Read the next data row from STDIN and return the two fields selected by
# the package variables $c1 and $c2 (zero-based column indexes).
#
# Rows that contain no fields at all (blank lines, or lines made only of
# tabs) are skipped. A selected column that is missing from a row is
# returned as undef, so a data row always yields exactly two elements.
# Returns the empty list once the input is exhausted.
sub readLine {
    # Test definedness, not truth: a final line "0" without a newline is data.
    while (defined(my $line = <STDIN>)) {
        # Strip the line ending, including the CR of CRLF-terminated files.
        $line =~ s/\r?\n?\z//;

        my @fields = split /\t/, $line;
        next unless @fields;

        # Array slice: one element per index, even when out of range.
        return @fields[$c1, $c2];
    }
    return;
}

__END__

=encoding utf-8

=head1 NAME
 
tsv2tmx - Create a TMX from a TSV file
 
=head1 SYNOPSIS
 
tsv2tmx [options]
 
 Options:
   --help            brief help message
   --man             full documentation
   --verbose | -v    activate verbose mode
   --source=EN       source language name
   --target=PT       target language name
   --header          treat first line as a heading
   --columns=1,2     specify which columns to extract
 
=head1 OPTIONS
 
=over 8
 
=item B<--help>
 
Prints a brief help message and exits.
 
=item B<--man>
 
Prints the manual page and exits.
 
=item B<--verbose> | B<-v>

Activates the verbose mode.

=item B<--source> | B<--target>

Use these options to specify the names for the source and target
languages.

=item B<--header>

By default this switch is on, and it means that the TSV file
includes a first line with a heading. If no source or target
language names are specified, the first line will be used to
guess them.

=item B<--columns=1,2>

Specify which columns should be extracted. Needs to be a pair
of integers, separated by a comma. Column indexes start at 0.
Defaults to C<1,2>.

=back
 
=head1 DESCRIPTION
 
Useful for creating translation memories from TSV files, which can be
easily exported from spreadsheet software.

=head1 SEE ALSO

XML::TMX

=head1 AUTHOR

Alberto Simões, C<< <ambs@cpan.org> >>

=head1 COPYRIGHT AND LICENSE

Copyright 2016 by Projecto Natura

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut