The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

# PODNAME: tsv2tmx
# ABSTRACT: Create a TMX from a TSV file

use warnings;
use strict;
use Getopt::Long;
use Pod::Usage;

### Option variables
# Defaults for the command-line switches parsed by GetOptions below.
my $man = 0;          # --man: print the full manual page
my $help = 0;         # --help / -h: print the brief usage message
my ($sl, $tl);        # source / target language names (undef until known)
our $verbose = 1;     # read by _log(); on unless --no-verbose is given
my $header = 1;       # first input line is a heading unless --no-header
my $columns = "1,2";  # zero-based indexes of the two columns to extract
#### ------

# Input is read from STDIN with the :utf8 layer; STDOUT gets the same layer.
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";


# --sl / --tl are accepted as aliases of --source / --target because the
# manual page advertises the short names. "header" and "verbose" default to
# on, so they are declared negatable ("!") to make --no-header and
# --no-verbose available; the plain --header / --verbose / -v forms still work.
GetOptions ('help|h'      => \$help,
            'man'         => \$man,
            "source|sl=s" => \$sl,
            "target|tl=s" => \$tl,
            "header!"     => \$header,
            "columns=s"   => \$columns,
            "verbose|v!"  => \$verbose)
or pod2usage(2);

pod2usage(1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;

##

# Zero-based indexes of the two columns to extract, parsed from --columns.
# Package variables read by readLine().
our ($c1, $c2);
if ($columns =~ /^(\d+),(\d+)$/) {
	($c1,$c2) = ($1,$2);
} else {
	die "Columns definition should be a pair of integers: 1,2\n";
}

# When a language name is missing, take it from the heading line.
if (!$sl || !$tl) {
	if ($header) {
		_log ("No source language or target language defined. Guessing!");
		my ($l1, $l2) = readLine();
		# Test truth here, like the guard above, so that an empty
		# --source= / --target= value is also replaced by the heading.
		$sl = $l1 unless $sl;
		$tl = $l2 unless $tl;
		# An empty input or a heading with too few columns leaves a name
		# unset; stop instead of writing units keyed by an undefined language.
		die "Could not guess the source and target languages from the header line!\n"
			if !$sl || !$tl;
	} else {
		die "No header, and one of the source or target languages not defined!\n";
	}
	$header = 0;	# the heading line has already been consumed
}

# Both languages came from the command line: discard the heading line, if any.
readLine() if $header;

use XML::TMX::Writer;
# Emit the TMX document: one translation unit per remaining input row.
my $tmx = XML::TMX::Writer->new();
$tmx->start_tmx(id => 'tsv2tmx');

# The loop ends as soon as readLine() returns an empty list.
while (my @pair = readLine()) {
	my ($source_text, $target_text) = @pair[0, 1];
	$tmx->add_tu($sl => $source_text, $tl => $target_text);
}

$tmx->end_tmx();

# _log(@message)
# Prints the message parts, followed by a newline, to STDERR when the
# package variable $verbose is true; does nothing otherwise.
sub _log {
	# print + "\n" instead of say: say is only a builtin when the 'say'
	# feature is enabled, and this script does not enable it.
	print STDERR @_, "\n" if $verbose;
}

# readLine()
# Reads the next non-blank line from STDIN, splits it on tabs and returns
# the two fields at the zero-based indexes $c1 and $c2 (package variables).
#
# Returns: a two-element list (source field, target field), where a field is
#          undef when the row has fewer columns than the requested index;
#          or an empty list at end of input.
sub readLine {
	# defined() rather than truth, so a final unterminated line "0" is
	# still treated as data instead of end of input.
	while (defined(my $line = <STDIN>)) {
		chomp $line;
		# A blank line has no fields. Returning an empty list for it would
		# look like end of input to the caller, so skip it instead.
		next if $line eq '';
		# Limit -1 keeps trailing empty fields, so column positions stay
		# stable when the last cells of a row are empty.
		my @fields = split /\t/, $line, -1;
		# Two explicit elements: the result is never empty for a data row.
		return ($fields[$c1], $fields[$c2]);
	}
	return;
}

__END__

=pod

=encoding utf-8

=head1 NAME

tsv2tmx - Create a TMX from a TSV file

=head1 VERSION

version 0.36

=head1 SYNOPSIS

tsv2tmx [options]

 Options:
   --help            brief help message
   --man             full documentation
   --verbose | -v    activate verbose mode
   --source=EN --target=PT
                     describe source and target language names
   --header          treat first line as a heading
   --columns=1,2     specify which columns to extract

=head1 DESCRIPTION

Useful to create translation memories from TSV files, that can be
easily exported from spreadsheet software.

=head1 OPTIONS

=over 8

=item B<--help>

Prints a brief help message and exits.

=item B<--man>

Prints the manual page and exits.

=item B<--verbose> | B<-v>

Activates the verbose mode.

=item B<--source> | B<--target>

Use these options to specify the names for the source and target
languages.

=item B<--header>

By default this switch is on, and it means that the TSV file
includes a first line with a heading. If no source or target
language names are specified, the first line will be used to
guess them.

=item B<--columns=1,2>

Specify which columns should be extracted. Needs to be a pair
of integers, separated by a comma. Column indexes start at 0.
Defaults to C<1,2>.

=back

=head1 SEE ALSO

XML::TMX

=head1 AUTHORS

=over 4

=item *

Alberto Simões <ambs@cpan.org>

=item *

José João Almeida <jj@di.uminho.pt>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2010-2017 by Projeto Natura <natura@di.uminho.pt>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut