#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long;
use Pod::Usage;
### Option variables
my $man = 0;
my $help = 0;
my ($sl, $tl);
our $verbose = 1;    # package global: consulted by _log()
my $header = 1;
my $columns = "1,2";
#### ------
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
# --source/--target also answer to --sl/--tl, the spelling used in the manual.
# --header and --verbose default to on, so they are declared negatable
# (--no-header / --no-verbose); as plain flags they could never be turned off.
GetOptions ('help|h' => \$help, 'man' => \$man,
            "source|sl=s" => \$sl,
            "target|tl=s" => \$tl,
            "header!" => \$header,
            "columns=s" => \$columns,
            "verbose|v!" => \$verbose)
    or pod2usage(2);
pod2usage(1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;
## Zero-based indexes of the source and target columns.
## Package globals: consulted by readLine().
our ($c1, $c2);
if ($columns =~ /^(\d+),(\d+)$/) {
    ($c1, $c2) = ($1, $2);
} else {
    die "Columns definition should be a pair of integers: 1,2\n";
}
# An empty language name is as useless as a missing one: treat both alike.
for my $lang ($sl, $tl) {
    undef $lang if defined $lang && $lang eq '';
}
if (!defined $sl || !defined $tl) {
    die "No header, and one of the source or target languages not defined!\n"
        unless $header;
    _log ("No source language or target language defined. Guessing!");
    # The header row is consumed here and supplies whichever name is missing.
    my ($l1, $l2) = readLine();
    $sl = $l1 unless defined $sl;
    $tl = $l2 unless defined $tl;
    die "Could not guess the source and target languages from the header line!\n"
        unless defined $sl && length $sl && defined $tl && length $tl;
    $header = 0;    # already consumed, do not skip another line below
}
# Both languages were given explicitly: the header row carries no data, drop it.
readLine() if $header;
use XML::TMX::Writer;
my $tmx = XML::TMX::Writer->new();
$tmx->start_tmx(id => 'tsv2tmx');
my $skipped = 0;
while (my @r = readLine()) {
    # A row lacking one of the two requested cells cannot form a translation unit.
    if (!defined $r[0] || !defined $r[1]) {
        $skipped++;
        next;
    }
    $tmx->add_tu($sl => $r[0], $tl => $r[1]);
}
$tmx->end_tmx();
_log ("Skipped $skipped incomplete line(s).") if $skipped;
# Write a diagnostic message to STDERR, but only while verbose mode is on.
# Arguments: the pieces of the message; they are printed back to back and
# followed by a single newline.
# Returns nothing useful.
sub _log {
    our $verbose;    # package global driven by the --verbose option
    return unless $verbose;
    # print plus an explicit newline rather than `say`: this file never
    # enables the `say` feature, so `say STDERR ...` only worked through
    # indirect-object method parsing.
    print STDERR @_, "\n";
    return;
}
# Read the next non-blank row from STDIN.
# Returns a two-element list holding the cells at the zero-based column
# indexes $c1 and $c2; a cell missing from a short row comes back as undef.
# Returns the empty list once the input is exhausted.
sub readLine {
    our ($c1, $c2);    # package globals holding the --columns selection
    # Test definedness, not truth: a final "0" line without a newline is data.
    while (defined(my $line = <STDIN>)) {
        chomp $line;
        # A blank row splits into no fields at all; returning that empty list
        # would look like end of input to the caller, so skip it instead.
        next unless $line =~ /\S/;
        my @cells = split /\t/, $line;
        # An array slice always yields exactly two elements, whereas the size
        # of an out-of-range list slice depends on the Perl version.
        return @cells[$c1, $c2];
    }
    return;
}
__END__
=encoding utf-8
=head1 NAME
tsv2tmx - Create a TMX from a TSV file
=head1 SYNOPSIS
tsv2tmx [options]
Options:
--help brief help message
--man full documentation
--verbose | -v activate verbose mode
--source=EN --target=PT set the source and target language names
--header treat first line as a heading
--columns=1,2 specify which columns to extract
=head1 OPTIONS
=over 8
=item B<--help>
Prints a brief help message and exits.
=item B<--man>
Prints the manual page and exits.
=item B<--verbose> | B<-v>
Activates the verbose mode.
=item B<--source> | B<--target>
Use these options to specify the names of the source and target
languages.
=item B<--header>
By default this switch is on, and it means that the TSV file
includes a first line with a heading. If no source or target
language names are specified, the first line will be used to
guess them.
=item B<--columns=1,2>
Specify which columns should be extracted. Needs to be a pair
of integers, separated by a comma. Column indexes start at 0.
Defaults to C<1,2>.
=back
=head1 DESCRIPTION
Useful to create translation memories from TSV files, that can be
easily exported from spreadsheet software.
=head1 SEE ALSO
XML::TMX
=head1 AUTHOR
Alberto Simões, C<< <ambs@cpan.org> >>
=head1 COPYRIGHT AND LICENSE
Copyright 2016 by Projecto Natura
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut