The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

# Copyright (c) 2002, Sam Vilain.  All rights reserved. This program
# is free software; you may use it under the same terms as Perl
# itself.

package Lingua::Translate::SysTran;

use strict;
use Carp;

# package globals:
# %config is default values to use for new objects
# %servers is a hash from a translation pair to a hostname/port number
# %one_letter_codes is actually a constant, it is for default port
# number calculation
use vars qw($VERSION %config %servers %one_letter_codes);

# WARNING: Some constants have their default values extracted from the
# POD.  See the Pod::Constants man page.

=head1 NAME

Lingua::Translate::SysTrans - Translation back-end for SysTran's
                              enterprise translation server, version
                              0.01 (European languages only)

=head1 SYNOPSIS

 use Lingua::Translate;

 Lingua::Translate::config
     (
       back_end => "SysTran",
       host => "babelfish.mydomainname.com",
     );

 my $xl8r = Lingua::Translate->new(src => "de", dest => "en");

 # prints "My hovercraft is full of eels"
 print $xl8r->translate("Mein Luftkissenfahrzeug ist voll von den Aalen");

=head1 DESCRIPTION

Lingua::Translate::SysTran is a translation back-end for
Lingua::Translate that contacts a SysTran translation server to do the
real work.

You should try to avoid putting the config() command that sets the
location of the server in all of your scripts; make a little
configuration module or put it in a script you can `require'.

=head1 CONSTRUCTOR

=head2 new(src => $lang, dest => lang, option => $value)

Creates a new translation handle.  This won't initiate a connection
until you try to translate something.

=over

=item src

Source language, in RFC-3066 form.  See L<I18N::LangTags> for a
discussion of RFC-3066 language tags.

=item dest

Destination Language

=item host

Specify the host to contact

=item port

Specify the port number

=back

=cut

use I18N::LangTags qw(is_language_tag);

sub new {
    my ($class, %options) = (@_);

    my $self = bless { %config }, $class;

    croak "Must supply source and destination language"
	unless (defined $options{src} and defined $options{dest});

    is_language_tag($self->{src} = delete $options{src})
	or croak "$self->{src} is not a valid RFC3066 language tag";

    is_language_tag($self->{dest} = delete $options{dest})
	or croak "$self->{dest} is not a valid RFC3066 language tag";

    $self->config(%options);

    $self->{pair} = $self->{src} . "_" . $self->{dest};

    my $custom_port = $servers{$self->{pair}};

    if ( defined $custom_port ) {
	($self->{host}, $self->{port})
	    = ($custom_port =~ m/^(.*)(?: (:\d+) )$/);
    }

    $self->{port} ||= _default_port($self->{pair});

    return $self;
}

=head1 METHODS

The following methods may be called on Lingua::Translate::SysTran
objects.

=head2 translate($text) : $translated

Translates the given text.  die's on any kind of error.

=cut

use IO::Socket;
BEGIN {
    # use Unicode::MapUTF8 if it is available
    eval "use Unicode::MapUTF8 qw(from_utf8 to_utf8);";
    if ( $@ ) {
	eval 'no strict; sub from_utf8 { %a=(@_); $a{"-string"} } '.
	    '*{to_utf8} = \&from_utf8';
    }
};

sub translate {
    my $self = shift;
    UNIVERSAL::isa($self, __PACKAGE__)
	    or croak __PACKAGE__."::translate() called as function";

    # every back-end we know of speaks ISO-8859-1
    my $text = from_utf8( -string => (shift),
			  -charset => "iso-8859-1" );

    my $translated;

    my $request = (
		"METHOD=SOCKET\n".
		"ACTION=TRANSLATE\n".
		"SOURCE-CONTENT=".length($text)."\n".
		"$text\n"
	       );

    my $socket = IO::Socket::INET->new
	(
	 Proto    => 'tcp',
	 PeerAddr => $self->{host},
	 PeerPort => $self->{port},
	 Reuse    => 1,
	);

    $self->_barf("Connection failed; $!") unless $socket;

    ## Sending request
    $socket->write($request, length($request))
	|| $self->_barf ('write failed; '.$!);

    $socket->flush;

    ## Then waiting for answer
    my ($error, $error_message, $time);
    while ($_ = $socket->getline()) {
	my ($command, $value) = (m/^([\w\-]+)=(.*)$/)
	    or $self->_barf("protocol error");

	if ( $command eq "ERR" ) {
	    $error = $value;
	} elsif ( $command eq "TIME" ) {
	    $time = $value;
	} elsif ( $command eq "EMSG" ) {
	    $error_message = $value;
	} elsif ( $command eq "OUTPUT-CONTENT" ) {
	    # data always follows
	    my $bytes_read = $socket->read($translated, $value);
	    ($bytes_read == $value)
		or $self->_barf("short read");
	    last;
	} else {
	    $self->_barf("protocol mismatch; $command");
	}
    }

    # close connection
    $socket->close;

    $self->_barf($error_message) if $error;

    # trim excess line feeds at end of string
    $translated =~ s/\n*$//;

    return to_utf8( -string => $translated,
		    -charset => "iso-8859-1" );
}

sub _barf {
    my $self = shift;
    my $message = shift;

    die ($message . " talking to $self->{host}:$self->{port} "
	 .$self->{pair} );

}

=head2 available() : @list

Returns a list of available language pairs, in the form of "XX_YY",
where XX is the source language and YY is the destination.  If you
want the english name of a language tag, call
I18N::LangTags::List::name() on it.  See L<I18N::LangTags::List>.

If you call this function without configuring the package, it returns
all of the languages that there are known back-ends for.

=cut

sub available {

    my $self = shift;
    UNIVERSAL::isa($self, __PACKAGE__)
	    or croak __PACKAGE__."::available() called as function";

    my @a = keys %one_letter_codes;

    # English; "the new universal language?"
    # mi spitu fo le bango pe le glico
    return (
	    keys %servers ||
	    grep /en/, ( map { my $a=$_; map{"${_}_$a"} my @a } @a )
	   );

}

=head1 CONFIGURATION FUNCTIONS

=head2 config(option => $value)

This function sets defaults for use when constructing objects.

=cut

sub config {

    my $self;
    if ( UNIVERSAL::isa($_[0], __PACKAGE__) ) {
        $self = shift;
    } else {
	$self = \%config;
    }

    while ( my ($option, $value) = splice @_, 0, 2 ) {

	if ( $option eq "pairs" ) {

	    # configure a pair
	    while ( my ($pair, $server) = each %$value ) {
		$servers{$pair} = $server;
	    }

	} elsif ( $option =~ m/^(host|port)$/) {

	    # configure host/port
	    $self->{$option} = $value;

	} else {

	    croak "Unknown configuration option $option";
	}
    }
}

=over

=item host

Defines the hostname to use if no hostname/port is defined for a
language pair.  The default value is "localhost".  Do not specify a
port number.

=item servers

The value to this configuration option must be a hash reference from a
language pair (in XX_YY form) to a hostname, optionally followed by a
colon and a port number.

If this configuration option is defined, then attempts to translate
undefined languages will fail.  There is no default value for this
option.

=back

=head1 A Note on default port numbers

Returns the host name and port number for the given language pair.

To determine the default port number, take the one-letter code for the
language from the below table, express as a number in base 25 (A=0,
B=1, etc) and then add 10000 decimal.  Eg en => de would be EG, which
is 106 decimal, or port 10106.

=head2 ONE LETTER LANGUAGE CODES

 en => E
 de => G
 it => I
 fr => F
 pt => P
 es => S
 el => K

=cut

sub _default_port {
    my $pair = shift;

    my ($src, $tgt) =
	($pair =~ m/^(..)_(..)/)
	    or croak "$pair is not a valid language pair";

    # FIXME - won't work on EBCDIC systems
    my $A = ord("A");
    my $num = ( (ord($one_letter_codes{$src}) - $A) * 25
		+ord($one_letter_codes{$tgt}) - $A       );

    return $num + 10000;
}

# extract configuration options from the POD
use Pod::Constants
    'NAME' => sub { ($VERSION) = (m/(\d+\.\d+)/); },
    'CONFIGURATION FUNCTIONS' => sub {
	Pod::Constants::add_hook
		('*item' => sub {
		     my ($varname) = m/(\w+)/;
		     #my ($default) = m/The default value is\s+"(.*)"\./s;
		     my ($default) = m/The default value is\s+"(.*)"/s;
		     config($varname => $default) if $default;
		 }
		);
	Pod::Constants::add_hook
		(
		 '*back' => sub {

		     # an ugly hack?
		     $config{agent} .= $VERSION;

		     Pod::Constants::delete_hook('*item');
		     Pod::Constants::delete_hook('*back');
		 }
		);
    },
    'ONE LETTER LANGUAGE CODES' => \%one_letter_codes;

=head1 BUGS/TODO

No support for non-ISO-8859-1 character sets - with the software I
have, there is no option.

=head1 SEE ALSO

L<Lingua::Translate>, L<LWP::UserAgent>, L<Unicode::MapUTF8>

=head1 AUTHOR

Sam Vilain, <enki@snowcra.sh>

=cut

1;