package CGI::Lingua;
use warnings;
use strict;
use Carp;
use vars qw($VERSION);
our $VERSION = '0.51';
=head1 NAME
CGI::Lingua - Create a multilingual web page
=head1 VERSION
Version 0.51
=cut
=head1 SYNOPSIS
No longer does your website need to be in English only.
CGI::Lingua provides a simple basis to determine which language to display a
website. The website tells CGI::Lingua which languages it supports. Based on
that list CGI::Lingua tells the application which language the user would like
to use.
use CGI::Lingua;
# ...
my $l = CGI::Lingua->new(supported => ['en', 'fr', 'en-gb', 'en-us']);
my $language = $l->language();
if ($language eq 'English') {
print '<P>Hello</P>';
} elsif($language eq 'French') {
print '<P>Bonjour</P>';
} else { # $language eq 'Unknown'
my $rl = $l->requested_language();
print "<P>Sorry for now this page is not available in $rl.</P>";
}
my $c = $l->country();
if ($c eq 'us') {
# print contact details in the US
} elsif ($c eq 'ca') {
# print contact details in Canada
} else {
# print worldwide contact details
}
# ...
use CHI;
use CGI::Lingua;
# ...
my $cache = CHI->new(driver => 'File', root_dir => '/tmp/cache', namespace => 'CGI::Lingua-countries');
my $l = CGI::Lingua->new({ supported => ['en', 'fr'], cache => $cache });
=head1 SUBROUTINES/METHODS
=head2 new
Creates a CGI::Lingua object.
Takes one mandatory parameter: a list of languages, in RFC-1766 format,
that the website supports.
Language codes are of the form primary-code [ - country-code ] e.g.
'en', 'en-gb' for English and British English respectively.
For a list of primary-codes refer to ISO-639 (e.g. 'en' for English).
For a list of country-codes refer to ISO-3166 (e.g. 'gb' for United Kingdom).
# We support English, French, British and American English, in that order
my $l = CGI::Lingua(supported => [('en', 'fr', 'en-gb', 'en-us')]);
Takes optional parameter cache, an object which is used to cache country
lookups.
This cache object is an object that understands get() and set() messages,
such as a L<CHI> object.
Takes an optional boolean parameter syslog, to log messages to
L<Sys::Syslog>.
Takes optional parameter logger, an object which is used for warnings
and traces.
This logger object is an object that understands warn() and trace()
messages, such as a L<Log::Log4perl> object.
Since emitting warnings from a CGI class can result in messages being lost (you
may forget to look in your server's log), or appearing to the client in
amongst HTML causing invalid HTML, it is recommended either either syslog
or logger (or both) are set.
If neither is given, L<Carp> will be used.
Takes an optional parameter dont_use_ip. By default, if none of the
requested languages are supported, CGI::Lingua->language() looks in the IP
address for the language to use. This may be not what you want, so use this
option to disable the feature.
=cut
sub new {
my $proto = shift;
my $class = ref($proto) || $proto;
my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
# TODO: check that the number of supported languages is > 0
# unless($params{supported} && ($#params{supported} > 0)) {
# croak('You must give a list of supported languages');
# }
unless($params{supported}) {
croak('You must give a list of supported languages');
}
return bless {
_supported => $params{supported}, # List of languages (two letters) that the application
_cache => $params{cache}, # CHI
# _rlanguage => undef, # Requested language
# _slanguage => undef, # Language that the website should display
# _sublanguage => undef, # E.g. United States for en-US if you want American English
# _slanguage_code_alpha2 => undef, # E.g en, fr
# _sublanguage_code_alpha2 => undef, # E.g. us, gb
# _country => undef, # Two letters, e.g. gb
# _locale => undef, # Locale::Object::Country
_syslog => $params{syslog},
_dont_use_ip => $params{dont_use_ip} || 0,
_logger => $params{logger},
_have_ipcountry => -1, # -1 = don't know
_have_geoip => -1, # -1 = don't know
}, $class;
}
# Emit a warning message somewhere
sub _warn {
my ($self, $params) = @_;
my $warning = $$params{'warning'};
return unless($warning);
if($self->{_syslog}) {
require Sys::Syslog;
require CGI::Info;
Sys::Syslog->import();
openlog(CGI::Info->new()->script_name(), 'cons,pid', 'user');
syslog('warning', $warning);
closelog();
}
if($self->{_logger}) {
$self->{_logger}->warn($warning);
} elsif(!defined($self->{_syslog})) {
carp($warning);
}
}
=head2 language
Tells the CGI application what language to display its messages in.
The language is the natural name e.g. 'English' or 'Japanese'.
Sublanguages are handled sensibly, so that if a client requests U.S. English
on a site that only serves British English, language() will return 'English'.
If none of the requested languages is included within the supported lists,
language() returns 'Unknown'.
use CGI::Lingua;
# Site supports English and British English
my $l = CGI::Lingua->new(supported => ['en', 'fr', 'en-gb']);
# If the browser requests 'en-us' , then language will be 'English' and
# sublanguage will be undefined because we weren't able to satisfy the
# request
# Site supports British English only
my $l = CGI::Lingua->new({supported => ['fr', 'en-gb']});
# If the browser requests 'en-us' , then language will be 'English' and
# sublanguage will also be undefined, which may seem strange, but it
# ensures that sites behave sensibly.
=cut
sub language {
my $self = shift;
unless($self->{_slanguage}) {
$self->_find_language();
}
return $self->{_slanguage};
}
=head2 name
Synonym for language, for compatibility with Local::Object::Language
=cut
sub name {
my $self = shift;
return $self->language();
}
=head2 sublanguage
Tells the CGI what variant to use e.g. 'United Kingdom', or 'Unknown' if
it can't be determined.
Sublanguages are handled sensibly, so that if a client requests U.S. English
on a site that only serves British English, sublanguage() will return undef.
=cut
sub sublanguage {
my $self = shift;
unless($self->{_sublanguage}) {
$self->_find_language();
}
return $self->{_sublanguage};
}
=head2 language_code_alpha2
Gives the two character representation of the supported language, e.g. 'en'
when you've asked for en-gb.
If none of the requested languages is included within the supported lists,
language_code_alpha2() returns undef.
=cut
sub language_code_alpha2 {
my $self = shift;
unless($self->{_slanguage_code_alpha2}) {
$self->_find_language();
}
return $self->{_slanguage_code_alpha2};
}
=head2 code_alpha2
Synonym for language_code_alpha2, kept for historical reasons.
=cut
sub code_alpha2 {
my $self = shift;
return $self->language_code_alpha2();
}
=head2 sublanguage_code_alpha2
Gives the two character representation of the supported language, e.g. 'gb'
when you've asked for en-gb, or undef.
=cut
sub sublanguage_code_alpha2 {
my $self = shift;
unless($self->{_sublanguage_code_alpha2}) {
$self->_find_language();
}
return $self->{_sublanguage_code_alpha2};
}
=head2 requested_language
Gives a human readable rendition of what language the user asked for whether
or not it is supported.
=cut
sub requested_language {
my $self = shift;
unless($self->{_rlanguage}) {
$self->_find_language();
}
return $self->{_rlanguage};
}
sub _find_language {
my $self = shift;
$self->{_rlanguage} = 'Unknown';
$self->{_slanguage} = 'Unknown';
require Locale::Object::Country;
Locale::Object::Country->import();
# Use what the client has said
if($ENV{'HTTP_ACCEPT_LANGUAGE'}) {
require I18N::AcceptLanguage;
require Locale::Language;
I18N::AcceptLanguage->import();
Locale::Language->import();
# Workaround for RT 74338
local $SIG{__WARN__} = sub {
if($_[0] !~ /^Use of uninitialized value/) {
warn $_[0];
}
};
my $l = I18N::AcceptLanguage->new()->accepts($ENV{'HTTP_ACCEPT_LANGUAGE'}, $self->{_supported});
$SIG{__WARN__} = 'DEFAULT';
if((!$l) && ($ENV{'HTTP_ACCEPT_LANGUAGE'} =~ /(.+)-.+/)) {
# Fall back position, e,g. we want US English on a site
# only giving British English, so allow it as English.
# The calling program can detect that it's not the
# wanted flavour of English by looking at
# requested_language
if(I18N::AcceptLanguage->new()->accepts($1, $self->{_supported})) {
$l = $1;
}
}
if($l) {
$self->{_slanguage} = Locale::Language::code2language($l);
if($self->{_slanguage}) {
# We have the language, but not the right
# sublanguage, e.g. they want US English but we
# only support British English or English
# wanted: en-us, got en-gb and en
$self->{_slanguage_code_alpha2} = $l;
$self->{_rlanguage} = $self->{_slanguage};
if($ENV{'HTTP_ACCEPT_LANGUAGE'} =~ /..-(..)$/) {
my $l = Locale::Object::Country->new(code_alpha2 => $1);
if($l) {
$self->{_rlanguage} .= ' (' . $l->name . ')';
# The requested sublanguage
# isn't supported so don't
# define that
}
} elsif($ENV{'HTTP_ACCEPT_LANGUAGE'} =~ /..-([a-z]{2,3})$/i) {
my $l = Locale::Object::Country->new(code_alpha3 => $1);
if($l) {
$self->{_rlanguage} .= ' (' . $l->name . ')';
# The requested sublanguage isn't
# supported so don't define that
}
}
return;
}
# TODO: Handle es-419 "Spanish (Latin America)"
if($l =~ /(.+)-(..)$/) {
my $alpha2 = $1;
my $variety = $2;
my $accepts = I18N::AcceptLanguage->new()->accepts($l, $self->{_supported});
if($accepts) {
$self->{_slanguage} = Locale::Language::code2language($accepts);
if(length($variety) == 2) {
my $c = Locale::Object::Country->new(code_alpha2 => $variety);
if(defined($c)) {
$self->{_sublanguage} = $c->name;
}
}
if($self->{_slanguage}) {
$self->{_slanguage_code_alpha2} = $accepts;
if($self->{_sublanguage}) {
$self->{_rlanguage} = "$self->{_slanguage} ($self->{_sublanguage})";
}
$self->{_sublanguage_code_alpha2} = $variety;
return;
}
}
my $lang = Locale::Language::code2language($alpha2);
unless($lang) {
$lang = $1;
}
$self->{_rlanguage} = $lang;
$self->_get_closest($alpha2, $alpha2);
if($self->{_sublanguage}) {
$ENV{'HTTP_ACCEPT_LANGUAGE'} =~ /(.{2})-(..)/;
$variety = lc($2);
# Ignore en-029 etc (Carribean English)
if($variety =~ /[a-z]{2,3}/) {
if($variety eq 'uk') {
# ???
$self->_warn({
warning => "Resetting country code to GB for $ENV{'HTTP_ACCEPT_LANGUAGE'}"
});
$variety = 'gb';
}
my $db = Locale::Object::DB->new();
my @results = $db->lookup(
table => 'country',
result_column => '*',
search_column => 'code_alpha2',
value => $variety
);
if(defined($results[0])) {
eval {
$lang = Locale::Object::Country->new(code_alpha2 => $variety);
};
} else {
$lang = undef;
}
if($@ || !defined($lang)) {
$self->{_sublanguage} = 'Unknown';
$self->_warn({
warning => "Can't determine values for $ENV{'HTTP_ACCEPT_LANGUAGE'}"
});
} else {
$self->{_sublanguage} = $lang->name;
}
if(defined($self->{_sublanguage})) {
$self->{_rlanguage} = "$self->{_slanguage} ($self->{_sublanguage})";
$self->{_sublanguage_code_alpha2} = $variety;
return;
}
}
}
}
}
if($self->{_slanguage} && ($self->{_slanguage} ne 'Unknown')) {
if($self->{_rlanguage} eq 'Unknown') {
require I18N::LangTags::Detect;
$self->{_rlanguage} = I18N::LangTags::Detect::detect();
}
if($self->{_rlanguage}) {
my $l = Locale::Language::code2language($self->{_rlanguage});
if($l) {
$self->{_rlanguage} = $l;
} else {
# We have the language, but not the right
# sublanguage, e.g. they want US English but we
# only support British English
# wanted: en-us, got en-gb and not en
}
return;
}
}
unless($self->{_rlanguage}) {
$self->{_rlanguage} = 'Unknown';
}
$self->{_slanguage} = 'Unknown';
}
if($self->{_dont_use_ip}) {
return;
}
# The client hasn't said which to use, guess from their IP address,
# or the requested language(s) isn't/aren't supported so use the IP
# address for an alternative
my $country = $self->country();
if(defined($country)) {
# Determine the first official language of the country
my $l = Locale::Object::Country->new(code_alpha2 => uc($country));
if($l) {
$l = ($l->languages_official)[0];
}
my $ip = $ENV{'REMOTE_ADDR'};
if($l && $l->name) {
$self->{_rlanguage} = $l->name;
unless((exists($self->{_slanguage})) && ($self->{_slanguage} ne 'Unknown')) {
# Check if the language is one that we support
# Don't bother with secondary language
require Locale::Language;
Locale::Language->import();
my $code = Locale::Language::language2code($self->{_rlanguage});
unless($code) {
if($ENV{'HTTP_ACCEPT_LANGUAGE'}) {
$code = Locale::Language::language2code($ENV{'HTTP_ACCEPT_LANGUAGE'});
}
unless($code) {
# If language is Norwegian (Nynorsk)
# lookup Norwegian
if($self->{_rlanguage} =~ /(.+)\s\(.+/) {
$code = Locale::Language::language2code($1);
}
unless($code) {
$self->_warn({
warning => "Can\'t determine code from IP $ip for requested language $self->{_rlanguage}"
});
}
}
}
if($code) {
$self->_get_closest($code, $l->code_alpha2);
}
}
if($self->{_cache} && defined($ip)) {
$country = $self->{_cache}->set($ip, $country, '1 hour');
}
} elsif(defined($ip)) {
$self->_warn({
warning => "Can't determine language from IP $ip, country $country"
});
}
}
}
# Try our very best to give the right country - if they ask for en-us and
# we only have en-gb then give it to them
sub _get_closest {
my ($self, $language_string, $alpha2) = @_;
foreach (@{$self->{_supported}}) {
my $s;
if(/^(.+)-.+/) {
$s = $1;
} else {
$s = $_;
}
if($language_string eq $s) {
$self->{_slanguage} = $self->{_rlanguage};
$self->{_slanguage_code_alpha2} = $alpha2;
last;
}
}
}
=head2 country
Returns the two character country code of the remote end in lower case.
If L<IP::Country> or L<Geo::IP> is installed, CGI::Lingua will make
use of that, otherwise it will do a Whois lookup.
If you do not have either installed I recommend you make use of the
caching capability of CGI::Lingua.
=cut
sub country {
my $self = shift;
# FIXME: If previous calls to country() return undef, we'll
# waste time going through again and no doubt returning undef
# again.
if($self->{_country}) {
return $self->{_country};
}
# mod_geoip
if(defined($ENV{'GEOIP_COUNTRY_CODE'})) {
$self->{_country} = lc($ENV{'GEOIP_COUNTRY_CODE'});
return $self->{_country};
}
my $ip = $ENV{'REMOTE_ADDR'};
unless($ip) {
return();
}
require Data::Validate::IP;
Data::Validate::IP->import();
unless(is_ipv4($ip)) {
if($ip eq '::1') {
# special case that is easy to handle
$ip = '127.0.0.1';
} else {
$self->_warn({
warning => "$ip isn't a valid IPv4 address\n"
});
return;
}
}
if($self->{_cache}) {
$self->{_country} = $self->{_cache}->get($ip);
if($self->{_logger}) {
if(defined($self->{_country})) {
$self->{_logger}->trace("Get $ip from cache = $self->{_country}");
} else {
$self->{_logger}->trace("$ip isn't in the cache");
}
}
}
unless(defined($self->{_country})) {
if(($ENV{'HTTP_CF_IPCOUNTRY'}) && ($ENV{'HTTP_CF_IPCOUNTRY'} ne 'XX')) {
# Hosted by Cloudfare
$self->{_country} = lc($ENV{'HTTP_CF_IPCOUNTRY'});
} else {
if($self->{_have_ipcountry} == -1) {
if(eval { require IP::Country; }) {
IP::Country->import();
$self->{_have_ipcountry} = 1;
$self->{_ipcountry} = IP::Country::Fast->new();
} else {
$self->{_have_ipcountry} = 0;
}
}
if($self->{_logger}) {
$self->{_logger}->debug("have_ipcountry $self->{_have_ipcountry}");
}
if($self->{_have_ipcountry} == 1) {
$self->{_country} = $self->{_ipcountry}->inet_atocc($ip);
if($self->{_country}) {
$self->{_country} = lc($self->{_country});
} else {
$self->_warn({
warning => "$ip is not known by IP::Country"
});
}
}
unless(defined($self->{_country})) {
if($self->{_have_geoip} == -1) {
if(eval { require Geo::IP; }) {
Geo::IP->import();
$self->{_have_geoip} = 1;
# GEOIP_STANDARD = 0, can't use that because you'll
# get a syntax error
$self->{_geoip} = Geo::IP->new(0);
} else {
$self->{_have_geoip} = 0;
}
}
if($self->{_have_geoip} == 1) {
$self->{_country} = $self->{_geoip}->country_code_by_addr($ip);
}
}
}
if($self->{_country} && ($self->{_country} eq 'eu')) {
delete($self->{_country});
}
unless($self->{_country}) {
require Net::Whois::IP;
Net::Whois::IP->import();
my $whois;
eval {
# Catch connection timeouts to
# whois.ripe.net by turning the carp
# into an error
local $SIG{__WARN__} = sub { die $_[0] };
$whois = Net::Whois::IP::whoisip_query($ip);
};
unless($@ || !defined($whois) || (ref($whois) ne 'HASH')) {
if(defined($whois->{Country})) {
$self->{_country} = $whois->{Country};
} elsif(defined($whois->{country})) {
$self->{_country} = $whois->{country};
}
if($self->{_country} && ($self->{_country} eq 'eu')) {
delete($self->{_country});
}
}
unless($self->{_country}) {
require Net::Whois::IANA;
Net::Whois::IANA->import();
my $iana = new Net::Whois::IANA;
eval {
$iana->whois_query(-ip => $ip);
};
unless ($@) {
$self->{_country} = $iana->country();
}
}
if($self->{_country} eq 'eu') {
delete($self->{_country});
}
if($self->{_country}) {
# 190.24.1.122 has carriage return in its WHOIS record
$self->{_country} =~ s/[\r\n]//g;
if($self->{_country} =~ /^(..)\s*#.*/) {
# Remove comments in the Whois record
$self->{_country} = $1;
}
}
}
}
if($self->{_country}) {
$self->{_country} = lc($self->{_country});
if($self->{_country} eq 'hk') {
# Hong Kong is no longer a country, but Whois thinks
# it is - try "whois 218.213.130.87"
$self->{_country} = 'cn';
} elsif($self->country eq 'eu') {
require Net::Subnet;
# RT-86809, Baidu claims it's in EU not CN
Net::Subnet->import();
if(subnet_matcher('185.10.104.0/22')->($ip)) {
$self->{_country} = 'cn';
} else {
# There is no country called 'eu'
$self->_warn({
warning => "$ip has country of eu"
});
delete($self->{_country});
}
}
if($self->{_cache}) {
$self->{_cache}->set($ip, $self->{_country}, '1 hour');
if($self->{_logger}) {
$self->{_logger}->debuf("Set $ip to $self->{_country}");
}
}
}
return $self->{_country};
}
=head2 locale
HTTP doesn't have a way of transmitting a browser's localisation information
which would be useful for default currency, date formatting etc.
This method attempts to detect the information, but it is a best guess
and is not 100% reliable. But it's better than nothing ;-)
Returns a L<Locale::Object::Country> object.
To be clear, if you're in the US and request the language in Spanish,
and the site supports it, language() will return 'Spanish', and locale() will
try to return the Locale::Object::Country for the US.
=cut
sub locale {
my $self = shift;
if($self->{_locale}) {
return $self->{_locale};
}
require Locale::Object::Country;
Locale::Object::Country->import();
# First try from the User Agent. Probably only works with Mozilla and
# Safari. I don't know about Opera. It won't work with IE or Chrome.
my $agent = $ENV{'HTTP_USER_AGENT'};
my $country;
if(defined($agent) && ($agent =~ /\((.+)\)/)) {
foreach(split(/;/, $1)) {
my $candidate = $_;
$candidate =~ s/^\s//g;
$candidate =~ s/\s$//g;
if($candidate =~ /^[a-zA-Z]{2}-([a-zA-Z]{2})$/) {
local $SIG{__WARN__} = undef;
my $c = Locale::Object::Country->new(code_alpha2 => $1);
if($c) {
$self->{_locale} = $c;
return $c;
}
# carp "Warning: unknown country $1 derived from $candidate in HTTP_USER_AGENT ($agent)";
}
}
eval {
require HTTP::BrowserDetect;
HTTP::BrowserDetect->import();
};
unless($@) {
my $browser = HTTP::BrowserDetect->new($agent);
if($browser && $browser->country()) {
my $c = Locale::Object::Country->new(code_alpha2 => $browser->country());
if($c) {
$self->{_locale} = $c;
return $c;
}
}
}
}
# Try from the IP address
$country = $self->country();
if($country) {
$country =~ s/[\r\n]//g;
my $c;
eval {
local $SIG{__WARN__} = sub { die $_[0] };
$c = Locale::Object::Country->new(code_alpha2 => $country);
};
unless($@) {
if($c) {
$self->{_locale} = $c;
return $c;
}
}
}
# Try mod_geoip
if(defined($ENV{'GEOIP_COUNTRY_CODE'})) {
$country = $ENV{'GEOIP_COUNTRY_CODE'};
my $c = Locale::Object::Country->new(code_alpha2 => $country);
if($c) {
$self->{_locale} = $c;
return $c;
}
}
return (); # returns undef
}
=head1 AUTHOR
Nigel Horne, C<< <njh at bandsman.co.uk> >>
=head1 BUGS
If HTTP_ACCEPT_LANGUAGE is 3 characters, e.g., es-419,
sublanguage() returns undef.
Please report any bugs or feature requests to C<bug-cgi-lingua at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CGI-Lingua>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SEE ALSO
Locale::Country::Object
HTTP::BrowserDetect
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc CGI::Lingua
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CGI-Lingua>
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/CGI-Lingua>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/CGI-Lingua>
=item * Search CPAN
L<http://search.cpan.org/dist/CGI-Lingua/>
=back
=head1 ACKNOWLEDGEMENTS
=head1 LICENSE AND COPYRIGHT
Copyright 2010-2014 Nigel Horne.
This program is released under the following licence: GPL
=cut
1; # End of CGI::Lingua