The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#
# Locale::Script - ISO codes for script identification (ISO 15924)
#
# $Id: Script.pm,v 2.7 2004/06/10 21:19:34 neilb Exp $
#

package Locale::Script;
use strict;
require 5.002;

require Exporter;
use Carp;
use Locale::Constants;


#-----------------------------------------------------------------------
#	Public Global Variables
#-----------------------------------------------------------------------
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION   = sprintf("%d.%02d", q$Revision: 2.7 $ =~ /(\d+)\.(\d+)/);
@ISA       = qw(Exporter);
@EXPORT    = qw(code2script script2code
                all_script_codes all_script_names
		script_code2code
		LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC);

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------
my $CODES     = [];
my $COUNTRIES = [];


#=======================================================================
#
# code2script ( CODE [, CODESET ] )
#
#=======================================================================
sub code2script
{
    my $code = shift;
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;


    return undef unless defined $code;

    #-------------------------------------------------------------------
    # Make sure the code is in the right form before we use it
    # to look up the corresponding script.
    # We have to sprintf because the codes are given as 3-digits,
    # with leading 0's. Eg 070 for Egyptian demotic.
    #-------------------------------------------------------------------
    if ($codeset == LOCALE_CODE_NUMERIC)
    {
	return undef if ($code =~ /\D/);
	$code = sprintf("%.3d", $code);
    }
    else
    {
	$code = lc($code);
    }

    if (exists $CODES->[$codeset]->{$code})
    {
        return $CODES->[$codeset]->{$code};
    }
    else
    {
        #---------------------------------------------------------------
        # no such script code!
        #---------------------------------------------------------------
        return undef;
    }
}


#=======================================================================
#
# script2code ( SCRIPT [, CODESET ] )
#
#=======================================================================
sub script2code
{
    my $script = shift;
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;


    return undef unless defined $script;
    $script = lc($script);
    if (exists $COUNTRIES->[$codeset]->{$script})
    {
        return $COUNTRIES->[$codeset]->{$script};
    }
    else
    {
        #---------------------------------------------------------------
        # no such script!
        #---------------------------------------------------------------
        return undef;
    }
}


#=======================================================================
#
# script_code2code ( CODE, IN-CODESET, OUT-CODESET )
#
#=======================================================================
sub script_code2code
{
    (@_ == 3) or croak "script_code2code() takes 3 arguments!";

    my $code = shift;
    my $inset = shift;
    my $outset = shift;
    my $outcode;
    my $script;


    return undef if $inset == $outset;
    $script = code2script($code, $inset);
    return undef if not defined $script;
    $outcode = script2code($script, $outset);
    return $outcode;
}


#=======================================================================
#
# all_script_codes()
#
#=======================================================================
sub all_script_codes
{
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    return keys %{ $CODES->[$codeset] };
}


#=======================================================================
#
# all_script_names()
#
#=======================================================================
sub all_script_names
{
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    return values %{ $CODES->[$codeset] };
}


#=======================================================================
#
# initialisation code - stuff the DATA into the ALPHA2 hash
#
#=======================================================================
{
    my   ($alpha2, $alpha3, $numeric);
    my    $script;
    local $_;


    while (<DATA>)
    {
        next unless /\S/;
        chop;
        ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4);

        $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script;
        $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2;

	if ($alpha3)
	{
            $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script;
            $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3;
	}

	if ($numeric)
	{
            $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script;
            $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric;
	}

    }

    close(DATA);
}

1;

__DATA__
am:ama:130:Aramaic
ar:ara:160:Arabic
av:ave:151:Avestan
bh:bhm:300:Brahmi (Ashoka)
bi:bid:372:Buhid
bn:ben:325:Bengali
bo:bod:330:Tibetan
bp:bpm:285:Bopomofo
br:brl:570:Braille
bt:btk:365:Batak
bu:bug:367:Buginese (Makassar)
by:bys:550:Blissymbols
ca:cam:358:Cham
ch:chu:221:Old Church Slavonic
ci:cir:291:Cirth
cm:cmn:402:Cypro-Minoan
co:cop:205:Coptic
cp:cpr:403:Cypriote syllabary
cy:cyr:220:Cyrillic
ds:dsr:250:Deserel (Mormon)
dv:dvn:315:Devanagari (Nagari)
ed:egd:070:Egyptian demotic
eg:egy:050:Egyptian hieroglyphs
eh:egh:060:Egyptian hieratic
el:ell:200:Greek
eo:eos:210:Etruscan and Oscan
et:eth:430:Ethiopic
gl:glg:225:Glagolitic
gm:gmu:310:Gurmukhi
gt:gth:206:Gothic
gu:guj:320:Gujarati
ha:han:500:Han ideographs
he:heb:125:Hebrew
hg:hgl:420:Hangul
hm:hmo:450:Pahawh Hmong
ho:hoo:371:Hanunoo
hr:hrg:410:Hiragana
hu:hun:176:Old Hungarian runic
hv:hvn:175:Kok Turki runic
hy:hye:230:Armenian
iv:ivl:610:Indus Valley
ja:jap:930:(alias for Han + Hiragana + Katakana)
jl:jlg:445:Cherokee syllabary
jw:jwi:360:Javanese
ka:kam:241:Georgian (Mxedruli)
kh:khn:931:(alias for Hangul + Han)
kk:kkn:411:Katakana
km:khm:354:Khmer
kn:kan:345:Kannada
kr:krn:357:Karenni (Kayah Li)
ks:kst:305:Kharoshthi
kx:kax:240:Georgian (Xucuri)
la:lat:217:Latin
lf:laf:215:Latin (Fraktur variant)
lg:lag:216:Latin (Gaelic variant)
lo:lao:356:Lao
lp:lpc:335:Lepcha (Rong)
md:mda:140:Mandaean
me:mer:100:Meroitic
mh:may:090:Mayan hieroglyphs
ml:mlm:347:Malayalam
mn:mon:145:Mongolian
my:mya:350:Burmese
na:naa:400:Linear A
nb:nbb:401:Linear B
og:ogm:212:Ogham
or:ory:327:Oriya
os:osm:260:Osmanya
ph:phx:115:Phoenician
ph:pah:150:Pahlavi
pl:pld:282:Pollard Phonetic
pq:pqd:295:Klingon plQaD
pr:prm:227:Old Permic
ps:pst:600:Phaistos Disk
rn:rnr:211:Runic (Germanic)
rr:rro:620:Rongo-rongo
sa:sar:110:South Arabian
si:sin:348:Sinhala
sj:syj:137:Syriac (Jacobite variant)
sl:slb:440:Unified Canadian Aboriginal Syllabics
sn:syn:136:Syriac (Nestorian variant)
sw:sww:281:Shavian (Shaw)
sy:syr:135:Syriac (Estrangelo)
ta:tam:346:Tamil
tb:tbw:373:Tagbanwa
te:tel:340:Telugu
tf:tfn:120:Tifnagh
tg:tag:370:Tagalog
th:tha:352:Thai
tn:tna:170:Thaana
tw:twr:290:Tengwar
va:vai:470:Vai
vs:vsp:280:Visible Speech
xa:xas:000:Cuneiform, Sumero-Akkadian
xf:xfa:105:Cuneiform, Old Persian
xk:xkn:412:(alias for Hiragana + Katakana)
xu:xug:106:Cuneiform, Ugaritic
yi:yii:460:Yi
zx:zxx:997:Unwritten language
zy:zyy:998:Undetermined script
zz:zzz:999:Uncoded script