The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Char;
######################################################################
#
# Char - Character Oriented Perl by Magic Comment
#
# Copyright (c) 2010, 2011, 2013 INABA Hitoshi <ina@cpan.org>
#
######################################################################

use 5.00503;

BEGIN { eval q{ use vars qw($VERSION) } }
$VERSION = sprintf '%d.%02d', q$Revision: 0.11 $ =~ m/(\d+)/oxmsg;

sub LOCK_SH() {1}
sub LOCK_EX() {2}
sub LOCK_UN() {8}
sub LOCK_NB() {4}

local $^W = 1;
$| = 1;

sub unimport {}

BEGIN { eval q{ use vars qw($OSNAME $LANG) } }
($OSNAME, $LANG) = ($^O,       $ENV{'LANG'});
($OSNAME, $LANG) = ('MSWin32', undef)         if 0;
($OSNAME, $LANG) = ('darwin',  'ja_JP.UTF-8') if 0;
($OSNAME, $LANG) = ('MacOS',   undef)         if 0;
($OSNAME, $LANG) = ('solaris', 'ja')          if 0;
($OSNAME, $LANG) = ('hpux',    'SJIS')        if 0;
($OSNAME, $LANG) = ('aix',     'Ja_JP')       if 0;

#
# poor Symbol.pm - substitute of real Symbol.pm
#
BEGIN {
    my $genpkg = "Symbol::";
    my $genseq = 0;
    sub gensym () {
        my $name = "GEN" . $genseq++;

        # here, no strict qw(refs); if strict.pm exists

        my $ref = \*{$genpkg . $name};
        delete $$genpkg{$name};
        return $ref;
    }
}

#
# source code filter
#
sub import {
    my(undef,$filename) = caller 0;

    # when escaped script not exists or older
    my $e_mtime = (stat("$filename.e"))[9];
    my $mtime   = (stat($filename))[9];
    if ((not -e "$filename.e") or ($e_mtime < $mtime)) {

        # select filter software
        my $encoding = '';
        my $filter = '';
        if ($encoding = (from_magic_comment($filename) || from_chcp_lang())) {
            $filter = abspath($encoding);
            if ($filter eq '') {
                die "@{[__FILE__]}: filter software '$encoding.pm' not found in \@INC(@INC)\n";
            }
        }
        else {
            warn "@{[__FILE__]}: don't know which encoding.\n";
        }

        # flock script file
        my $fh1 = gensym();
        _open_r($fh1, $filename) or die "@{[__FILE__]}: Can't read open file: $filename\n";
        if ($OSNAME eq 'MacOS') {
            eval q{
                require Mac::Files;
                Mac::Files::FSpSetFLock($filename);
            };
        }
        else {
            eval q{ flock($fh1, LOCK_EX) };
        }

        # rewrite 'Char::' to any encoding
        my $fh2 = gensym();
        _open_w($fh2, "$filename.tmp") or die "@{[__FILE__]}: Can't write open file: $filename.tmp\n";
        while (<$fh1>) {
            s/\A \s* use \s+ Char \s* [^;]* ;\s* \z//oxmsg;
            s/\bChar::(ord|reverse|getc|length|substr|index|rindex)\b/$encoding.'::'.$1/ge;
            print {$fh2} $_;
        }
        close($fh2) or die "@{[__FILE__]}: Can't close file: $filename.tmp\n";

        # escape perl script
        my @system  = ();
        my $escaped = '';
        if ($OSNAME =~ m/\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
            @system    = map { _escapeshellcmd_MSWin32($_) } $^X, $filter, "$filename.tmp";
            ($escaped) = map { _escapeshellcmd_MSWin32($_) } "$filename.e";
        }
        elsif ($OSNAME eq 'MacOS') {
            @system    = map { _escapeshellcmd_MacOS($_) }   $^X, $filter, "$filename.tmp";
            ($escaped) = map { _escapeshellcmd_MacOS($_) }   "$filename.e";
        }
        else {
            @system    = map { _escapeshellcmd($_) }         $^X, $filter, "$filename.tmp";
            ($escaped) = map { _escapeshellcmd($_) }         "$filename.e";
        }
        if (_systemx(join ' ', @system, '>', $escaped) == 0) {
            unlink "$filename.tmp";
        }

        # inherit file mode
        my $mode = (stat($filename))[2] & 0777;
        chmod $mode, "$filename.e";

        # close file and unlock
        if ($OSNAME eq 'MacOS') {
            eval q{
                require Mac::Files;
                Mac::Files::FSpRstFLock($filename);
            };
        }
        close($fh1) or die "@{[__FILE__]}: Can't close file: $filename\n";
    }

    # execute escaped script
    my $system;
    if ($OSNAME =~ m/\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
        my $fh = gensym();
        _open_r($fh, $filename) or die "@{[__FILE__]}: Can't read open file: $filename\n";
        eval q{ flock($fh, LOCK_SH) };
        $system = _systemx(map { _escapeshellcmd_MSWin32($_) } $^X, "$filename.e", @ARGV);
        close($fh) or die "@{[__FILE__]}: Can't close file: $filename\n";
    }
    elsif ($OSNAME eq 'MacOS') {
        eval q{
            require Mac::Files;
            Mac::Files::FSpSetFLock($filename);
        };
        $system = _systemx(map { _escapeshellcmd_MacOS($_) } $^X, "$filename.e", @ARGV);
        eval q{
            require Mac::Files;
            Mac::Files::FSpRstFLock($filename);
        };
    }
    else {
        my $fh = gensym();
        _open_r($fh, $filename) or die "@{[__FILE__]}: Can't read open file: $filename\n";
        eval q{ flock($fh, LOCK_SH) };
        $system = _systemx(map { _escapeshellcmd($_) } $^X, "$filename.e", @ARGV);
        close($fh) or die "@{[__FILE__]}: Can't close file: $filename\n";
    }

    exit $system;
}

#
# open file in read mode
#
sub _open_r {
    my(undef,$file) = @_;
    $file =~ s#\A (\s) #./$1#oxms;
    return eval(q{open($_[0],'<',$_[1])}) ||
                  open($_[0],"< $file\0");
}

#
# open file in write mode
#
sub _open_w {
    my(undef,$file) = @_;
    $file =~ s#\A (\s) #./$1#oxms;
    return eval(q{open($_[0],'>',$_[1])}) ||
                  open($_[0],"> $file\0");
}

#
# safe system
#
sub _systemx {
    $| = 1;
    local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
#   return CORE::system { $_[0] } @_;
    return CORE::system           @_;
}

#
# escape shell command line on DOS-like system
#
sub _escapeshellcmd_MSWin32 {
    my($word) = @_;
    if ($word =~ / [ ] /oxms) {

        # both DOS-like and UNIX-like shell quote
        return qq{"$word"};
    }
    else {
        return $word;
    }
}

#
# escape shell command line on Mac OS
#
sub _escapeshellcmd_MacOS {
    my($word) = @_;
    return $word;
}

#
# escape shell command line on UNIX-like system
#
sub _escapeshellcmd {
    my($word) = @_;
    return $word;
}

#
# get encoding from magic comment
#
sub from_magic_comment {
    my($filename) = @_;
    my $encoding = '';

    my $fh = gensym();
    _open_r($fh, $filename) or die "@{[__FILE__]}: Can't read open file: $filename\n";
    while (<$fh>) {
        chomp;
        if (($encoding) = m/\A\s*[#].*?coding[:=]\s*(.+)/oxms) {
            last;
        }
    }
    close($fh) or die "@{[__FILE__]}: Can't close file: $filename\n";

    return '' unless $encoding;

    # resolve alias of encoding
    $encoding = lc $encoding;
    $encoding =~ tr/a-z0-9//cd;
    return { qw(

    ascii               USASCII
    usascii             USASCII

    shiftjis            Sjis
    shiftjisx0213       Sjis
    shiftjis2004        Sjis
    sjis                Sjis
    sjisx0213           Sjis
    sjis2004            Sjis
    cp932               Sjis
    windows31j          Sjis
    cswindows31j        Sjis
    sjiswin             Sjis
    macjapanese         Sjis
    macjapan            Sjis
    xsjis               Sjis
    mskanji             Sjis
    csshiftjis          Sjis
    windowscodepage932  Sjis
    ibmcp943            Sjis
    ms932               Sjis

    jisc6220            JIS8
    jisx0201            JIS8
    jis8                JIS8
    ank                 JIS8

    eucjp               EUCJP
    euc                 EUCJP
    ujis                EUCJP
    eucjpms             EUCJP
    eucjpwin            EUCJP
    cp51932             EUCJP

    utf8                UTF2
    utf2                UTF2
    utffss              UTF2
    utf8mac             UTF2

    cesu8               OldUTF8
    modifiedutf8        OldUTF8

    hp15                HP15
    informixv6als       INFORMIXV6ALS

    gb18030             GB18030
    gbk                 GBK
    gb2312              GBK
    cp936               GBK
    euccn               GBK

    uhc                 UHC
    ksx1001             UHC
    ksc5601             UHC
    ksc56011987         UHC
    ks                  UHC
    cp949               UHC
    windows949          UHC

    kps9566             KPS9566
    kps95662003         KPS9566
    kps95662000         KPS9566
    kps95661997         KPS9566
    kps956697           KPS9566
    euckp               KPS9566

    big5plus            Big5Plus
    big5                Big5Plus
    big5et              Big5Plus
    big5eten            Big5Plus
    tcabig5             Big5Plus
    cp950               Big5Plus

    big5hk              Big5HKSCS
    big5hkscs           Big5HKSCS
    hkbig5              Big5HKSCS
    hkscsbig5           Big5HKSCS

    latin1              Latin1
    isoiec88591         Latin1
    iso88591            Latin1
    iec88591            Latin1

    latin2              Latin2
    isoiec88592         Latin2
    iso88592            Latin2
    iec88592            Latin2

    latin3              Latin3
    isoiec88593         Latin3
    iso88593            Latin3
    iec88593            Latin3

    latin4              Latin4
    isoiec88594         Latin4
    iso88594            Latin4
    iec88594            Latin4

    cyrillic            Cyrillic
    isoiec88595         Cyrillic
    iso88595            Cyrillic
    iec88595            Cyrillic

    koi8r               KOI8R
    koi8u               KOI8U

    arabic              Arabic
    isoiec88596         Arabic
    iso88596            Arabic
    iec88596            Arabic

    greek               Greek
    isoiec88597         Greek
    iso88597            Greek
    iec88597            Greek

    hebrew              Hebrew
    isoiec88598         Hebrew
    iso88598            Hebrew
    iec88598            Hebrew

    latin5              Latin5
    isoiec88599         Latin5
    iso88599            Latin5
    iec88599            Latin5

    latin6              Latin6
    isoiec885910        Latin6
    iso885910           Latin6
    iec885910           Latin6

    tis620              TIS620
    tis6202533          TIS620
    isoiec885911        TIS620
    iso885911           TIS620
    iec885911           TIS620

    latin7              Latin7
    isoiec885913        Latin7
    iso885913           Latin7
    iec885913           Latin7

    latin8              Latin8
    isoiec885914        Latin8
    iso885914           Latin8
    iec885914           Latin8

    latin9              Latin9
    isoiec885915        Latin9
    iso885915           Latin9
    iec885915           Latin9

    latin10             Latin10
    isoiec885916        Latin10
    iso885916           Latin10
    iec885916           Latin10

    windows1252         Windows1252

    windows1258         Windows1258

    )}->{$encoding} || $encoding;
}

#
# encoding from chcp or LANG environment variable
#
sub from_chcp_lang {
    my $encoding = '';

    # Microsoft Windows
    if ($OSNAME eq 'MSWin32') {
        $encoding = {

        # Code Page Identifiers (Windows)
        # Identifier .NET Name Additional information

          '037' => '', # IBM037 IBM EBCDIC US-Canada
          '437' => '', # IBM437 OEM United States
          '500' => '', # IBM500 IBM EBCDIC International
          '708' => 'Arabic', # ASMO-708 Arabic (ASMO 708)
          '709' => '', #  Arabic (ASMO-449+, BCON V4)
          '710' => '', #  Arabic - Transparent Arabic
          '720' => '', # DOS-720 Arabic (Transparent ASMO); Arabic (DOS)
          '737' => '', # ibm737 OEM Greek (formerly 437G); Greek (DOS)
          '775' => '', # ibm775 OEM Baltic; Baltic (DOS)
          '850' => '', # ibm850 OEM Multilingual Latin 1; Western European (DOS)
          '852' => '', # ibm852 OEM Latin 2; Central European (DOS)
          '855' => '', # IBM855 OEM Cyrillic (primarily Russian)
          '857' => '', # ibm857 OEM Turkish; Turkish (DOS)
          '858' => '', # IBM00858 OEM Multilingual Latin 1 + Euro symbol
          '860' => '', # IBM860 OEM Portuguese; Portuguese (DOS)
          '861' => '', # ibm861 OEM Icelandic; Icelandic (DOS)
          '862' => '', # DOS-862 OEM Hebrew; Hebrew (DOS)
          '863' => '', # IBM863 OEM French Canadian; French Canadian (DOS)
          '864' => '', # IBM864 OEM Arabic; Arabic (864)
          '865' => '', # IBM865 OEM Nordic; Nordic (DOS)
          '866' => '', # cp866 OEM Russian; Cyrillic (DOS)
          '869' => '', # ibm869 OEM Modern Greek; Greek, Modern (DOS)
          '870' => '', # IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
          '874' => 'TIS620', # windows-874 ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows)
          '875' => '', # cp875 IBM EBCDIC Greek Modern
          '932' => 'Sjis', # shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS)
          '936' => 'GBK', # gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
          '949' => 'UHC', # ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code)
          '950' => 'Big5Plus', # big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
         '1026' => '', # IBM1026 IBM EBCDIC Turkish (Latin 5)
         '1047' => '', # IBM01047 IBM EBCDIC Latin 1/Open System
         '1140' => '', # IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
         '1141' => '', # IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
         '1142' => '', # IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
         '1143' => '', # IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
         '1144' => '', # IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
         '1145' => '', # IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
         '1146' => '', # IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
         '1147' => '', # IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
         '1148' => '', # IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
         '1149' => '', # IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
         '1200' => '', # utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
         '1201' => '', # unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications
         '1250' => '', # windows-1250 ANSI Central European; Central European (Windows)
         '1251' => '', # windows-1251 ANSI Cyrillic; Cyrillic (Windows)
         '1252' => 'Windows1252', # windows-1252 ANSI Latin 1; Western European (Windows)
         '1253' => '', # windows-1253 ANSI Greek; Greek (Windows)
         '1254' => '', # windows-1254 ANSI Turkish; Turkish (Windows)
         '1255' => 'Hebrew', # windows-1255 ANSI Hebrew; Hebrew (Windows)
         '1256' => '', # windows-1256 ANSI Arabic; Arabic (Windows)
         '1257' => '', # windows-1257 ANSI Baltic; Baltic (Windows)
         '1258' => 'Windows1258', # windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows)
         '1361' => '', # Johab Korean (Johab)
        '10000' => '', # macintosh MAC Roman; Western European (Mac)
        '10001' => '', # x-mac-japanese Japanese (Mac)
        '10002' => '', # x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
        '10003' => '', # x-mac-korean Korean (Mac)
        '10004' => '', # x-mac-arabic Arabic (Mac)
        '10005' => '', # x-mac-hebrew Hebrew (Mac)
        '10006' => '', # x-mac-greek Greek (Mac)
        '10007' => '', # x-mac-cyrillic Cyrillic (Mac)
        '10008' => '', # x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
        '10010' => '', # x-mac-romanian Romanian (Mac)
        '10017' => '', # x-mac-ukrainian Ukrainian (Mac)
        '10021' => '', # x-mac-thai Thai (Mac)
        '10029' => '', # x-mac-ce MAC Latin 2; Central European (Mac)
        '10079' => '', # x-mac-icelandic Icelandic (Mac)
        '10081' => '', # x-mac-turkish Turkish (Mac)
        '10082' => '', # x-mac-croatian Croatian (Mac)
        '12000' => '', # utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
        '12001' => '', # utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
        '20000' => '', # x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
        '20001' => '', # x-cp20001 TCA Taiwan
        '20002' => '', # x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten)
        '20003' => '', # x-cp20003 IBM5550 Taiwan
        '20004' => '', # x-cp20004 TeleText Taiwan
        '20005' => '', # x-cp20005 Wang Taiwan
        '20105' => '', # x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
        '20106' => '', # x-IA5-German IA5 German (7-bit)
        '20107' => '', # x-IA5-Swedish IA5 Swedish (7-bit)
        '20108' => '', # x-IA5-Norwegian IA5 Norwegian (7-bit)
        '20127' => 'USASCII', # us-ascii US-ASCII (7-bit)
        '20261' => '', # x-cp20261 T.61
        '20269' => '', # x-cp20269 ISO 6937 Non-Spacing Accent
        '20273' => '', # IBM273 IBM EBCDIC Germany
        '20277' => '', # IBM277 IBM EBCDIC Denmark-Norway
        '20278' => '', # IBM278 IBM EBCDIC Finland-Sweden
        '20280' => '', # IBM280 IBM EBCDIC Italy
        '20284' => '', # IBM284 IBM EBCDIC Latin America-Spain
        '20285' => '', # IBM285 IBM EBCDIC United Kingdom
        '20290' => '', # IBM290 IBM EBCDIC Japanese Katakana Extended
        '20297' => '', # IBM297 IBM EBCDIC France
        '20420' => '', # IBM420 IBM EBCDIC Arabic
        '20423' => '', # IBM423 IBM EBCDIC Greek
        '20424' => '', # IBM424 IBM EBCDIC Hebrew
        '20833' => '', # x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended
        '20838' => '', # IBM-Thai IBM EBCDIC Thai
        '20866' => 'KOI8R', # koi8-r Russian (KOI8-R); Cyrillic (KOI8-R)
        '20871' => '', # IBM871 IBM EBCDIC Icelandic
        '20880' => '', # IBM880 IBM EBCDIC Cyrillic Russian
        '20905' => '', # IBM905 IBM EBCDIC Turkish
        '20924' => '', # IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
        '20932' => 'EUCJP', # EUC-JP Japanese (JIS 0208-1990 and 0121-1990)
        '20936' => '', # x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
        '20949' => '', # x-cp20949 Korean Wansung
        '21025' => '', # cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian
        '21027' => '', #  (deprecated)
        '21866' => 'KOI8U', # koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U)
        '28591' => 'Latin1', # iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO)
        '28592' => 'Latin2', # iso-8859-2 ISO 8859-2 Central European; Central European (ISO)
        '28593' => 'Latin3', # iso-8859-3 ISO 8859-3 Latin 3
        '28594' => 'Latin4', # iso-8859-4 ISO 8859-4 Baltic
        '28595' => 'Cyrillic', # iso-8859-5 ISO 8859-5 Cyrillic
        '28596' => 'Arabic', # iso-8859-6 ISO 8859-6 Arabic
        '28597' => 'Greek', # iso-8859-7 ISO 8859-7 Greek
        '28598' => 'Hebrew', # iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
        '28599' => 'Latin5', # iso-8859-9 ISO 8859-9 Turkish
        '28603' => 'Latin7', # iso-8859-13 ISO 8859-13 Estonian
        '28605' => 'Latin9', # iso-8859-15 ISO 8859-15 Latin 9
        '29001' => '', # x-Europa Europa 3
        '38598' => '', # iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
        '50220' => '', # iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
        '50221' => '', # csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
        '50222' => '', # iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
        '50225' => '', # iso-2022-kr ISO 2022 Korean
        '50227' => '', # x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
        '50229' => '', #  ISO 2022 Traditional Chinese
        '50930' => '', #  EBCDIC Japanese (Katakana) Extended
        '50931' => '', #  EBCDIC US-Canada and Japanese
        '50933' => '', #  EBCDIC Korean Extended and Korean
        '50935' => '', #  EBCDIC Simplified Chinese Extended and Simplified Chinese
        '50936' => '', #  EBCDIC Simplified Chinese
        '50937' => '', #  EBCDIC US-Canada and Traditional Chinese
        '50939' => '', #  EBCDIC Japanese (Latin) Extended and Japanese
        '51932' => 'EUCJP', # euc-jp EUC Japanese
        '51936' => '', # EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC)
        '51949' => '', # euc-kr EUC Korean
        '51950' => '', #  EUC Traditional Chinese
        '52936' => '', # hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
        '54936' => 'GB18030', # GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
        '57002' => '', # x-iscii-de ISCII Devanagari
        '57003' => '', # x-iscii-be ISCII Bengali
        '57004' => '', # x-iscii-ta ISCII Tamil
        '57005' => '', # x-iscii-te ISCII Telugu
        '57006' => '', # x-iscii-as ISCII Assamese
        '57007' => '', # x-iscii-or ISCII Oriya
        '57008' => '', # x-iscii-ka ISCII Kannada
        '57009' => '', # x-iscii-ma ISCII Malayalam
        '57010' => '', # x-iscii-gu ISCII Gujarati
        '57011' => '', # x-iscii-pa ISCII Punjabi
        '65000' => '', # utf-7 Unicode (UTF-7)
        '65001' => 'UTF2', # utf-8 Unicode (UTF-8)

        }->{(qx{chcp} =~ m/([0-9]{3,5}) \Z/oxms)[0]};
    }

    # C or POSIX
    elsif (not defined($LANG) or ($LANG eq '')) {
        $encoding = 'USASCII';
    }
    elsif ($LANG =~ m/\A (?: C | POSIX ) \z/oxms) {
        $encoding = 'USASCII';
    }

    # Apple Mac OS X
    elsif ($OSNAME eq 'darwin') {
        $encoding = 'UTF2';
    }

    # Apple MacOS
    elsif ($OSNAME eq 'MacOS') {
        die "@{[__FILE__]}: $OSNAME requires magick comment.\n";
    }

    # Oracle Solaris
    elsif ($OSNAME eq 'solaris') {
        my $lang = {

        # Oracle Solaris 10 8/11 Information Library

        qw(

        ar                       ar_EG.ISO8859-6
        bg_BG                    bg_BG.ISO8859-5
        ca                       ca_ES.ISO8859-1
        ca_ES                    ca_ES.ISO8859-1
        cs                       cs_CZ.ISO8859-2
        cs_CZ                    cs_CZ.ISO8859-2
        da                       da_DK.ISO8859-1
        da_DK                    da_DK.ISO8859-1
        da.ISO8859-15            da_DK.ISO8859-15
        de                       de_DE.ISO8859-1
        de_AT                    de_AT.ISO8859-1
        de_CH                    de_CH.ISO8859-1
        de_DE                    de_DE.ISO8859-1
        de.ISO8859-15            de_DE.ISO8859-15
        de.UTF-8                 de_DE.UTF-8
        el                       el_GR.ISO8859-7
        el_GR                    el_GR.ISO8859-7
        el.sun_eu_greek          el_GR.ISO8859-7
        el.UTF-8                 el_CY.UTF-8
        en_AU                    en_AU.ISO8859-1
        en_CA                    en_CA.ISO8859-1
        en_GB                    en_GB.ISO8859-1
        en_IE                    en_IE.ISO8859-1
        en_NZ                    en_NZ.ISO8859-1
        en_US                    en_US.ISO8859-1
        es                       es_ES.ISO8859-1
        es_AR                    es_AR.ISO8859-1
        es_BO                    es_BO.ISO8859-1
        es_CL                    es_CL.ISO8859-1
        es_CO                    es_CO.ISO8859-1
        es_CR                    es_CR.ISO8859-1
        es_EC                    es_EC.ISO8859-1 
        es_ES                    es_ES.ISO8859-1
        es_GT                    es_GT.ISO8859-1
        es.ISO8859-15            es_ES.ISO8859-15
        es_MX                    es_MX.ISO8859-1
        es_NI                    es_NI.ISO8859-1 
        es_PA                    es_PA.ISO8859-1
        es_PE                    es_PE.ISO8859-1
        es_PY                    es_PY.ISO8859-1
        es_SV                    es_SV.ISO8859-1
        es.UTF-8                 es_ES.UTF-8
        es_UY                    es_UY.ISO8859-1
        es_VE                    es_VE.ISO8859-1
        et                       et_EE.ISO8859-15
        et_EE                    et_EE.ISO8859-15
        fi                       fi_FI.ISO8859-1
        fi_FI                    fi_FI.ISO8859-1
        fi.ISO8859-15            fi_FI.ISO8859-15
        fr                       fr_FR.ISO8859-1
        fr_BE                    fr_BE.ISO8859-1
        fr_CA                    fr_CA.ISO8859-1
        fr_CH                    fr_CH.ISO8859-1
        fr_FR                    fr_FR.ISO8859-1
        fr.ISO8859-15            fr_FR.ISO8859-15
        fr.UTF-8                 fr_FR.UTF-8
        he                       he_IL.ISO8859-8
        he_IL                    he_IL.ISO8859-8
        hr_HR                    hr_HR.ISO8859-2
        hu                       hu_HU.ISO8859-2
        hu_HU                    hu_HU.ISO8859-2
        is_IS                    is_IS.ISO8859-1
        it                       it_IT.ISO8859-1
        it.ISO8859-15            it_IT.ISO8859-15
        it_IT                    it_IT.ISO8859-1
        it.UTF-8                 it_IT.UTF-8
        ja                       ja_JP.eucJP
        ko                       ko_KR.EUC
        ko.UTF-8                 ko_KR.UTF-8
        lt                       lt_LT.ISO8859-13
        lt_LT                    lt_LT.ISO8859-13
        lu                       lu_LU.ISO8859-15
        lv                       lv_LV.ISO8859-13
        lv_LV                    lv_LV.ISO8859-13
        mk_MK                    mk_MK.ISO8859-5
        nl                       nl_NL.ISO8859-1
        nl_BE                    nl_BE.ISO8859-1
        nl.ISO8859-15            nl_NL.ISO8859-15
        nl_NL                    nl_NL.ISO8859-1
        no                       nb_NO.ISO8859-1
        no_NO                    nb_NO.ISO8859-1
        no_NO.ISO8859-1@bokmal   nb_NO.ISO8859-1
        no_NO.ISO8859-1@nynorsk  nn_NO.ISO8859-1
        no_NY                    nn_NO.ISO8859-1
        nr                       nr_NR.ISO8859-2
        pl                       pl_PL.ISO8859-2
        pl_PL                    pl_PL.ISO8859-2
        pl.UTF-8                 pl_PL.UTF-8
        pt                       pt_PT.ISO8859-1
        pt_BR                    pt_BR.ISO8859-1
        pt.ISO8859-15            pt_PT.ISO8859-15
        pt_PT                    pt_PT.ISO8859-1
        ro_RO                    ro_RO.ISO8859-2
        ru                       ru_RU.ISO8859-5
        ru.koi8-r                ru_RU.KOI8-R
        ru_RU                    ru_RU.ISO8859-5
        ru.UTF-8                 ru_RU.UTF-8
        sh                       bs_BA.ISO8859-2
        sh_BA                    bs_BA.ISO8859-2
        sh_BA.ISO8859-2@bosnia   bs_BA.ISO8859-2
        sh_BA.UTF-8              bs_BA.UTF-8
        sk_SK                    sk_SK.ISO8859-2
        sl_SI                    sl_SI.ISO8859-2
        sq_AL                    sq_AL.ISO8859-2
        sr_CS                    sr_ME.UTF-8
        sr_CS.UTF-8              sr_ME.UTF-8
        sr_SP                    sr_ME.ISO8859-5
        sr_YU                    sr_ME.ISO8859-5
        sr_YU.ISO8859-5          sr_ME.ISO8859-5
        sv                       sv_SE.ISO8859-1
        sv_SE                    sv_SE.ISO8859-1
        sv.ISO8859-15            sv_SE.ISO8859-15
        sv.UTF-8                 sv_SE.UTF-8
        th                       th_TH.TIS620
        th_TH                    th_TH.TIS620
        th_TH.ISO8859-11         th_TH.TIS620
        tr                       tr_TR.ISO8859-9
        tr_TR                    tr_TR.ISO8859-9
        zh                       zh_CN.EUC
        zh.GBK                   zh_CN.GBK
        zh_TW                    zh_TW.EUC
        zh.UTF-8                 zh_CN.UTF-8
        ca_ES.ISO8859-15@euro    ca_ES.ISO8859-15
        de_AT.ISO8859-15@euro    de_AT.ISO8859-15
        de_DE.ISO8859-15@euro    de_DE.ISO8859-15
        de_DE.UTF-8@euro         de_DE.UTF-8
        el_GR.ISO8859-7@euro     el_GR.ISO8859-7
        en_IE.ISO8859-15@euro    en_IE.ISO8859-15
        es_ES.ISO8859-15@euro    es_ES.ISO8859-15
        es_ES.UTF-8@euro         es_ES.UTF-8
        fi_FI.ISO8859-15@euro    fi_FI.ISO8859-15
        fr_BE.ISO8859-15@euro    fr_BE.ISO8859-15
        fr_BE.UTF-8@euro         fr_BE.UTF-8
        fr_FR.ISO8859-15@euro    fr_FR.ISO8859-15
        fr_FR.UTF-8@euro         fr_FR.UTF-8
        it_IT.ISO8859-15@euro    it_IT.ISO8859-15
        it_IT.UTF-8@euro         it_IT.UTF-8
        nl_BE.ISO8859-15@euro    nl_BE.ISO8859-15
        nl_NL.ISO8859-15@euro    nl_NL.ISO8859-15
        pt_PT.ISO8859-15@euro    pt_PT.ISO8859-15
        cz                       cs_CZ.ISO8859-2
        cs_CZ                    cs_CZ.ISO8859-2
        cs_CZ.ISO8859-2          cs_CZ.ISO8859-2
        cs_CZ.UTF-8              cs_CZ.UTF-8
        cs_CZ.UTF-8@euro         cs_CZ.UTF-8
        ko_KR.EUC                ko_KR.EUC
        ko_KR.UTF-8              ko_KR.UTF-8
        zh_CN.EUC                zh_CN.EUC
        zh_CN.GBK                zh_CN.GBK
        zh_CN.UTF-8              zh_CN.UTF-8
        zh_TW.EUC                zh_TW.EUC

        )}->{$LANG} || $LANG;

        if ($lang eq 'ko_KR.EUC') {
            $encoding = 'UHC';
        }
        elsif ($lang eq 'zh_CN.EUC') {
            $encoding = 'GBK';
        }
        elsif ($lang eq 'zh_TW.EUC') {
            $encoding = 'N/A';
        }
        elsif (my($codeset) = $lang =~ m/\A [^.]+ \. ([^@]+) /oxms) {
            $encoding = {qw(

            5601         UHC
            ANSI1251     N/A
            BIG5         Big5Plus
            BIG5HK       Big5HKSCS
            EUC          N/A
            GB18030      GB18030
            GBK          GBK
            ISO/IEC646   USASCII
            ISO8859-1    Latin1
            ISO8859-13   Latin7
            ISO8859-15   Latin9
            ISO8859-2    Latin2
            ISO8859-5    Cyrillic
            ISO8859-6    Arabic
            ISO8859-7    Greek
            ISO8859-8    Hebrew
            ISO8859-9    Latin5
            KOI8-R       KOI8R
            PCK          Sjis
            TIS620       TIS620
            TIS620-2533  TIS620
            UTF-8        UTF2
            cns11643     N/A
            eucJP        EUCJP
            gb2312       GBK

            )}->{$codeset};
        }
    }

    # HP HP-UX
    elsif ($OSNAME eq 'hpux') {

        # HP-UX 9.x
        if ($LANG =~ m/\A japanese \z/oxms) {
            $encoding = 'Sjis';
        }
        elsif ($LANG =~ m/\A japanese\.euc \z/oxms) {
            $encoding = 'EUCJP';
        }

        # HP-UX 10.x
        if ($LANG =~ m/\A ja_JP\.SJIS \z/oxms) {
            $encoding = 'Sjis';
        }
        elsif ($LANG =~ m/\A ja_JP\.eucJP \z/oxms) {
            $encoding = 'EUCJP';
        }

        # HP-UX 11.x
        elsif (my($codeset) = $LANG =~ m/\A [^.]+ \. ([^@]+) /oxms) {
            $encoding = {

            # HP-UX 11i v3 Internationalization Features
            # Appendix -- Summary of Locale and codeset Conversion Support in HP-UX 11i v3
            # Locales

            qw(

            SJIS       Sjis
            arabic8    N/A
            big5       Big5Plus
            ccdc       N/A
            cp1251     N/A
            eucJP      EUCJP
            eucKR      UHC
            eucTW      N/A
            gb18030    GB18030
            greek8     N/A
            hebrew8    N/A
            hkbig5     Big5HKSCS
            hp15CN     N/A
            iso88591   Latin1
            iso885910  Latin6
            iso885911  TIS620
            iso885913  Latin7
            iso885915  Latin9
            iso88592   Latin2
            iso88593   Latin3
            iso88594   Latin4
            iso88595   Cyrillic
            iso88596   Arabic
            iso88597   Greek
            iso88598   Hebrew
            iso88599   Latin5
            kana8      N/A
            koi8r      KOI8R
            roman8     N/A
            tis620     TIS620
            turkish8   N/A
            utf8       UTF2

            )}->{$codeset};
        }
    }

    # IBM AIX
    elsif ($OSNAME eq 'aix') {
        my $codeset = {

        # SC23-4902-03
        # AIX 5L Version 5.3
        # National Language Support Guide and Reference
        # (c) Copyright International Business Machines Corporation 2002, 2006. All rights reserved.

        qw(

        ar_AA           ISO8859-6
        AR_AA           UTF-8
        Ar_AA           IBM-1046
        ar_AE           ISO8859-6
        AR_AE           UTF-8
        ar_DZ           ISO8859-6
        AR_DZ           UTF-8
        ar_BH           ISO8859-6
        AR_BH           UTF-8
        ar_EG           ISO8859-6
        AR_EG           UTF-8
        ar_JO           ISO8859-6
        AR_JO           UTF-8
        ar_KW           ISO8859-6
        AR_KW           UTF-8
        ar_LB           ISO8859-6
        AR_LB           UTF-8
        ar_MA           ISO8859-6
        AR_MA           UTF-8
        ar_OM           ISO8859-6
        AR_OM           UTF-8
        ar_QA           ISO8859-6
        AR_QA           UTF-8
        ar_SA           ISO8859-6
        AR_SA           UTF-8
        ar_SY           ISO8859-6
        AR_SY           UTF-8
        ar_TN           ISO8859-6
        AR_TN           UTF-8
        ar_YE           ISO8859-6
        AR_YE           UTF-8
        sq_AL           ISO8859-1
        sq_AL.8859-15   ISO8859-15
        SQ_AL           UTF-8
        be_BY           ISO8859-5
        BE_BY           UTF-8
        bg_BG           ISO8859-5
        BG_BG           UTF-8
        ca_ES.IBM-1252  IBM-1252
        ca_ES           ISO8859-1
        ca_ES.8859-15   ISO8859-15
        CA_ES           UTF-8
        Ca_ES           IBM-850
        zh_TW           IBM-eucTW
        ZH_TW           UTF-8
        Zh_TW           big5
        zh_CN           IBM-eucCN
        ZH_CN           UTF-8
        Zh_CN           GBK/GB18030
        ZH_HK           UTF-8
        ZH_SG           UTF-8
        hr_HR           ISO8859-2
        HR_HR           UTF-8
        cs_CZ           ISO8859-2
        CS_CZ           UTF-8
        da_DK           ISO8859-1
        da_DK.8859-15   ISO8859-15
        DA_DK           UTF-8
        nl_BE.IBM-1252  IBM-1252
        nl_BE           ISO8859-1
        nl_BE.8859-15   ISO8859-15
        NL_BE           UTF-8
        nl_NL.IBM-1252  IBM-1252
        nl_NL           ISO8859-1
        nl_NL.8859-15   ISO8859-15
        NL_NL           UTF-8
        en_AU.8859-15   ISO8859-15
        EN_AU           UTF-8
        en_BE.8859-15   ISO8859-15
        EN_BE           UTF-8
        en_CA.8859-15   ISO8859-15
        EN_CA           UTF-8
        en_GB.IBM-1252  IBM-1252
        en_GB           ISO8859-1
        en_GB.8859-15   ISO8859-15
        EN_GB           UTF-8
        en_HK           ISO8859-15
        EN_HK           UTF-8
        en_IE.8859-15   ISO8859-15
        EN_IE           UTF-8
        en_IN.8859-15   ISO8859-15
        EN_IN           UTF-8
        en_NZ.8859-15   ISO8859-15
        EN_NZ           UTF-8
        en_PH           ISO8859-15
        EN_PH           UTF-8
        en_SG           ISO8859-15
        EN_SG           UTF-8
        en_US           ISO8859-1
        en_US.8859-15   ISO8859-15
        EN_US           UTF-8
        en_ZA.8859-15   ISO8859-15
        EN_ZA           UTF-8
        Et_EE           IBM-922
        et_EE           ISO8859-4
        ET_EE           UTF-8
        fi_FI.IBM-1252  IBM-1252
        fi_FI           ISO8859-1
        fi_FI.8859-15   ISO8859-15
        FI_FI           UTF-8
        fr_BE.IBM-1252  IBM-1252
        fr_BE           ISO8859-1
        fr_BE.8859-15   ISO8859-15
        FR_BE           UTF-8
        fr_CA           ISO8859-1
        fr_CA.8859-15   ISO8859-15
        FR_CA           UTF-8
        fr_FR.IBM-1252  IBM-1252
        fr_FR           ISO8859-1
        fr_FR.8859-15   ISO8859-15
        FR_FR           UTF-8
        fr_LU.8859-15   ISO8859-1
        FR_LU           ISO8859-1
        fr_CH           ISO8859-1
        fr_CH.8859-15   ISO8859-15
        FR_CH           UTF-8
        de_AT.8859-15   ISO8859-15
        DE_AT           UTF-8
        de_CH           ISO8859-1
        de_CH.8859-15   ISO8859-15
        DE_CH           UTF-8
        de_DE.IBM-1252  IBM-1252
        de_DE           ISO8859-1
        de_DE.8859-15   ISO8859-15
        DE_DE           UTF-8
        de_LU.8859-15   ISO8859-15
        DE_LU           UTF-8
        el_GR           ISO8859-7
        EL_GR           UTF-8
        iw_IL           ISO8859-8
        HE_IL           UTF-8
        Iw_IL           IBM-856
        hu_HU           ISO8859-2
        HU_HU           UTF-8
        is_IS           ISO8859-1
        is_IS.8859-15   ISO8859-15
        IS_IS           UTF-8
        AS_IN           UTF-8
        BN_IN           UTF-8
        GU_IN           UTF-8
        HI_IN           UTF-8
        KN_IN           UTF-8
        ML_IN           UTF-8
        MR_IN           UTF-8
        OR_IN           UTF-8
        PA_IN           UTF-8
        TA_IN           UTF-8
        TE_IN           UTF-8
        it_IT.IBM-1252  IBM-1252
        it_IT           ISO8859-1
        it_IT.8859-15   ISO8859-15
        IT_IT           UTF-8
        it_CH.8859-15   ISO8859-15
        IT_CH           UTF-8
        ja_JP           IBM-eucJP
        JA_JP           UTF-8
        Ja_JP           IBM-943
        KK_KZ           UTF-8
        ko_KR           IBM-eucKR
        KO_KR           UTF-8
        id_ID           ISO8859-15
        ID_ID           UTF-8
        Lv_LV           IBM-921
        lv_LV           ISO8859-4
        LV_LV           UTF-8
        Lt_LT           IBM-921
        lt_LT           ISO8859-4
        LT_LT           UTF-8
        mk_MK           ISO8859-5
        MK_MK           UTF-8
        ms_MY           ISO8859-15
        MS_MY           UTF-8
        no_NO           ISO8859-1
        no_NO.8859-15   ISO8859-15
        NO_NO           UTF-8
        pl_PL           ISO8859-2
        PL_PL           UTF-8
        pt_BR           ISO8859-1
        pt_BR.8859-15   ISO8859-15
        PT_BR           UTF-8
        pt_PT.IBM-1252  IBM-1252
        pt_PT           ISO8859-1
        pt_PT.8859-15   ISO8859-15
        PT_PT           UTF-8
        ro_RO           ISO8859-2
        RO_RO           UTF-8
        ru_RU           ISO8859-5
        RU_RU           UTF-8
        sr_SP           ISO8859-5
        SR_SP           UTF-8
        sr_YU           ISO8859-5
        SR_YU           UTF-8
        sh_SP           ISO8859-2
        SH_SP           UTF-8
        sh_YU           ISO8859-2
        SH_YU           UTF-8
        sk_SK           ISO8859-2
        SK_SK           UTF-8
        sl_SI           ISO8859-2
        SL_SI           UTF-8
        es_AR.8859-15   ISO8859-15
        ES_AR           UTF-8
        es_BO           ISO8859-15
        ES_BO           UTF-8
        es_CL.8859-15   ISO8859-15
        ES_CL           UTF-8
        es_CO.8859-15   ISO8859-15
        ES_CO           UTF-8
        es_CR           ISO8859-15
        ES_CR           UTF-8
        es_DO           ISO8859-15
        ES_DO           UTF-8
        es_EC           ISO8859-15
        ES_EC           UTF-8
        es_GT           ISO8859-15
        ES_GT           UTF-8
        es_HN           ISO8859-15
        ES_HN           UTF-8
        es_ES.IBM-1252  IBM-1252
        es_ES           ISO8859-1
        es_ES.8859-15   ISO8859-15
        ES_ES           UTF-8
        es_MX.8859-15   ISO8859-15
        ES_MX           UTF-8
        es_NI           ISO8859-15
        ES_NI           UTF-8
        es_PA           ISO8859-15
        ES_PA           UTF-8
        es_PY           ISO8859-15
        ES_PY           UTF-8
        es_PE.8859-15   ISO8859-15
        ES_PE           UTF-8
        es_PR.8859-15   ISO8859-15
        ES_PR           UTF-8
        es_US           ISO8859-15
        ES_US           UTF-8
        es_UY.8859-15   ISO8859-15
        ES_UY           UTF-8
        es_VE.8859-15   ISO8859-15
        ES_VE           UTF-8
        sv_SE           ISO8859-1
        sv_SE.8859-15   ISO8859-15
        SV_SE           UTF-8
        th_TH           TIS-620
        TH_TH           UTF-8
        tr_TR           ISO8859-9
        TR_TR           UTF-8
        Uk_UA           IBM-1124
        UK_UA           UTF-8
        Vi_VN           IBM-1129
        VI_VN           UTF-8

        )}->{$LANG};

        $encoding = {qw(

        GBK/GB18030  GB18030
        IBM-1046     N/A
        IBM-1124     N/A
        IBM-1129     N/A
        IBM-1252     N/A
        IBM-850      N/A
        IBM-856      N/A
        IBM-921      N/A
        IBM-922      N/A
        IBM-943      Sjis
        IBM-eucCN    GBK
        IBM-eucJP    EUCJP
        IBM-eucKR    UHC
        IBM-eucTW    N/A
        ISO8859-1    Latin1
        ISO8859-15   Latin9
        ISO8859-2    Latin2
        ISO8859-4    Latin4
        ISO8859-5    Cyrillic
        ISO8859-6    Arabic
        ISO8859-7    Greek
        ISO8859-8    Hebrew
        ISO8859-9    Latin5
        TIS-620      TIS620
        UTF-8        UTF2
        big5         Big5Plus

        )}->{$codeset};
    }

    # Other Systems
    if ($encoding eq '') {
        if ($encoding = {qw(

            ja            EUCJP
            ja_JP         EUCJP
            ja_JP.ujis    EUCJP
            ja_JP.eucJP   EUCJP
            Jp_JP         EUCJP
            ja_JP.AJEC    EUCJP
            ja_JP.EUC     EUCJP
            ja_JP.mscode  Sjis
            ja_JP.SJIS    Sjis
            ja_JP.PCK     Sjis
            ja_JP.UTF-8   UTF2
            ja_JP.utf8    UTF2
            japanese      Sjis
            japanese.euc  EUCJP
            japan         EUCJP
            Japanese-EUC  EUCJP

            )}->{$LANG}) {
        }
        elsif (my($codeset) = $LANG =~ m/\A [^.]+ \. ([^@]+) /oxms) {
            $encoding = {qw(

            UTF-8        UTF2
            UTF8         UTF2
            ISO_8859-1   Latin1
            ISO_8859-2   Latin2
            ISO_8859-3   Latin3
            ISO_8859-4   Latin4
            ISO_8859-5   Cyrillic
            ISO_8859-6   Arabic
            ISO_8859-7   Greek
            ISO_8859-8   Hebrew
            ISO_8859-9   Latin5
            ISO_8859-10  Latin6
            ISO_8859-11  TIS620
            ISO_8859-13  Latin7
            ISO_8859-14  Latin8
            ISO_8859-15  Latin9
            ISO_8859-16  Latin10
            ISO-8859-1   Latin1
            ISO-8859-2   Latin2
            ISO-8859-3   Latin3
            ISO-8859-4   Latin4
            ISO-8859-5   Cyrillic
            ISO-8859-6   Arabic
            ISO-8859-7   Greek
            ISO-8859-8   Hebrew
            ISO-8859-9   Latin5
            ISO-8859-10  Latin6
            ISO-8859-11  TIS620
            ISO-8859-13  Latin7
            ISO-8859-14  Latin8
            ISO-8859-15  Latin9
            ISO-8859-16  Latin10
            ISO8859-1    Latin1
            ISO8859-2    Latin2
            ISO8859-3    Latin3
            ISO8859-4    Latin4
            ISO8859-5    Cyrillic
            ISO8859-6    Arabic
            ISO8859-7    Greek
            ISO8859-8    Hebrew
            ISO8859-9    Latin5
            ISO8859-10   Latin6
            ISO8859-11   TIS620
            ISO8859-13   Latin7
            ISO8859-14   Latin8
            ISO8859-15   Latin9
            ISO8859-16   Latin10
            ISO88591     Latin1
            ISO88592     Latin2
            ISO88593     Latin3
            ISO88594     Latin4
            ISO88595     Cyrillic
            ISO88596     Arabic
            ISO88597     Greek
            ISO88598     Hebrew
            ISO88599     Latin5
            ISO885910    Latin6
            ISO885911    TIS620
            ISO885913    Latin7
            ISO885914    Latin8
            ISO885915    Latin9
            ISO885916    Latin10
            KOI8-R       KOI8R
            KOI8-U       KOI8U

            )}->{uc $codeset};
        }
    }

    return $encoding;
}

#
# get absolute path to filter software
#
sub abspath {
    my($encoding) = @_;
    for my $path (@INC) {
        if ($OSNAME eq 'MacOS') {
            if (-e "$path$encoding.pm") {
                return "$path$encoding.pm";
            }
        }
        else {
            if (-e "$path/$encoding.pm") {
                return "$path/$encoding.pm";
            }
        }
    }
    return '';
}

1;

__END__

=pod

=head1 NAME

Char - Character Oriented Perl by Magic Comment

=head1 SYNOPSIS

  # encoding: sjis
  use Char;
  use Char ver.sion;             --- requires minimum version
  use Char ver.sion.0;           --- expects version (match or die)

  subroutines:
    Char::ord(...);
    Char::reverse(...);
    Char::getc(...);
    Char::length(...);
    Char::substr(...);
    Char::index(...);
    Char::rindex(...);

  # "no Char;" not supported

=head1 SOFTWARE COMPOSITION

   Char.pm --- Character Oriented Perl by Magic Comment

=head1 OTHER SOFTWARE

To using this software, you must get filter software of 'Sjis software family'.
See also following 'SEE ALSO'.

INSTALLATION BY MAKE (for UNIX-like system)

To install this software by make, type the following:

   perl Makefile.PL
   make
   make test
   make install

INSTALLATION WITHOUT MAKE (for DOS-like system)

To install this software without make, type the following:

   perl pMakefile.PL    --- pMakefile.PL makes "pmake.bat" only, and ...
   pmake.bat
   pmake.bat test
   pmake.bat install    --- install to current using Perl

   pmake.bat dist       --- make distribution package
   pmake.bat ptar.bat   --- make perl script "ptar.bat"

=head1 DEPENDENCIES

This software requires perl5.00503 or later.

=head1 MAGIC COMMENT

You should show the encoding method of your script by either of the following
descriptions. (.+) is an encoding method. It is necessary to describe this
description from anywhere of the script.

  m/coding[:=]\s*(.+)/oxms

  Example:

  # -*- coding: Shift_JIS -*-
  print "Emacs like\n";

  # vim:fileencoding=Latin-1
  print "Vim like 1";

  # vim:set fileencoding=GB18030 :
  print "Vim like 2";

  #coding:Modified UTF-8
  print "simple";

=head1 ENCODING METHOD

The encoding method is evaluated, after it is regularized.

  regularize:
    1. The upper case characters are converted into lower case.
    2. Left only alphabet and number, others are removed.

The filter software is selected by using the following tables. The script does
die if there is no filter software.

  -----------------------------------
  encoding method     filter software
  -----------------------------------
  ascii               USASCII
  usascii             USASCII
  shiftjis            Sjis
  shiftjisx0213       Sjis
  shiftjis2004        Sjis
  sjis                Sjis
  sjisx0213           Sjis
  sjis2004            Sjis
  cp932               Sjis
  windows31j          Sjis
  cswindows31j        Sjis
  sjiswin             Sjis
  macjapanese         Sjis
  macjapan            Sjis
  xsjis               Sjis
  mskanji             Sjis
  csshiftjis          Sjis
  windowscodepage932  Sjis
  ibmcp943            Sjis
  ms932               Sjis
  jisc6220            JIS8
  jisx0201            JIS8
  jis8                JIS8
  ank                 JIS8
  eucjp               EUCJP
  euc                 EUCJP
  ujis                EUCJP
  eucjpms             EUCJP
  eucjpwin            EUCJP
  cp51932             EUCJP
  utf8                UTF2
  utf2                UTF2
  utffss              UTF2
  utf8mac             UTF2
  cesu8               OldUTF8
  modifiedutf8        OldUTF8
  hp15                HP15
  informixv6als       INFORMIXV6ALS
  gb18030             GB18030
  gbk                 GBK
  gb2312              GBK
  cp936               GBK
  euccn               GBK
  uhc                 UHC
  ksx1001             UHC
  ksc5601             UHC
  ksc56011987         UHC
  ks                  UHC
  cp949               UHC
  windows949          UHC
  kps9566             KPS9566
  kps95662003         KPS9566
  kps95662000         KPS9566
  kps95661997         KPS9566
  kps956697           KPS9566
  euckp               KPS9566
  big5plus            Big5Plus
  big5                Big5Plus
  big5et              Big5Plus
  big5eten            Big5Plus
  tcabig5             Big5Plus
  cp950               Big5Plus
  big5hk              Big5HKSCS
  big5hkscs           Big5HKSCS
  hkbig5              Big5HKSCS
  hkscsbig5           Big5HKSCS
  latin1              Latin1
  isoiec88591         Latin1
  iso88591            Latin1
  iec88591            Latin1
  latin2              Latin2
  isoiec88592         Latin2
  iso88592            Latin2
  iec88592            Latin2
  latin3              Latin3
  isoiec88593         Latin3
  iso88593            Latin3
  iec88593            Latin3
  latin4              Latin4
  isoiec88594         Latin4
  iso88594            Latin4
  iec88594            Latin4
  cyrillic            Cyrillic
  isoiec88595         Cyrillic
  iso88595            Cyrillic
  iec88595            Cyrillic
  koi8r               KOI8R
  koi8u               KOI8U
  arabic              Arabic
  isoiec88596         Arabic
  iso88596            Arabic
  iec88596            Arabic
  greek               Greek
  isoiec88597         Greek
  iso88597            Greek
  iec88597            Greek
  hebrew              Hebrew
  isoiec88598         Hebrew
  iso88598            Hebrew
  iec88598            Hebrew
  latin5              Latin5
  isoiec88599         Latin5
  iso88599            Latin5
  iec88599            Latin5
  latin6              Latin6
  isoiec885910        Latin6
  iso885910           Latin6
  iec885910           Latin6
  tis620              TIS620
  tis6202533          TIS620
  isoiec885911        TIS620
  iso885911           TIS620
  iec885911           TIS620
  latin7              Latin7
  isoiec885913        Latin7
  iso885913           Latin7
  iec885913           Latin7
  latin8              Latin8
  isoiec885914        Latin8
  iso885914           Latin8
  iec885914           Latin8
  latin9              Latin9
  isoiec885915        Latin9
  iso885915           Latin9
  iec885915           Latin9
  latin10             Latin10
  isoiec885916        Latin10
  iso885916           Latin10
  iec885916           Latin10
  windows1252         Windows1252
  windows1258         Windows1258
  -----------------------------------

=head1 CHARACTER ORIENTED SUBROUTINES

=over 2

=item * Order of Character

  $ord = Char::ord($string);

  This subroutine returns the numeric value (ASCII or Multibyte Character) of the
  first character of $string. The return value is always unsigned.

=item * Reverse List or String

  @reverse = Char::reverse(@list);
  $reverse = Char::reverse(@list);

  In list context, this subroutine returns a list value consisting of the elements
  of @list in the opposite order. The subroutine can be used to create descending
  sequences:

  for (Char::reverse(1 .. 10)) { ... }

  Because of the way hashes flatten into lists when passed as a @list, reverse can
  also be used to invert a hash, presuming the values are unique:

  %barfoo = Char::reverse(%foobar);

  In scalar context, the subroutine concatenates all the elements of LIST and then
  returns the reverse of that resulting string, character by character.

=item * Returns Next Character

  $getc = Char::getc(FILEHANDLE);
  $getc = Char::getc($filehandle);
  $getc = Char::getc;

  This subroutine returns the next character from the input file attached to
  FILEHANDLE. It returns undef at end-of-file, or if an I/O error was encountered.
  If FILEHANDLE is omitted, the subroutine reads from STDIN.

  This subroutine is somewhat slow, but it's occasionally useful for single-character
  input from the keyboard -- provided you manage to get your keyboard input
  unbuffered. This subroutine requests unbuffered input from the standard I/O library.
  Unfortunately, the standard I/O library is not so standard as to provide a portable
  way to tell the underlying operating system to supply unbuffered keyboard input to
  the standard I/O system. To do that, you have to be slightly more clever, and in
  an operating-system-dependent fashion. Under Unix you might say this:

  if ($BSD_STYLE) {
      system "stty cbreak </dev/tty >/dev/tty 2>&1";
  }
  else {
      system "stty", "-icanon", "eol", "\001";
  }

  $key = Char::getc;

  if ($BSD_STYLE) {
      system "stty -cbreak </dev/tty >/dev/tty 2>&1";
  }
  else {
      system "stty", "icanon", "eol", "^@"; # ASCII NUL
  }
  print "\n";

  This code puts the next character typed on the terminal in the string $key. If your
  stty program has options like cbreak, you'll need to use the code where $BSD_STYLE
  is true. Otherwise, you'll need to use the code where it is false.

=item * Length by Character

  $length = Char::length($string);
  $length = Char::length();

  This subroutine returns the length in characters of the scalar value $string. If
  $string is omitted, it returns the Char::length of $_.

  Do not try to use length to find the size of an array or hash. Use scalar @array
  for the size of an array, and scalar keys %hash for the number of key/value pairs
  in a hash. (The scalar is typically omitted when redundant.)

  To find the length of a string in bytes rather than characters, say:

  $blen = length($string);

  or

  $blen = CORE::length($string);

=item * Substr by Character

  $substr = Char::substr($string,$offset,$length,$replacement);
  $substr = Char::substr($string,$offset,$length);
  $substr = Char::substr($string,$offset);

  This subroutine extracts a substring out of the string given by $string and
  returns it. The substring is extracted starting at $offset characters from the
  front of the string.
  If $offset is negative, the substring starts that far from the end of the string
  instead. If $length is omitted, everything to the end of the string is returned.
  If $length is negative, the length is calculated to leave that many characters off
  the end of the string. Otherwise, $length indicates the length of the substring to
  extract, which is sort of what you'd expect.

  An alternative to using Char::substr as an lvalue is to specify the $replacement
  string as the fourth argument. This allows you to replace parts of the $string and
  return what was there before in one operation, just as you can with splice. The next
  example also replaces the last character of $var with "Curly" and puts that replaced
  character into $oldstr: 

  $oldstr = Char::substr($var, -1, 1, "Curly");

  If you assign something shorter than the length of your substring, the string will
  shrink, and if you assign something longer than the length, the string will grow to
  accommodate it. To keep the string the same length, you may need to pad or chop your
  value using sprintf or the x operator. If you attempt to assign to an unallocated
  area past the end of the string, Char::substr raises an exception.

  To prepend the string "Larry" to the current value of $_, use:

  Char::substr($var, 0, 0, "Larry");

  To instead replace the first character of $_ with "Moe", use:

  Char::substr($var, 0, 1, "Moe");

  And finally, to replace the last character of $var with "Curly", use:

  Char::substr($var, -1, 1, "Curly");

=item * Index by Character

  $index = Char::index($string,$substring,$offset);
  $index = Char::index($string,$substring);

  This subroutine searches for one string within another. It returns the position of
  the first occurrence of $substring in $string. The $offset, if specified, says how
  many characters from the start to skip before beginning to look. Positions are
  based at 0. If the substring is not found, the subroutine returns one less than
  the base, ordinarily -1. To work your way through a string, you might say:

  $pos = -1;
  while (($pos = Char::index($string, $lookfor, $pos)) > -1) {
      print "Found at $pos\n";
      $pos++;
  }

  Three Indexes
  -------------------------------------------------------------------------
  Function       Works as    Returns as   Description
  -------------------------------------------------------------------------
  index          Character   Byte         JPerl semantics (most useful)
  Char::index    Character   Character    Character-oriented semantics
  CORE::index    Byte        Byte         Byte-oriented semantics
  -------------------------------------------------------------------------

=item * Rindex by Character

  $rindex = Char::rindex($string,$substring,$position);
  $rindex = Char::rindex($string,$substring);

  This subroutine works just like Char::index except that it returns the position
  of the last occurrence of $substring in $string (a reverse index). The subroutine
  returns -1 if not $substring is found. $position, if specified, is the rightmost
  position that may be returned. To work your way through a string backward, say:

  $pos = Char::length($string);
  while (($pos = Char::rindex($string, $lookfor, $pos)) >= 0) {
      print "Found at $pos\n";
      $pos--;
  }

  Three Rindexes
  -------------------------------------------------------------------------
  Function       Works as    Returns as   Description
  -------------------------------------------------------------------------
  rindex         Character   Byte         JPerl semantics (most useful)
  Char::rindex   Character   Character    Character-oriented semantics
  CORE::rindex   Byte        Byte         Byte-oriented semantics
  -------------------------------------------------------------------------

=back

=head1 AUTHOR

INABA Hitoshi E<lt>ina@cpan.orgE<gt>

This project was originated by INABA Hitoshi.

=head1 LICENSE AND COPYRIGHT

This software is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.

This software is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=head1 SEE ALSO

 Other Tools
 http://search.cpan.org/dist/jacode/

 BackPAN
 http://backpan.perl.org/authors/id/I/IN/INA/

=head1 ACKNOWLEDGEMENTS

This software was made referring to software and the document that the
following hackers or persons had made. Especially, Yukihiro Matsumoto taught
to us,

CSI is not impossible.

I am thankful to all persons.

 Larry Wall, Perl
 http://www.perl.org/

 Yukihiro "Matz" Matsumoto, YAPC::Asia2006 Ruby on Perl(s)
 http://www.rubyist.net/~matz/slides/yapc2006/

 About Ruby M17N in Rubyist Magazine
 http://jp.rubyist.net/magazine/?0025-Ruby19_m17n#l13

=cut