The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /bin/false
# vim: set autoindent shiftwidth=4 tabstop=4:

# List of internally known conversions.
# Copyright (C) 2002-2017 Guido Flohr <guido.flohr@cantanea.com>,
# all rights reserved.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

package Locale::Recode::_Conversions;

use strict;
use integer;

use vars qw ($conversions $optional_conversions);

# These are the canonical names of the encodings always available.
$conversions = {
	'ASMO_449' => 'ASMO_449',
	'ATARI-ST-EURO' => 'ATARI_ST_EURO',
	'ATARI-ST' => 'ATARI_ST',
	'CP10007' => 'CP10007',
	'CSN_369103' => 'CSN_369103',
	'CWI' => 'CWI',
	'DEC-MCS' => 'DEC_MCS',
	'EBCDIC-AT-DE-A' => 'EBCDIC_AT_DE_A',
	'EBCDIC-AT-DE' => 'EBCDIC_AT_DE',
	'EBCDIC-CA-FR' => 'EBCDIC_CA_FR',
	'EBCDIC-DK-NO-A' => 'EBCDIC_DK_NO_A',
	'EBCDIC-DK-NO' => 'EBCDIC_DK_NO',
	'EBCDIC-ES-A' => 'EBCDIC_ES_A',
	'EBCDIC-ES-S' => 'EBCDIC_ES_S',
	'EBCDIC-ES' => 'EBCDIC_ES',
	'EBCDIC-FI-SE-A' => 'EBCDIC_FI_SE_A',
	'EBCDIC-FI-SE' => 'EBCDIC_FI_SE',
	'EBCDIC-FR' => 'EBCDIC_FR',
	'EBCDIC-IS-FRISS' => 'EBCDIC_IS_FRISS',
	'EBCDIC-IT' => 'EBCDIC_IT',
	'EBCDIC-PT' => 'EBCDIC_PT',
	'EBCDIC-UK' => 'EBCDIC_UK',
	'EBCDIC-US' => 'EBCDIC_US',
	'ECMA-CYRILLIC' => 'ECMA_CYRILLIC',
	'GEORGIAN-ACADEMY' => 'GEORGIAN_ACADEMY',
	'GEORGIAN-PS' => 'GEORGIAN_PS',
	'GOST_19768-74' => 'GOST_19768_74',
	'GREEK-CCITT' => 'GREEK_CCITT',
	'GREEK7-OLD' => 'GREEK7_OLD',
	'GREEK7' => 'GREEK7',
	'HP-ROMAN8' => 'HP_ROMAN8',
	'IBM037' => 'IBM037',
	'IBM038' => 'IBM038',
	'IBM1004' => 'IBM1004',
	'IBM1026' => 'IBM1026',
	'IBM1047' => 'IBM1047',
	'IBM256' => 'IBM256',
	'IBM273' => 'IBM273',
	'IBM274' => 'IBM274',
	'IBM275' => 'IBM275',
	'IBM277' => 'IBM277',
	'IBM278' => 'IBM278',
	'IBM280' => 'IBM280',
	'IBM281' => 'IBM281',
	'IBM284' => 'IBM284',
	'IBM285' => 'IBM285',
	'IBM290' => 'IBM290',
	'IBM297' => 'IBM297',
	'IBM420' => 'IBM420',
	'IBM423' => 'IBM423',
	'IBM424' => 'IBM424',
	'IBM437' => 'IBM437',
	'IBM500' => 'IBM500',
	'IBM850' => 'IBM850',
	'IBM851' => 'IBM851',
	'IBM852' => 'IBM852',
	'IBM855' => 'IBM855',
	'IBM857' => 'IBM857',
	'IBM860' => 'IBM860',
	'IBM861' => 'IBM861',
	'IBM862' => 'IBM862',
	'IBM863' => 'IBM863',
	'IBM864' => 'IBM864',
	'IBM865' => 'IBM865',
	'IBM866' => 'IBM866',
	'IBM868' => 'IBM868',
	'IBM869' => 'IBM869',
	'IBM870' => 'IBM870',
	'IBM871' => 'IBM871',
	'IBM874' => 'IBM874',
	'IBM875' => 'IBM875',
	'IBM880' => 'IBM880',
	'IBM891' => 'IBM891',
	'IBM903' => 'IBM903',
	'IBM904' => 'IBM904',
	'IBM905' => 'IBM905',
	'IBM918' => 'IBM918',
	'IEC_P27-1' => 'IEC_P27_1',
	'INIS-8' => 'INIS_8',
	'INIS-CYRILLIC' => 'INIS_CYRILLIC',
	'INIS' => 'INIS',
	'ISO-8859-1' => 'ISO_8859_1',
	'ISO-8859-10' => 'ISO_8859_10',
	'ISO-8859-11' => 'ISO_8859_11',
	'ISO-8859-13' => 'ISO_8859_13',
	'ISO-8859-14' => 'ISO_8859_14',
	'ISO-8859-15' => 'ISO_8859_15',
	'ISO-8859-16' => 'ISO_8859_16',
	'ISO-8859-2' => 'ISO_8859_2',
	'ISO-8859-3' => 'ISO_8859_3',
	'ISO-8859-4' => 'ISO_8859_4',
	'ISO-8859-5' => 'ISO_8859_5',
	'ISO-8859-6' => 'ISO_8859_6',
	'ISO-8859-7' => 'ISO_8859_7',
	'ISO-8859-8' => 'ISO_8859_8',
	'ISO-8859-9' => 'ISO_8859_9',
	'ISO_10367-BOX' => 'ISO_10367_BOX',
	'ISO_2033-1983' => 'ISO_2033_1983',
	'ISO_5427-EXT' => 'ISO_5427_EXT',
	'ISO_5427' => 'ISO_5427',
	'ISO_5428' => 'ISO_5428',
	'KOI-8' => 'KOI_8',
	'KOI8-R' => 'KOI8_R',
	'KOI8-RU' => 'KOI8_RU',
	'KOI8-T' => 'KOI8_T',
	'KOI8-U' => 'KOI8_U',
	'LATIN-GREEK-1' => 'LATIN_GREEK_1',
	'LATIN-GREEK' => 'LATIN_GREEK',
	'MACINTOSH' => 'MACINTOSH',
	'MACARABIC' => 'MACARABIC',
	'MACCYRILLIC' => 'MACCYRILLIC',
	'MACCROATIAN' => 'MACCROATIAN',
	'MACGREEK' => 'MACGREEK',
	'MACHEBREW' => 'MACHEBREW',
	'MACICELAND' => 'MACICELAND',
	'MACROMANIA' => 'MACROMANIA',
	'MACTHAI' => 'MACTHAI',
	'MACTURKISH' => 'MACTURKISH',
	'MACUKRAINE' => 'MACUKRAINE',
	'MAC-IS' => 'MAC_IS',
	'MAC-SAMI' => 'MAC_SAMI',
	'MAC-UK' => 'MAC_UK',
	'NATS-DANO' => 'NATS_DANO',
	'NATS-SEFI' => 'NATS_SEFI',
	'NEXTSTEP' => 'NEXTSTEP',
	'TIS-620' => 'TIS_620',
	'UTF-8' => 'UTF_8',
	'VISCII' => 'VISCII',
	'WIN-SAMI-2' => 'SAMI_WS2',
	'WINDOWS-1250' => 'CP1250',
	'WINDOWS-1251' => 'CP1251',
	'WINDOWS-1252' => 'CP1252',
	'WINDOWS-1253' => 'CP1253',
	'WINDOWS-1254' => 'CP1254',
	'WINDOWS-1256' => 'CP1256',
	'WINDOWS-1257' => 'CP1257',
	'US-ASCII' => 'US_ASCII',
};

# These encodings are maybe available via Encode(3pm).
$optional_conversions = {
	'BIG5' => undef,
	'BIG5-HKSCS' => undef,
	'CN-GB' => undef,
	'CN-GB-ISOIR165' => undef,
	'CP1006' => undef,
	'CP1026' => undef,
	'CP1047' => undef,
	'CP1361' => undef,
	'CP949' => undef,
    'CP37' => undef,
    'CP424' => undef,
    'CP500' => undef,
    'CP737' => undef,
    'CP775' => undef,
    'CP856' => undef,
    'CP874' => undef,
    'CP875' => undef,
    'CP932' => undef,
    'CP936' => undef,
    'CP950' => undef,
	'EUC-JP' => undef,
	'EUC-KR' => undef,
	'EUC-TW' => undef,
    # mapping from 0xef to 0xff missing.
	# 'HP-ROMAN8' => undef,
	'GB18030' => undef,
	'HZ' => undef,
	'IBM437' => undef,
	'IBM850' => undef,
	'IBM852' => undef,
	'IBM855' => undef,
	'IBM857' => undef,
	'IBM860' => undef,
	'IBM861' => undef,
	'IBM862' => undef,
	'IBM863' => undef,
	'IBM864' => undef,
	'IBM865' => undef,
	'IBM866' => undef,
	'IBM869' => undef,
	'ISO-10646-UCS-2' => undef,
	'ISO-10646-UCS-4' => undef,
	'ISO-2022-JP' => undef,
	'ISO-2022-JP-1' => undef,
	'ISO-2022-KR' => undef,
	'ISO-8859-1' => undef,
	'ISO-8859-10' => undef,
	# This is broken in some versions of Encode.
	# 'ISO-8859-11' => undef,
	'ISO-8859-13' => undef,
	'ISO-8859-14' => undef,
	'ISO-8859-15' => undef,
    # Errors at 0xa5 and 0xab.
	# 'ISO-8859-16' => undef,
	'ISO-8859-2' => undef,
	'ISO-8859-3' => undef,
	'ISO-8859-4' => undef,
	'ISO-8859-5' => undef,
    # Uses arabic digits in ascii range?!
	# 'ISO-8859-6' => undef,
    # 0xa1 and 0xa2 are incorrectly encoded.
	# 'ISO-8859-7' => undef,
    # 0xfd and 0xfe are missing.
	# 'ISO-8859-8' => undef,
	'ISO-8859-9' => undef,
	'ISO-IR-149' => undef,
	'KOI8-R' => undef,
    # 0x95 is BULLET, not BULLET OPERATOR.
	# 'KOI8-U' => undef,
    # Seems to be messed up in certain Encode versions.
	# 'MACINTOSH' => undef,
	# TODO: Check other Mac encodings for correctness.
	# Nextstep is completely broken in my version of Encode.
	# 'NEXTSTEP' => undef,
	'SHIFT_JIS' => undef,
	'UCS-2BE' => undef,
	'UCS-2LE' => undef,
	'UCS-4BE' => undef,
	'UCS-4LE' => undef,
	'US-ASCII' => undef,
	'UTF-16' => undef,
	'UTF-16BE' => undef,
	'UTF-16LE' => undef,
	'UTF-32' => undef,
	'UTF-32BE' => undef,
	'UTF-32LE' => undef,
	'UTF-8' => undef,
    # 0x86 is missing, 0xa6 is incorrectly encoded.
	# 'VISCII' => undef,
	'WINDOWS-1250' => undef,
	'WINDOWS-1251' => undef,
	'WINDOWS-1252' => undef,
	'WINDOWS-1253' => undef,
	'WINDOWS-1254' => undef,
	'WINDOWS-1255' => undef,
	'WINDOWS-1256' => undef,
	'WINDOWS-1257' => undef,
	'WINDOWS-1258' => undef,
};

my $has_encode;

sub resolveAlias
{
	my (undef, $encoding) = @_;

	$encoding = uc $encoding;
	
	return $encoding if exists $conversions->{$encoding};
	return $encoding if exists $optional_conversions->{$encoding};

	require Locale::Recode::_Aliases;

	my $resolved = Locale::Recode::_Aliases::ALIASES()->{$encoding};
	
	return $resolved if $resolved;

	return;
}

sub isSupported
{
	my ($class, $encoding) = @_;

	return unless defined $encoding && length $encoding;

	$encoding = uc $encoding;
	my $mimename = $class->resolveAlias ($encoding);

	return unless $mimename;
	
	# Determine the correct module.
	if (exists $optional_conversions->{$mimename}) {
		unless (defined $has_encode) {
			eval "require Encode";
			$has_encode = !$@;

			if ($has_encode) {
				require Encode::Alias;

				# Add missing real names.
				Encode::Alias::define_alias (MS_KANJI => 'ShiftJIS');
				Encode::Alias::define_alias ('CN-GB' => 'EUC-CN');
			}
		}

		if ($has_encode) {
			# Now check whether Encode really supports that encoding.
			eval "Encode::encode ('$mimename', 'x')";
		
			unless ($@) {
				$conversions->{$mimename} = '_Encode';
			}
			delete $optional_conversions->{$mimename};
		}
	}

	return $conversions->{$mimename} if exists $conversions->{$mimename};

	return;
}

sub listSupported
{
	my ($class) = @_;

	foreach my $opt (keys %$optional_conversions) {
		$class->isSupported ($opt);
	}

	my @list = keys %$conversions;
	return @list;
}

# Find a conversion path.
sub findPath
{
	my ($class, $from, $to) = @_;

	$from = 'INTERNAL' eq uc $from ? 'INTERNAL' : $class->resolveAlias ($from);
	$to = 'INTERNAL' eq uc $to ? 'INTERNAL' : $class->resolveAlias ($to);
	
	return unless $from && $to;
	
	return [] if $from eq $to;

	my $from_module = $class->isSupported ($from);
	my $to_module = $class->isSupported ($to);

	if (!$from_module) {
		if ('INTERNAL' eq $from) {
			$from_module = $to_module or return;
		} else {
			return;
		}
	}

	if (!$to_module) {
		if ('INTERNAL' eq $to) {
			$to_module = $from_module or return;
		} else {
			return;
		}
	}

	if ($from_module eq $to_module
		|| $to eq 'INTERNAL'
		|| $to eq 'UTF-8') {
		return [[ $from_module, $from, $to ]];
	} elsif ($from eq 'INTERNAL') {
		return [[ $to_module, $from, $to ]];
	} else {
		return [[ $from_module, $from, 'INTERNAL' ],
				[ $to_module, 'INTERNAL', $to ]];
	}
}

# TODO: check for
# 7bit-jis
# AdobeStandardEncoding
# AdobeSymbol
# AdobeZdingbat
# ascii-ctrl
# big5ext
# big5plus
# cccii
# cns11643-1
# cns11643-2
# cns11643-3
# cns11643-4
# cns11643-5
# cns11643-6
# cns11643-7
# cns11643-f
# dingbats
# gb12345-raw
# gb2312-raw
# gsm0338
# jis0201-raw
# jis0208-raw
# jis0212-raw
# koi8-f
# MIME-B
# MIME-Header
# MIME-Q
# posix-bc
# symbol
# unisys

1;

__END__

=head1 NAME

Locale::Recode::_Conversions - Internal Table of Known Conversions

=head1 SYNOPSIS

use Locale::Recode::_Conversions

This module is internal to libintl.  Do not use it directly!

=head1 AUTHOR

Copyright (C) 2002-2017 L<Guido Flohr|http://www.guido-flohr.net/>
(L<mailto:guido.flohr@cantanea.com>), all rights reserved.  See the source
code for details!code for details!

=head1 SEE ALSO

Locale::Recode(3), perl(1)

=cut
Local Variables:
mode: perl
perl-indent-level: 4
perl-continued-statement-offset: 4
perl-continued-brace-offset: 0
perl-brace-offset: -4
perl-brace-imaginary-offset: 0
perl-label-offset: -4
cperl-indent-level: 4
cperl-continued-statement-offset: 2
tab-width: 4
End:
=cut