@@ -1,3 +1,18 @@
+0.21 -- 2014-10-24
+
+ * Added "Common ARA" Arabic transliteration. Thanks to Ahmed Elsheshtawy
+ for suggesting this transliteration and his help implementing it!
+
+0.20 -- 2014-05-22
+
+ * lib/Lingua/Translit/Tables.pm: Added sub to handle Perl's
+ "Unicode Bug", see perlunicode for details (v0.10). This fixes the
+ errors reported by Perl v5.18.0.
+ * MANIFEST: Added missing "ALA-LC RUS" files to distribution.
+ * xml/: Removed "Common Classical MON" transliteration due to errors
+ and contributors' lack of time to fix these.
+ * Code cleanups
+
0.19 -- 2011-04-15
* Added "ALA-LC RUS" transliteration (ALA-LC:1997). Thanks to Dmitry Smal
@@ -151,4 +166,4 @@
* ISO 843
* DIN 5008
-# vim: sw=4 sts=4 ai et ft=changelog
+# vim: set sw=4 sts=4 ts=4 ai et ft=changelog:
@@ -3,6 +3,7 @@ MANIFEST
MANIFEST.SKIP
lib/Lingua/Translit.pm
lib/Lingua/Translit/Tables.pm
+xml/ala-lc_rus.xml
xml/tables.dump
xml/Makefile
xml/translit.dtd
@@ -18,7 +19,7 @@ xml/common_ces.xml
xml/common_slv.xml
xml/common_slk.xml
xml/common_pol.xml
-xml/common_classical_mon.xml
+xml/common_ara.xml
xml/din_1460_bul.xml
xml/din_1460_rus.xml
xml/din_1460_ukr.xml
@@ -37,7 +38,6 @@ t/13_tr_Greeklish.t
t/14_tr_DIN_31634.t
t/15_tr_Common_RON.t
t/16_tr_Common_CES.t
-t/17_tr_Common_Classical_MON.t
t/18_tr_DIN_1460_BUL.t
t/19_tr_Streamlined_System_BUL.t
t/20_tr_Common_SLK.t
@@ -48,6 +48,8 @@ t/24_tr_DIN_1460_UKR.t
t/25_tr_GOST_RUS_OLD.t
t/26_tr_GOST_RUS.t
t/27_tr_GOST_UKR.t
+t/28_tr_ALA-LC_RUS.t
+t/29_tr_Common_ARA.t
tools/substitute_tables.pl
Changes
README
@@ -1,13 +1,21 @@
--- #YAML:1.0
-name: Lingua-Translit
-version: 0.19
-abstract: transliterates text between writing systems
-license: ~
-author:
+name: Lingua-Translit
+version: 0.21
+abstract: transliterates text between writing systems
+author:
- Alex Linke <alinke@lingua-systems.com>
-generated_by: ExtUtils::MakeMaker version 6.42
-distribution_type: module
-requires:
+license: unknown
+distribution_type: module
+configure_requires:
+ ExtUtils::MakeMaker: 0
+build_requires:
+ ExtUtils::MakeMaker: 0
+requires: {}
+no_index:
+ directory:
+ - t
+ - inc
+generated_by: ExtUtils::MakeMaker version 6.57_05
meta-spec:
- url: http://module-build.sourceforge.net/META-spec-v1.3.html
- version: 1.3
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
@@ -7,15 +7,14 @@ use ExtUtils::MakeMaker;
WriteMakefile(
- NAME => "Lingua::Translit",
+ NAME => "Lingua::Translit",
VERSION_FROM => "lib/Lingua/Translit.pm",
ABSTRACT_FROM => "lib/Lingua/Translit.pm",
- AUTHOR => 'Alex Linke <alinke@lingua-systems.com>',
- EXE_FILES => [qw/translit/],
- PL_FILES => {
- 'tools/substitute_tables.pl' =>
- 'blib/lib/Lingua/Translit/Tables.pm'
- },
+ AUTHOR => 'Alex Linke <alinke@lingua-systems.com>',
+ EXE_FILES => [ qw/translit/ ],
+ PL_FILES => {
+ 'tools/substitute_tables.pl' => 'blib/lib/Lingua/Translit/Tables.pm'
+ },
);
@@ -24,9 +23,8 @@ package MY;
sub postamble
{
# include target to rebuild tables
- return "tables:\n\t\$(MAKE) -C xml tables\n\n" .
- "manual:\n\t\$(MAKE) -C xml/manual\n";
+ return "tables:\n\t\$(MAKE) -C xml tables\n";
}
-# vim: sts=4 enc=utf-8
+# vim: sts=4 sw=4 ts=4 ai et
@@ -35,7 +35,7 @@ if you have any suggestions and contributions.
COPYRIGHT AND LICENSE
Copyright (C) 2007-2008 Alex Linke and Rona Linke
-Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
This module is free software. It may be used, redistributed
and/or modified under the terms of either the GPL v2 or the
diff --git a/var/tmp/source/ALINKE/Lingua-Translit-0.19/Lingua-Translit-0.19/developer-manual__eng.pdf b/var/tmp/source/ALINKE/Lingua-Translit-0.21/Lingua-Translit-0.21/developer-manual__eng.pdf
index 9882b86e..6598730d 100644
Binary files a/var/tmp/source/ALINKE/Lingua-Translit-0.19/Lingua-Translit-0.19/developer-manual__eng.pdf and b/var/tmp/source/ALINKE/Lingua-Translit-0.21/Lingua-Translit-0.21/developer-manual__eng.pdf differ
@@ -1,26 +1,22 @@
package Lingua::Translit::Tables;
-
#
# Copyright (C) 2007-2008 ...
# Alex Linke <alinke@lingua-systems.com>
# Rona Linke <rlinke@lingua-systems.com>
-# Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
#
-
use strict;
use warnings;
+use utf8;
require 5.008;
-
-our $VERSION = '0.09';
-
+our $VERSION = '0.10';
use Carp;
-
=pod
=head1 NAME
@@ -75,42 +71,58 @@ Import translit_list_supported(). (Convenience tag)
=cut
-
require Exporter;
-our @ISA = qw/Exporter/;
-our @EXPORT = qw//; # Export nothing by default
-our @EXPORT_OK = qw/translit_supported translit_reverse_supported
- translit_list_supported/;
+our @ISA = qw/Exporter/;
+our @EXPORT = qw//; # Export nothing by default
+our @EXPORT_OK = qw/translit_supported translit_reverse_supported
+ translit_list_supported/;
our %EXPORT_TAGS = (
- checks => [qw/translit_supported translit_reverse_supported/],
- list => [qw/translit_list_supported/],
- all => [@EXPORT_OK]
+ checks => [qw/translit_supported translit_reverse_supported/],
+ list => [qw/translit_list_supported/],
+ all => [@EXPORT_OK]
);
-
-# For convenience, the tables are initialized at the bottom of this file
+# For convenience, the tables are initialized at the bottom of this file.
our %tables;
-
-# used internally to retrieve a reference to a single transliteration table
-sub _get_table_reference
-{
+# Used internally to retrieve a reference to a single transliteration table.
+sub _get_table_reference {
my $name = shift();
return unless $name;
$name = _get_table_id($name);
- foreach my $table (keys %tables)
- {
- return $tables{$table} if ($table =~ /^$name$/i);
+ foreach my $table ( keys %tables ) {
+ return _handle_perl_unicode_bug( $tables{$table} )
+ if $table =~ /^$name$/i;
}
return;
}
+# Handle the "Unicode Bug" affecting code points in the Latin-1 block.
+#
+# Have a look at perlunicode (section "The 'Unicode Bug'") for details.
+sub _handle_perl_unicode_bug {
+ my $tbl = shift();
+
+ foreach my $rule ( @{ $tbl->{rules} } ) {
+ utf8::upgrade( $rule->{from} );
+ utf8::upgrade( $rule->{to} );
+
+ if ( defined( $rule->{context} ) ) {
+ utf8::upgrade( $rule->{context}->{before} )
+ if defined $rule->{context}->{before};
+ utf8::upgrade( $rule->{context}->{after} )
+ if defined $rule->{context}->{after};
+ }
+ }
+
+ return $tbl;
+}
=head1 ROUTINES
@@ -120,12 +132,10 @@ Returns true (1), iff I<translit_name> is supported. False (0) otherwise.
=cut
-sub translit_supported
-{
- return (_get_table_reference(_get_table_id($_[0])) ? 1 : 0);
+sub translit_supported {
+ return ( _get_table_reference( _get_table_id( $_[0] ) ) ? 1 : 0 );
}
-
=head2 translit_reverse_supported(I<translit_name>)
Returns true (1), iff I<translit_name> is supported and allows reverse
@@ -133,16 +143,14 @@ transliteration. False (0) otherwise.
=cut
-sub translit_reverse_supported
-{
- my $table = _get_table_reference(_get_table_id($_[0]));
+sub translit_reverse_supported {
+ my $table = _get_table_reference( _get_table_id( $_[0] ) );
croak("Failed to retrieve table for $_[0].") unless ($table);
- return (($table->{reverse} =~ /^true$/) ? 1 : 0);
+ return ( ( $table->{reverse} =~ /^true$/ ) ? 1 : 0 );
}
-
=head2 B<translit_list_supported()>
Prints a list of all supported transliterations to STDOUT, providing the
@@ -156,18 +164,15 @@ The same information is provided in this document as well:
=cut
-sub translit_list_supported
-{
- foreach my $table (sort keys %tables)
- {
+sub translit_list_supported {
+ foreach my $table ( sort keys %tables ) {
my $t = $tables{$table};
print "$t->{name}, ",
- ($t->{reverse} eq "false" ? "not " : ""),
- "reversible, $t->{desc}\n";
+ ( $t->{reverse} eq "false" ? "not " : "" ),
+ "reversible, $t->{desc}\n";
}
}
-
=head1 SUPPORTED TRANSLITERATIONS
=over 4
@@ -218,9 +223,9 @@ I<Common SLK>, not reversible, Slovak without diacritics
I<Common SLV>, not reversible, Slovenian without diacritics
-=item Mongolian
+=item Arabic
-I<Common Classical MON>, reversible, Classical Mongolian to Latin
+I<Common ARA>, not reversible, Common Romanization of Arabic
=back
@@ -230,7 +235,7 @@ In case you want to add your own transliteration tables to
L<Lingua::Translit>, have a look at the developer manual included in the
distribution.
An online version is available at
-L<http://www.lingua-systems.com/downloads/Lingua-Translit/>.
+L<http://www.lingua-systems.com/translit/downloads/>.
A template of a transliteration table is provided as well
(F<xml/template.xml>) so you can easily start developing.
@@ -246,7 +251,7 @@ Please report bugs to perl@lingua-systems.com.
L<Lingua::Translit>
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/>
+L<http://www.lingua-systems.com/translit/>
=head1 CREDITS
@@ -254,12 +259,12 @@ L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/>
Thanks to Dr. Daniel Eiwen, Romanisches Seminar, Universitaet Koeln for his
help on Romanian transliteration.
-Thanks to Bayanzul Lodoysamba <baynaa@users.sourceforge.net> for contributing
-the "Common Classical Mongolian" transliteration table.
-
Thanks to Dmitry Smal and Rusar Publishing for contributing the "ALA-LC RUS"
transliteration table.
+Thanks to Ahmed Elsheshtawy for his help implementing the "Common ARA" Arabic
+transliteration.
+
=head1 AUTHORS
Alex Linke <alinke@lingua-systems.com>
@@ -270,7 +275,7 @@ Rona Linke <rlinke@lingua-systems.com>
Copyright (C) 2007-2008 Alex Linke and Rona Linke
-Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
This module is free software. It may be used, redistributed
and/or modified under the terms of either the GPL v2 or the
@@ -278,11 +283,9 @@ Artistic license.
=cut
-
# Get a table's identifier (based on the table's name)
# i.e "Common DEU" -> "common_deu"
-sub _get_table_id
-{
+sub _get_table_id {
my $name = shift();
return "" unless $name;
@@ -292,13 +295,10 @@ sub _get_table_id
return lc($name);
}
-
# For convenience, the next line is automatically substituted with the set
# of transliteration tables at build time.
-%tables; # PLACEHOLDER
-
+%tables; # PLACEHOLDER
1;
-
-# vim: sts=4 sw=4 ai et
+# vim: sts=4 sw=4 ts=4 ai et
@@ -1,27 +1,23 @@
package Lingua::Translit;
-
#
# Copyright (C) 2007-2008 ...
# Alex Linke <alinke@lingua-systems.com>
# Rona Linke <rlinke@lingua-systems.com>
-# Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
#
-
use strict;
use warnings;
require 5.008;
-use Carp qw/croak/;
+use Carp qw/croak/;
use Encode qw/encode decode/;
use Lingua::Translit::Tables;
-
-our $VERSION = '0.19';
-
+our $VERSION = '0.21';
=pod
@@ -32,11 +28,11 @@ Lingua::Translit - transliterates text between writing systems
=head1 SYNOPSIS
use Lingua::Translit;
-
+
my $tr = new Lingua::Translit("ISO 843");
-
+
my $text_tr = $tr->translit("character oriented string");
-
+
if ($tr->can_reverse()) {
$text_tr = $tr->translit_reverse("character oriented string");
}
@@ -82,8 +78,7 @@ Initializes an object with the specific transliteration table, e.g. "ISO 9".
=cut
-sub new
-{
+sub new {
my $class = shift();
my $name = shift();
@@ -93,8 +88,7 @@ sub new
croak("No transliteration name given.") unless $name;
# Stay compatible with programs that use Lingua::Translit < 0.05
- if ($name =~ /^DIN 5008$/i)
- {
+ if ( $name =~ /^DIN 5008$/i ) {
$name = "Common DEU";
}
@@ -110,20 +104,19 @@ sub new
croak("$name table: missing 'rules'") unless defined $table->{rules};
# Copy over the table's data
- $self->{name} = $table->{name};
- $self->{desc} = $table->{desc};
- $self->{rules} = $table->{rules};
+ $self->{name} = $table->{name};
+ $self->{desc} = $table->{desc};
+ $self->{rules} = $table->{rules};
# Set a truth value of the transliteration's reversibility according to
# the natural language string in the original transliteration table
- $self->{reverse} = ($table->{reverse} =~ /^true$/i) ? 1 : 0;
+ $self->{reverse} = ( $table->{reverse} =~ /^true$/i ) ? 1 : 0;
undef($table);
return bless $self, $class;
}
-
=head2 translit(I<"character oriented string">)
Transliterates the given text according to the object's transliteration
@@ -132,71 +125,55 @@ Returns the transliterated text.
=cut
-sub translit
-{
+sub translit {
my $self = shift();
my $text = shift();
- my $utf8_flag_on = Encode::is_utf8($text);
-
- unless ($utf8_flag_on)
- {
- $text = decode("UTF-8", $text);
- }
-
# Return if no input was given
return unless $text;
- # Copy over the input string. It will be modified directly.
- my $tr_text = $text;
+ my $utf8_flag_on = Encode::is_utf8($text);
+
+ unless ($utf8_flag_on) {
+ $text = decode( "UTF-8", $text );
+ }
- foreach my $rule (@{$self->{rules}})
- {
- if (defined $rule->{context})
- {
+ foreach my $rule ( @{ $self->{rules} } ) {
+ if ( defined $rule->{context} ) {
my $c = $rule->{context};
# single context rules
- if (defined $c->{before} && !defined $c->{after})
- {
- $tr_text =~ s/\Q$rule->{from}\E(?=$c->{before})/$rule->{to}/g;
+ if ( defined $c->{before} && !defined $c->{after} ) {
+ $text =~ s/$rule->{from}(?=$c->{before})/$rule->{to}/g;
}
- elsif (defined $c->{after} && !defined $c->{before})
- {
- $tr_text =~ s/(?<=$c->{after})\Q$rule->{from}\E/$rule->{to}/g;
+ elsif ( defined $c->{after} && !defined $c->{before} ) {
+ $text =~ s/(?<=$c->{after})$rule->{from}/$rule->{to}/g;
}
# double context rules: logical "inbetween"
- elsif (defined $c->{before} && defined $c->{after})
- {
- $tr_text =~
- s/
- (?<=$c->{after})\Q$rule->{from}\E(?=$c->{before})
+ elsif ( defined $c->{before} && defined $c->{after} ) {
+ $text =~ s/
+ (?<=$c->{after})$rule->{from}(?=$c->{before})
/$rule->{to}/gx;
}
- else
- {
+ else {
croak("incomplete rule context");
}
}
- else
- {
- $tr_text =~ s/\Q$rule->{from}\E/$rule->{to}/g;
+ else {
+ $text =~ s/$rule->{from}/$rule->{to}/g;
}
}
- unless ($utf8_flag_on)
- {
- return encode("UTF-8", $tr_text);
+ unless ($utf8_flag_on) {
+ return encode( "UTF-8", $text );
}
- else
- {
- return $tr_text;
+ else {
+ return $text;
}
}
-
=head2 translit_reverse(I<"character oriented string">)
Transliterates the given text according to the object's transliteration
@@ -208,74 +185,58 @@ Returns the transliterated text.
=cut
-sub translit_reverse
-{
+sub translit_reverse {
my $self = shift();
my $text = shift();
- my $utf8_flag_on = Encode::is_utf8($text);
-
- unless ($utf8_flag_on)
- {
- $text = decode("UTF-8", $text);
- }
-
# Return if no input was given
return unless $text;
# Is this transliteration reversible?
croak("$self->{name} cannot be reversed") unless $self->{reverse};
- # Copy over the input string. It will be modified directly.
- my $tr_text = $text;
+ my $utf8_flag_on = Encode::is_utf8($text);
+
+ unless ($utf8_flag_on) {
+ $text = decode( "UTF-8", $text );
+ }
- foreach my $rule (@{$self->{rules}})
- {
- if (defined $rule->{context})
- {
+ foreach my $rule ( @{ $self->{rules} } ) {
+ if ( defined $rule->{context} ) {
my $c = $rule->{context};
# single context rules
- if (defined $c->{before} && !defined $c->{after})
- {
- $tr_text =~ s/\Q$rule->{to}\E(?=$c->{before})/$rule->{from}/g;
+ if ( defined $c->{before} && !defined $c->{after} ) {
+ $text =~ s/$rule->{to}(?=$c->{before})/$rule->{from}/g;
}
- elsif (defined $c->{after} && !defined $c->{before})
- {
- $tr_text =~ s/(?<=$c->{after})\Q$rule->{to}\E/$rule->{from}/g;
+ elsif ( defined $c->{after} && !defined $c->{before} ) {
+ $text =~ s/(?<=$c->{after})$rule->{to}/$rule->{from}/g;
}
# double context rules: logical "inbetween"
- elsif (defined $c->{before} && defined $c->{after})
- {
- $tr_text =~
- s/
- (?<=$c->{after})\Q$rule->{to}\E(?=$c->{before})
+ elsif ( defined $c->{before} && defined $c->{after} ) {
+ $text =~ s/
+ (?<=$c->{after})$rule->{to}(?=$c->{before})
/$rule->{from}/gx;
}
- else
- {
+ else {
croak("incomplete rule context");
}
}
- else
- {
- $tr_text =~ s/\Q$rule->{to}\E/$rule->{from}/g;
+ else {
+ $text =~ s/$rule->{to}/$rule->{from}/g;
}
}
- unless ($utf8_flag_on)
- {
- return encode("UTF-8", $tr_text);
+ unless ($utf8_flag_on) {
+ return encode( "UTF-8", $text );
}
- else
- {
- return $tr_text;
+ else {
+ return $text;
}
}
-
=head2 can_reverse()
Returns true (1), iff reverse transliteration is possible.
@@ -283,24 +244,20 @@ False (0) otherwise.
=cut
-sub can_reverse
-{
+sub can_reverse {
return $_[0]->{reverse};
}
-
=head2 name()
Returns the name of the chosen transliteration table, e.g. "ISO 9".
=cut
-sub name
-{
+sub name {
return $_[0]->{name};
}
-
=head2 desc()
Returns a description for the transliteration,
@@ -308,12 +265,10 @@ e.g. "ISO 9:1995, Cyrillic to Latin".
=cut
-sub desc
-{
+sub desc {
return $_[0]->{desc};
}
-
=head1 SUPPORTED TRANSLITERATIONS
=over 4
@@ -364,9 +319,9 @@ I<Common SLK>, not reversible, Slovak without diacritics
I<Common SLV>, not reversible, Slovenian without diacritics
-=item Mongolian
+=item Arabic
-I<Common Classical MON>, reversible, Classical Mongolian to Latin
+I<Common ARA>, not reversible, Common Romanization of Arabic
=back
@@ -376,7 +331,7 @@ In case you want to add your own transliteration tables to
L<Lingua::Translit>, have a look at the developer manual included in the
distribution.
An online version is available at
-L<http://www.lingua-systems.com/downloads/Lingua-Translit/>.
+L<http://www.lingua-systems.com/translit/downloads/>.
A template of a transliteration table is provided as well
(F<xml/template.xml>) so you can easily start developing.
@@ -405,22 +360,19 @@ L<Lingua::Translit::Tables>, L<Encode>, L<perlunicode>
L<translit(1)>
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/>
-
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/online-transliteration.html>
-provides an online frontend for L<Lingua::Translit>.
+L<http://www.lingua-systems.com/translit/>
=head1 CREDITS
Thanks to Dr. Daniel Eiwen, Romanisches Seminar, Universitaet Koeln for his
help on Romanian transliteration.
-Thanks to Bayanzul Lodoysamba <baynaa@users.sourceforge.net> for contributing
-the "Common Classical Mongolian" transliteration table.
-
Thanks to Dmitry Smal and Rusar Publishing for contributing the "ALA-LC RUS"
transliteration table.
+Thanks to Ahmed Elsheshtawy for his help implementing the "Common ARA" Arabic
+transliteration.
+
=head1 AUTHORS
Alex Linke <alinke@lingua-systems.com>
@@ -431,7 +383,7 @@ Rona Linke <rlinke@lingua-systems.com>
Copyright (C) 2007-2008 Alex Linke and Rona Linke
-Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
This module is free software. It may be used, redistributed
and/or modified under the terms of either the GPL v2 or the
@@ -439,8 +391,6 @@ Artistic license.
=cut
-
1;
-
-# vim: sts=4 sw=4 ai et
+# vim: sts=4 sw=4 ts=4 ai et
@@ -1,12 +1,12 @@
use strict;
-use Test::More tests => (12 + (13 * 4));
+use Test::More tests => (12 + (12 * 4));
my $truth;
my @check_support = (
"ISO 843", "Common DEU", "ISO 9", "Greeklish", "DIN 31634", "Common RON",
- "Common CES", "Common Classical MON", "DIN 1460 BUL",
- "Streamlined System BUL", "Common SLV", "Common SLK", "Common POL"
+ "Common CES", "DIN 1460 BUL", "Streamlined System BUL", "Common SLV",
+ "Common SLK", "Common POL"
);
@@ -1,11 +1,11 @@
use strict;
-use Test::More tests => (8 + (4*13));
+use Test::More tests => (8 + (12 * 4));
my $truth;
my @check_support = (
"ISO 843", "Common DEU", "ISO 9", "Greeklish", "DIN 31634", "Common RON",
- "Common CES", "Common Classical MON", "DIN 1460 BUL",
- "Streamlined System BUL", "Common SLV", "Common SLK", "Common POL"
+ "Common CES", "DIN 1460 BUL", "Streamlined System BUL", "Common SLV",
+ "Common SLK", "Common POL"
);
my $num_str = "1234567890";
@@ -1,96 +0,0 @@
-use strict;
-use Test::More tests => 13;
-
-my $name = "Common Classical MON";
-
-# "My Native Land" by D.Natsagdorj from http://www.linguamongolia.co.uk/.
-
-my $input = "ᠬᠡᠨᠲᠡᠢ ᠂ ᠬᠠᠩᠭᠠᠢ ᠂ ᠰᠣᠶᠣᠨᠤ ᠥᠨᠳᠦᠷ ᠰᠠᠶᠢᠬᠠᠨ ᠨᠢᠷᠤᠭᠤᠨᠤᠳ " .
- "ᠬᠣᠶᠢᠲᠤ ᠵᠦᠭᠦᠨ ᠴᠢᠮᠡᠭ ᠪᠣᠯᠤᠭᠰᠠᠨ ᠣᠢ ᠬᠥᠪᠴᠢᠶᠢᠨ ᠠᠭᠤᠯᠠᠨᠤᠳ " .
- "ᠮᠡᠨᠡᠨ ᠂ ᠱᠠᠷᠭᠠ ᠂ ᠨᠣᠮᠢᠨᠤ ᠥᠷᠭᠡᠨ ᠶᠡᠬᠡ ᠭᠣᠪᠢᠤᠳ " .
- "ᠡᠮᠦᠨᠡ ᠵᠦᠭᠦᠨ ᠮᠠᠩᠯᠠᠢ ᠪᠣᠯᠤᠭᠰᠠᠨ ᠡᠯᠡᠰᠦᠨ ᠮᠠᠩᠬᠠᠨ ᠳᠠᠯᠠᠢᠤᠳ " .
- "ᠡᠨᠡ ᠪᠣᠯ ᠮᠢᠨᠤ ᠲᠥᠷᠦᠭᠰᠡᠨ ᠨᠤᠲᠤᠭ ᠮᠣᠩᠭᠣᠯᠤᠨ ᠰᠠᠶᠢᠬᠠᠨ ᠣᠷᠣᠨ ᠃";
-my $output_ok = "kentei , qangɣai , soyon-u öndür sayiqan niruɣun-ud " .
- "qoyitu jüg-ün čimeg boluɣsan oi köbči-yin aɣulan-ud " .
- "menen , šarɣ-a , nomin-u örgen yeke ɣobi-ud " .
- "emün-e jüg-ün manglai boluɣsan elesün mangqan dalai-ud " .
- "ene bol minu törügsen nutuɣ mongɣol-un sayiqan oron .";
-
-# A phrase from "Secret History of Mongols" by transcription of B.Sumyaabaatar.
-
-my $txt_1 = "ᠬᠠᠷᠴᠤᠶᠢᠨ ᠬᠥᠪᠡᠭᠦᠨ ᠪᠣᠷᠵᠢᠭᠢᠳᠠᠢᠮᠡᠷᠭᠡᠨ᠂ ᠮᠣᠩᠭᠣᠯᠵᠢᠨᠭᠣᠣᠠ ᠭᠡᠷᠭᠡᠢᠲᠦ ᠠᠵᠤᠭᠤ᠃ ᠪᠣᠷᠵᠢᠭᠢᠳᠠᠢᠮᠡᠷᠭᠡᠨᠤ ᠬᠥᠪᠡᠭᠦᠨ ᠲᠣᠷᠣᠭᠣᠯᠵᠢᠨᠪᠠᠶᠠᠨ᠂ ᠪᠣᠷᠣᠭᠴᠢᠨᠭᠣᠣᠠ ᠭᠡᠷᠭᠡᠢᠲᠦ᠂ ᠪᠣᠷᠣᠯᠳᠠᠢ ᠰᠤᠶᠠᠯᠪᠢ ᠵᠠᠯᠠᠭᠤᠲᠤ᠂ ᠳᠠᠶᠢᠷ ᠪᠣᠷᠣ ᠬᠣᠶᠠᠷ ᠬᠦᠯᠦᠭᠦᠳ ᠠᠭᠲᠠᠰᠲᠤ ᠪᠦᠯᠡᠭᠡ᠃ ᠲᠣᠷᠣᠭᠣᠯᠵᠢᠨᠤ ᠬᠥᠪᠡᠭᠦᠨ ᠳᠤᠸᠠᠰᠣᠬᠣᠷ ᠳᠣᠪᠤᠨᠮᠡᠷᠭᠡᠨ ᠬᠣᠶᠠᠷ ᠪᠦᠯᠡᠭᠡ᠃";
-
-my $txt_1_ok = "qarču-yin köbegün borjiɣidai-mergen, mongɣoljin-ɣoo-a gergeitü ajuɣu. borjiɣidai-mergen-u köbegün toroɣoljin-bayan, boroɣčin-ɣoo-a gergeitü, boroldai suyalbi jalaɣutu, dayir boro qoyar külügüd aɣtastu bülege. toroɣoljin-u köbegün duva-soqor dobun-mergen qoyar bülege.";
-
-my $txt_2 = "ᠤᠭᠤᠷᠬᠠᠢ ᠪᠠᠶᠠᠯᠢᠭᠤᠨ ᠣᠬᠢ ᠣᠯᠠᠨ ᠠᠭᠤᠯᠠ ᠳᠠᠪᠠᠭᠠᠨᠤᠳ";
-my $txt_2_ok = "uɣurqai bayaliɣ-un oki olan aɣula dabaɣan-ud";
-
-my $txt_3 = "ᠬᠦᠮᠦᠨ ᠪᠦᠭᠦᠳᠡ ᠴᠢᠮᠠᠶᠢᠭᠢ ᠬᠦᠯᠢᠶᠡᠵᠦ᠂ ᠪᠣᠳᠠᠰ ᠪᠦᠭᠦᠳᠡ ᠴᠢᠮᠠᠶᠢᠭᠢ ᠮᠥᠷᠦᠭᠡᠳᠡᠮᠦᠢ";
-my $txt_3_ok = "kümün bügüde čimayigi küliyejü, bodas bügüde čimayigi mörügedemüi";
-
-my $txt_4 = "ᠪᠠᠢᠭᠰᠠᠭᠠᠷ᠂ ᠲᠣᠭᠲᠠᠨᠢᠭᠰᠠᠨ᠂ ᠵᠢᠭᠰᠠᠭᠠᠯᠤᠨ";
-my $txt_4_ok = "baiɣsaɣar, toɣtaniɣsan, jiɣsaɣal-un";
-
-my $txt_5 = "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙ᠧᠫᠱᠸᠹᠺᠻᠼᠽᠿᡀᡁᡂᠾ᠀᠁᠄᠅";
-my $txt_5_ok = "0123456789ēpšvfḳǩczžlhzhchh§…:¶";
-
-
-
-
-use Lingua::Translit;
-
-my $tr = new Lingua::Translit($name);
-
-
-my $output = $tr->translit($input);
-
-# 1
-is($tr->can_reverse(), 1, "$name: is reversible");
-
-# 2
-is($output, $output_ok, "$name: transliteration");
-
-# 3
-$output = $tr->translit_reverse($output);
-is($output,$input, "$name: transliteration (reverse)");
-
-# 4
-my $o = $tr->translit($txt_1);
-is($o, $txt_1_ok, "$name: Short text #1");
-
-# 5
-$o = $tr->translit_reverse($o);
-is($o, $txt_1, "$name: Short text #1 (reverse)");
-
-# 6
-$o = $tr->translit($txt_2);
-is($o, $txt_2_ok, "$name: Short text #2");
-
-# 7
-$o = $tr->translit_reverse($o);
-is($o, $txt_2, "$name: Short text #2 (reverse)");
-
-# 8
-$o = $tr->translit($txt_3);
-is($o, $txt_3_ok, "$name: Short text #3");
-
-# 9
-$o = $tr->translit_reverse($o);
-is($o, $txt_3, "$name: Short text #3 (reverse)");
-
-# 10
-$o = $tr->translit($txt_4);
-is($o, $txt_4_ok, "$name: Short text #4");
-
-# 11
-$o = $tr->translit_reverse($o);
-is($o, $txt_4, "$name: Short text #4 (reverse)");
-
-# 12
-$o = $tr->translit($txt_5);
-is($o, $txt_5_ok, "$name: Short text #5");
-
-# 13
-$o = $tr->translit_reverse($o);
-is($o, $txt_5, "$name: Short text #5 (reverse)");
-
@@ -0,0 +1,39 @@
+use strict;
+use Test::More tests => 4;
+
+my $name = "ALA-LC RUS";
+my $reversible = 0;
+
+my $upper = "AБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ";
+my $upper_ok = "ABVGDEËZhZIĬKLMNOPRSTUFKhTSChShShch″Y′ĖIUIA";
+
+my $lower = "aбвгдеёжзийклмнопрстуфхцчшщъыьэюя";
+my $lower_ok = "abvgdeëzhziĭklmnoprstufkhtschshshch″y′ėiuia";
+
+my $context = "труъ ТРУЪ";
+my $context_ok = "tru TRU";
+
+
+use Lingua::Translit;
+
+my $tr = new Lingua::Translit($name);
+
+my $output;
+
+
+# 1
+is($tr->can_reverse(), $reversible, "$name: reversibility");
+
+# 2
+$output = $tr->translit($upper);
+is($output, $upper_ok, "$name: upper transliteration");
+
+# 3
+$output = $tr->translit($lower);
+is($output, $lower_ok, "$name: lower transliteration");
+
+# 4
+$output = $tr->translit($context);
+is($output, $context_ok, "$name: transliteration (context-sensitive)");
+
+# vim: sts=4 sw=4 ai et
@@ -0,0 +1,35 @@
+use strict;
+use Test::More tests => 3;
+
+my $name = "Common ARA";
+my $reversible = 0;
+
+my $input = "اخبار اليوم"; # "News Today"
+my $output_ok = "akhbar alywm";
+
+my $udohr = "يولد جميع الناس أحراراً متساوين في الكرامة والحقوق، " .
+ "وقد وهبوا عقلاً وضميراً وعليهم أن يعامل بعضهم بعضاً " .
+ "بروح الإخاء.";
+my $udohr_ok = "ywld jmy'e alnas ahrara mtsawyn fy alkramh walhqwq, " .
+ "wqd whbwa 'eqla wdmyra w'elyhm an y'eaml b'edhm " .
+ "b'eda brwh alekha'.";
+
+use Lingua::Translit;
+
+my $tr = Lingua::Translit->new( $name );
+
+
+my $output = $tr->translit( $input );
+
+# 1
+is( $tr->can_reverse(), $reversible, "$name: reversibility" );
+
+# 2
+is( $output, $output_ok, "$name: transliteration (short)" );
+
+$output = $tr->translit( $udohr );
+
+# 3
+is( $output, $udohr_ok, "$name: transliteration (UDOHR)" );
+
+# vim: set sts=4 sw=4 ts=4 ai et ft=perl:
@@ -2,14 +2,14 @@
#
# Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
-# Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
#
use strict;
use IO::File;
my $tbl_file = 'xml/tables.dump';
-my $infile = $ARGV[0] || die "usage: $0 file";
+my $infile = $ARGV[0] || die "usage: $0 file";
my $fh = new IO::File();
@@ -25,12 +25,10 @@ $fh->open($tbl_file) or die "$tbl_file: $!\n";
my $tbls = <$fh>;
$fh->close();
-if ($in_content =~ s/\n\%tables;\s+# PLACEHOLDER\s*\n/\n$tbls\n/)
-{
+if ( $in_content =~ s/\n\%tables;\s+# PLACEHOLDER\s*\n/\n$tbls\n/ ) {
print "$infile: substituted tables: " . length($tbls) . " bytes.\n";
}
-else
-{
+else {
print "$infile: no substitution.\n";
exit 1;
}
@@ -42,4 +40,4 @@ $fh->open("> $infile") or die "$infile: $!\n";
print $fh $in_content;
$fh->close();
-# vim: sts=4 sw=4 enc=utf-8 ai et
+# vim: sts=4 sw=4 ts=4 ai et
@@ -1,14 +1,12 @@
#!/usr/bin/perl -w
-
#
# Copyright (C) 2007-2008 ...
# Alex Linke <alinke@lingua-systems.com>
# Rona Linke <rlinke@lingua-systems.com>
-# Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
#
-
use strict;
use Getopt::Long;
@@ -17,10 +15,8 @@ require 5.008;
use Lingua::Translit;
use Lingua::Translit::Tables qw/:all/;
-
my $VERSION = '0.4';
-
=pod
=head1 NAME
@@ -102,92 +98,82 @@ my %opt = (
outfile => "",
reverse => 0,
list => 0,
- verbose => 0, # off
+ verbose => 0, # off
help => 0
);
-show_help(1) unless GetOptions(
- "trans|t=s" => \$opt{trans},
- "infile|i=s" => \$opt{infile},
- "outfile|o=s" => \$opt{outfile},
- "reverse|r" => \$opt{reverse},
- "list|l" => \$opt{list},
- "verbose|v" => \$opt{verbose},
- "help|h" => \$opt{help}
-);
-show_help(0) if $opt{help};
-show_list() if $opt{list};
-show_help(1) unless $opt{trans};
+show_help(1)
+ unless GetOptions(
+ "trans|t=s" => \$opt{trans},
+ "infile|i=s" => \$opt{infile},
+ "outfile|o=s" => \$opt{outfile},
+ "reverse|r" => \$opt{reverse},
+ "list|l" => \$opt{list},
+ "verbose|v" => \$opt{verbose},
+ "help|h" => \$opt{help}
+ );
+show_help(0) if $opt{help};
+show_list() if $opt{list};
+show_help(1) unless $opt{trans};
# Assure the requested transliteration is supported...
-die "$opt{trans} is not supported.\n" unless translit_supported($opt{trans});
+die "$opt{trans} is not supported.\n" unless translit_supported( $opt{trans} );
# ...and reverse transliteration is supported, too - if requested
die "$opt{trans} cannot be reversed.\n"
- if ($opt{reverse} && ! translit_reverse_supported($opt{trans}));
-
+ if ( $opt{reverse} && !translit_reverse_supported( $opt{trans} ) );
# If no input file was specified, use STDIN as a fallback. This way, translit
# may also be used in pipes!
my $in;
-if ($opt{infile})
-{
+if ( $opt{infile} ) {
print STDERR "Reading input from $opt{infile}...\n" if $opt{verbose};
open IN, "$opt{infile}" or die "$opt{infile}: $!\n";
$in = *IN;
}
-else
-{
+else {
print STDERR "Reading input from STDIN...\n" if $opt{verbose};
$in = *STDIN;
}
-
# If no output file was specified, use STDOUT as a fallback.
my $out;
-if ($opt{outfile})
-{
+if ( $opt{outfile} ) {
print STDERR "Writing output to $opt{outfile}...\n" if $opt{verbose};
open OUT, "> $opt{outfile}" or die "$opt{outfile}: $!\n";
$out = *OUT;
}
-else
-{
+else {
print STDERR "Writing output to STDOUT...\n" if $opt{verbose};
$out = *STDOUT;
}
-
# Slurp in all the input and close filehandle
local $/;
my $text = <$in>;
close($in);
-
# Transliterate
-my $tr = new Lingua::Translit($opt{trans});
+my $tr = new Lingua::Translit( $opt{trans} );
my $text_tr;
-unless ($opt{reverse})
-{
+unless ( $opt{reverse} ) {
print STDERR "Transliterating according to ", $tr->name(), "...\n"
- if $opt{verbose};
+ if $opt{verbose};
$text_tr = $tr->translit($text);
}
-else
-{
- print STDERR "Transliterating according to ", $tr->name(),
- " (reverse)...\n"
- if $opt{verbose};
+else {
+ print STDERR "Transliterating according to ", $tr->name(), " (reverse)...\n"
+ if $opt{verbose};
$text_tr = $tr->translit_reverse($text);
}
@@ -196,38 +182,33 @@ else
print $out $text_tr;
close($out);
-
-sub show_help
-{
+sub show_help {
my $retval = shift();
print "translit v$VERSION -- ",
- "(c) 2009-2010 Lingua-Systems Software GmbH\n\n",
- "usage: $0 -i FILE -o FILE -t NAME -r -l -v -h\n\n",
- " --infile -i FILE read input from FILE\n",
- " --outfile -o FILE write output to FILE\n",
- " --trans -t NAME use transliteration NAME\n",
- " --reverse -r transliterate in reverse direction\n",
- " --list -l list all supported transliterations\n\n",
- " --verbose -v print verbose status messages\n",
- " --help -h show this help\n\n",
- "Read translit(1) for details.\n";
+ "(c) 2009-2014 Lingua-Systems Software GmbH\n\n",
+ "usage: $0 -i FILE -o FILE -t NAME -r -l -v -h\n\n",
+ " --infile -i FILE read input from FILE\n",
+ " --outfile -o FILE write output to FILE\n",
+ " --trans -t NAME use transliteration NAME\n",
+ " --reverse -r transliterate in reverse direction\n",
+ " --list -l list all supported transliterations\n\n",
+ " --verbose -v print verbose status messages\n",
+ " --help -h show this help\n\n",
+ "Read translit(1) for details.\n";
exit($retval);
}
-
-sub show_list
-{
- print "Transliterations supported by Lingua::Translit v" .
- $Lingua::Translit::VERSION . ":\n\n";
+sub show_list {
+ print "Transliterations supported by Lingua::Translit v"
+ . $Lingua::Translit::VERSION . ":\n\n";
translit_list_supported();
exit(0);
}
-
=head1 RESTRICTIONS
The input has to be UTF-8 encoded.
@@ -242,9 +223,6 @@ Please report bugs to perl@lingua-systems.com.
L<Lingua::Translit>, L<Lingua::Translit::Tables>
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/online-transliteration.html>
-provides an online frontend for L<Lingua::Translit>.
-
=head1 AUTHORS
Alex Linke <alinke@lingua-systems.com>
@@ -255,7 +233,7 @@ Rona Linke <rlinke@lingua-systems.com>
Copyright (C) 2007-2008 Alex Linke and Rona Linke
-Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
This program is free software. It may be used, redistributed
and/or modified under the terms of either the GPL v2 or the
@@ -263,5 +241,4 @@ Artistic license.
=cut
-
-# vim: sts=4 sw=4 enc=utf-8 ai et
+# vim: sts=4 sw=4 ts=4 ai et
@@ -10,8 +10,8 @@ TABLES := din_31634.xml \
streamlined_system_bul.xml \
greeklish.xml \
common_deu.xml common_ron.xml common_ces.xml \
- common_classical_mon.xml common_slk.xml \
- common_slv.xml common_pol.xml \
+ common_slk.xml common_slv.xml common_pol.xml \
+ common_ara.xml \
gost_7-79_rus_old.xml gost_7-79_rus.xml \
gost_7-79_ukr.xml \
ala-lc_rus.xml
@@ -0,0 +1,436 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE translit SYSTEM "translit.dtd">
+
+
+<!--
+
+ Transliteration definitions for ALA-LC Romanization table for Russian.
+ This table is used by the Library of Congress and the American Library
+ Association.
+
+ Reference table: http://www.loc.gov/catdir/cpso/roman.html
+
+ Dmitry Smal <mialinx@gmail.com>
+
+ Copyright 2010 (C) Rusar Publishing
+
+-->
+
+
+<translit>
+
+ <name>ALA-LC RUS</name>
+ <desc>ALA-LC:1997, Cyrillic to Latin, Russian</desc>
+ <reverse>false</reverse>
+
+ <rules>
+
+ <rule>
+ <from>А</from>
+ <to>A</to>
+ </rule>
+
+ <rule>
+ <from>Б</from>
+ <to>B</to>
+ </rule>
+
+ <rule>
+ <from>В</from>
+ <to>V</to>
+ </rule>
+
+ <rule>
+ <from>Г</from>
+ <to>G</to>
+ </rule>
+
+ <rule>
+ <from>Д</from>
+ <to>D</to>
+ </rule>
+
+ <rule>
+ <from>Е</from>
+ <to>E</to>
+ </rule>
+
+ <rule>
+ <from>Ё</from>
+ <to>Ë</to> <!-- latin capital letter e with diaeresis -->
+ </rule>
+
+ <rule>
+ <from>Ж</from>
+ <to>Zh</to>
+ </rule>
+
+ <rule>
+ <from>З</from>
+ <to>Z</to>
+ </rule>
+
+ <rule>
+ <from>И</from>
+ <to>I</to>
+ </rule>
+
+ <rule>
+ <from>І</from> <!-- cyrillic capital letter byelorussian-ukrainian i -->
+ <to>Ī</to> <!-- latin capital letter i with macron -->
+ </rule>
+
+ <rule>
+ <from>Й</from>
+ <to>Ĭ</to> <!-- latin capital letter i with breve -->
+ </rule>
+
+ <rule>
+ <from>К</from>
+ <to>K</to>
+ </rule>
+
+ <rule>
+ <from>Л</from>
+ <to>L</to>
+ </rule>
+
+ <rule>
+ <from>М</from>
+ <to>M</to>
+ </rule>
+
+ <rule>
+ <from>Н</from>
+ <to>N</to>
+ </rule>
+
+ <rule>
+ <from>О</from>
+ <to>O</to>
+ </rule>
+
+ <rule>
+ <from>П</from>
+ <to>P</to>
+ </rule>
+
+ <rule>
+ <from>Р</from>
+ <to>R</to>
+ </rule>
+
+ <rule>
+ <from>С</from>
+ <to>S</to>
+ </rule>
+
+ <rule>
+ <from>Т</from>
+ <to>T</to>
+ </rule>
+
+ <rule>
+ <from>У</from>
+ <to>U</to>
+ </rule>
+
+ <rule>
+ <from>Ф</from>
+ <to>F</to>
+ </rule>
+
+ <rule>
+ <from>Х</from>
+ <to>Kh</to>
+ </rule>
+
+ <rule>
+ <from>Ц</from>
+ <to>TS</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>Ч</from>
+ <to>Ch</to>
+ </rule>
+
+ <rule>
+ <from>Ч</from>
+ <to>Ch</to>
+ </rule>
+
+ <rule>
+ <from>Ш</from>
+ <to>Sh</to>
+ </rule>
+
+ <rule>
+ <from>Щ</from>
+ <to>Shch</to>
+ </rule>
+
+ <rule>
+ <from>Ъ</from>
+ <to></to>
+ <context>
+ <before>\b</before> <!-- letter is disregarded in romanization when found at the end of a word -->
+ </context>
+ </rule>
+
+ <rule>
+ <from>Ъ</from>
+ <to>″</to> <!-- double prime -->
+ </rule>
+
+ <rule>
+ <from>Ы</from>
+ <to>Y</to>
+ </rule>
+
+ <rule>
+ <from>Ь</from>
+ <to>′</to> <!-- prime -->
+ </rule>
+
+ <rule>
+ <from>Ѣ</from> <!-- cyrillic capital yat -->
+ <to>IE</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>Э</from>
+ <to>Ė</to> <!-- latin capital letter e with dot above -->
+ </rule>
+
+ <rule>
+ <from>Ю</from>
+ <to>IU</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>Я</from>
+ <to>IA</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>Ѧ</from> <!-- cyrillic capital letter little yus -->
+ <to>Ę</to> <!-- latin capital letter e with ogonek -->
+ </rule>
+
+ <rule>
+ <from>Ө</from> <!-- cyrillic capital letter barred o -->
+ <to>Ḟ</to> <!-- latin capital letter f with dot above -->
+ </rule>
+
+ <rule>
+ <from>Ѵ</from> <!-- cyrillic capital letter izhitsa -->
+ <to>Ẏ</to> <!-- latin capital letter y with dot above -->
+ </rule>
+
+ <rule>
+ <from>а</from>
+ <to>a</to>
+ </rule>
+
+ <rule>
+ <from>б</from>
+ <to>b</to>
+ </rule>
+
+ <rule>
+ <from>в</from>
+ <to>v</to>
+ </rule>
+
+ <rule>
+ <from>г</from>
+ <to>g</to>
+ </rule>
+
+ <rule>
+ <from>д</from>
+ <to>d</to>
+ </rule>
+
+ <rule>
+ <from>е</from>
+ <to>e</to>
+ </rule>
+
+ <rule>
+ <from>ё</from>
+ <to>ë</to> <!-- latin small letter e with diaeresis -->
+ </rule>
+
+ <rule>
+ <from>ж</from>
+ <to>zh</to>
+ </rule>
+
+ <rule>
+ <from>з</from>
+ <to>z</to>
+ </rule>
+
+ <rule>
+ <from>и</from>
+ <to>i</to>
+ </rule>
+
+ <rule>
+ <from>і</from> <!-- cyrillic small letter byelorussian-ukrainian i -->
+ <to>ī</to> <!-- latin small letter i with macron -->
+ </rule>
+
+ <rule>
+ <from>й</from>
+ <to>ĭ</to> <!-- latin small letter i with breve -->
+ </rule>
+
+ <rule>
+ <from>к</from>
+ <to>k</to>
+ </rule>
+
+ <rule>
+ <from>л</from>
+ <to>l</to>
+ </rule>
+
+ <rule>
+ <from>м</from>
+ <to>m</to>
+ </rule>
+
+ <rule>
+ <from>н</from>
+ <to>n</to>
+ </rule>
+
+ <rule>
+ <from>о</from>
+ <to>o</to>
+ </rule>
+
+ <rule>
+ <from>п</from>
+ <to>p</to>
+ </rule>
+
+ <rule>
+ <from>р</from>
+ <to>r</to>
+ </rule>
+
+ <rule>
+ <from>с</from>
+ <to>s</to>
+ </rule>
+
+ <rule>
+ <from>т</from>
+ <to>t</to>
+ </rule>
+
+ <rule>
+ <from>у</from>
+ <to>u</to>
+ </rule>
+
+ <rule>
+ <from>ф</from>
+ <to>f</to>
+ </rule>
+
+ <rule>
+ <from>х</from>
+ <to>kh</to>
+ </rule>
+
+ <rule>
+ <from>ц</from>
+ <to>ts</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>ч</from>
+ <to>ch</to>
+ </rule>
+
+ <rule>
+ <from>ш</from>
+ <to>sh</to>
+ </rule>
+
+ <rule>
+ <from>щ</from>
+ <to>shch</to>
+ </rule>
+
+ <rule>
+ <from>ъ</from>
+ <to></to>
+ <context>
+ <before>\b</before> <!-- letter is disregarded in romanization when found at the end of a word -->
+ </context>
+ </rule>
+
+ <rule>
+ <from>ъ</from>
+ <to>″</to> <!-- double prime -->
+ </rule>
+
+ <rule>
+ <from>ы</from>
+ <to>y</to>
+ </rule>
+
+ <rule>
+ <from>ь</from>
+ <to>′</to> <!-- prime -->
+ </rule>
+
+ <rule>
+ <from>ѣ</from> <!-- cyrillic small yat -->
+ <to>ie</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>э</from>
+ <to>ė</to> <!-- latin small letter e with dot above -->
+ </rule>
+
+ <rule>
+ <from>ю</from>
+ <to>iu</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>я</from>
+ <to>ia</to> <!-- need ligature -->
+ </rule>
+
+ <rule>
+ <from>ѧ</from> <!-- cyrillic small letter little yus -->
+ <to>ę</to> <!-- latin small letter e with ogonek -->
+ </rule>
+
+ <rule>
+ <from>ө</from> <!-- cyrillic small letter barred o -->
+ <to>ḟ</to> <!-- latin small letter f with dot above -->
+ </rule>
+
+ <rule>
+ <from>ѵ</from> <!-- cyrillic small letter izhitsa -->
+ <to>ẏ</to> <!-- latin small letter y with dot above -->
+ </rule>
+
+ </rules>
+
+</translit>
+
+<!--
+ vim: sts=4 sw=4 ai et
+-->
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE translit SYSTEM "translit.dtd">
+
+
+<!--
+
+ Transliteration definitions for "Common ARA".
+
+ Copyright (C) 2014 Lingua-Systems Software GmbH
+
+ Loosely based on the transliteration table found at
+ http://www.arabic-keyboard.org/arabic/arabic-transliteration.php
+ and suggestions by Ahmed Elsheshtawy.
+
+-->
+
+
+<translit>
+
+ <name>Common ARA</name>
+ <desc>Common Romanization of Arabic</desc>
+ <reverse>false</reverse>
+
+ <rules>
+
+ <!-- Section #1 -->
+
+ <rule>
+ <from>ث</from> <!-- ARABIC LETTER THEH -->
+ <to>th</to>
+ </rule>
+
+ <rule>
+ <from>خ</from> <!-- ARABIC LETTER KHAH -->
+ <to>kh</to>
+ </rule>
+
+ <rule>
+ <from>ش</from> <!-- ARABIC LETTER SHEEN -->
+ <to>sh</to>
+ </rule>
+
+ <rule>
+ <from>غ</from> <!-- ARABIC LETTER GHAIN -->
+ <to>gh</to>
+ </rule>
+
+ <rule>
+ <from>ع</from> <!-- ARABIC LETTER AIN -->
+ <to>'e</to>
+ </rule>
+
+ <rule>
+ <from>ئ</from> <!-- ARABIC LETTER YEH WITH HAMZA ABOVE -->
+ <to>'e</to>
+ </rule>
+
+ <rule>
+ <from>ؤ</from> <!-- ARABIC LETTER WAW WITH HAMZA ABOVE -->
+ <to>'e</to>
+ </rule>
+
+
+ <!-- Section #2 -->
+
+ <rule>
+ <from>ا</from> <!-- ARABIC LETTER ALEF -->
+ <to>a</to>
+ </rule>
+
+ <rule>
+ <from>أ</from> <!-- ARABIC LETTER ALEF W. HAMZA ABOVE -->
+ <to>a</to>
+ </rule>
+
+ <rule>
+ <from>آ</from> <!-- ARABIC LETTER ALEF W. MADDA ABOVE -->
+ <to>a</to>
+ </rule>
+
+ <rule>
+ <from>ى</from> <!-- ARABIC LETTER ALEF MAKSURA -->
+ <to>a</to>
+ </rule>
+
+ <rule>
+ <from>إ</from> <!-- ARABIC LETTER ALEF W. HAMZA BELOW -->
+ <to>e</to>
+ </rule>
+
+ <rule>
+ <from>ب</from> <!-- ARABIC LETTER BEH -->
+ <to>b</to>
+ </rule>
+
+ <rule>
+ <from>ت</from> <!-- ARABIC LETTER TEH -->
+ <to>t</to>
+ </rule>
+
+ <rule>
+ <from>ج</from> <!-- ARABIC LETTER JEEM -->
+ <to>j</to>
+ </rule>
+
+ <rule>
+ <from>ح</from> <!-- ARABIC LETTER HAH -->
+ <to>h</to>
+ </rule>
+
+ <rule>
+ <from>د</from> <!-- ARABIC LETTER DAL -->
+ <to>d</to>
+ </rule>
+
+ <rule>
+ <from>ذ</from> <!-- ARABIC LETTER THAL -->
+ <to>d</to>
+ </rule>
+
+ <rule>
+ <from>ض</from> <!-- ARABIC LETTER DAD -->
+ <to>d</to>
+ </rule>
+
+ <rule>
+ <from>ر</from> <!-- ARABIC LETTER REH -->
+ <to>r</to>
+ </rule>
+
+ <rule>
+ <from>ز</from> <!-- ARABIC LETTER ZAIN -->
+ <to>z</to>
+ </rule>
+
+ <rule>
+ <from>ظ</from> <!-- ARABIC LETTER ZAH -->
+ <to>z</to>
+ </rule>
+
+ <rule>
+ <from>س</from> <!-- ARABIC LETTER SEEN -->
+ <to>s</to>
+ </rule>
+
+ <rule>
+ <from>ص</from> <!-- ARABIC LETTER SAD -->
+ <to>s</to>
+ </rule>
+
+ <rule>
+ <from>ط</from> <!-- ARABIC LETTER TAH -->
+ <to>t</to>
+ </rule>
+
+ <rule>
+ <from>ف</from> <!-- ARABIC LETTER FEH -->
+ <to>f</to>
+ </rule>
+
+ <rule>
+ <from>ق</from> <!-- ARABIC LETTER QAF -->
+ <to>q</to>
+ </rule>
+
+ <rule>
+ <from>ك</from> <!-- ARABIC LETTER KAF -->
+ <to>k</to>
+ </rule>
+
+ <rule>
+ <from>ل</from> <!-- ARABIC LETTER LAM -->
+ <to>l</to>
+ </rule>
+
+ <rule>
+ <from>م</from> <!-- ARABIC LETTER MEEM -->
+ <to>m</to>
+ </rule>
+
+ <rule>
+ <from>ن</from> <!-- ARABIC LETTER NOON -->
+ <to>n</to>
+ </rule>
+
+ <rule>
+ <from>ه</from> <!-- ARABIC LETTER HEH -->
+ <to>h</to>
+ </rule>
+
+ <rule>
+ <from>ة</from> <!-- ARABIC LETTER TEH MARBUTA -->
+ <to>h</to>
+ </rule>
+
+ <rule>
+ <from>و</from> <!-- ARABIC LETTER WAW -->
+ <to>w</to>
+ </rule>
+
+ <rule>
+ <from>ي</from> <!-- ARABIC LETTER YEH -->
+ <to>y</to>
+ </rule>
+
+ <rule>
+ <from>ء</from> <!-- ARABIC LETTER HAMZA -->
+ <to>'</to>
+ </rule>
+
+ <rule>
+ <from>؟</from> <!-- ARABIC QUESTION MARK -->
+ <to>?</to>
+ </rule>
+
+ <rule>
+ <from>،</from> <!-- ARABIC COMMA -->
+ <to>,</to>
+ </rule>
+
+
+ <!-- Section #3 -->
+
+ <rule>
+ <from>ـ</from> <!-- ARABIC TATWEEL -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ً</from> <!-- ARABIC FATHATAN -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ٌ</from> <!-- ARABIC DAMMATAN -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ٍ</from> <!-- ARABIC KASRATAN -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>َ</from> <!-- ARABIC FATHA -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ُ</from> <!-- ARABIC DAMMA -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ِ</from> <!-- ARABIC KASRA -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ّ</from> <!-- ARABIC SHADDA -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>ْ</from> <!-- ARABIC SUKUN -->
+ <to></to>
+ </rule>
+
+ <rule>
+ <from>‏</from> <!-- RIGHT-TO-LEFT MARK -->
+ <to></to>
+ </rule>
+
+ </rules>
+
+</translit>
+
+<!-- vim: set sts=4 sw=4 ts=4 ai et ft=xml: -->
@@ -1,362 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!DOCTYPE translit SYSTEM "translit.dtd">
-
-
-<!--
-
- Transliteration definitions for the common transliteration of Classical
- Mongolian Script to Latin.
-
- Copyright 2008 Bayanzul Lodoysamba <baynaa@users.sourceforge.net>
-
--->
-
-
-<translit>
-
- <!-- meta data -->
-
- <name>Common Classical MON</name>
-
- <desc>Classical Mongolian Script to Latin</desc>
-
- <reverse>true</reverse>
-
-
- <!-- set of transliteration rules -->
-
- <rules>
- <rule>
- <from>ᠠ</from> <!-- MONGOLIAN LETTER A (ᠠ) -->
- <to>a</to>
- </rule>
- <rule>
- <from>ᠡ</from> <!-- MONGOLIAN LETTER E (ᠡ) -->
- <to>e</to>
- </rule>
- <rule>
- <from>ᠢ</from> <!-- MONGOLIAN LETTER I (ᠢ) -->
- <to>i</to>
- </rule>
- <rule>
- <from>ᠣ</from> <!-- MONGOLIAN LETTER O (ᠣ) -->
- <to>o</to>
- </rule>
- <rule>
- <from>ᠤ</from> <!-- MONGOLIAN LETTER U (ᠤ) -->
- <to>u</to>
- </rule>
- <rule>
- <from>ᠥ</from> <!-- MONGOLIAN LETTER OE (ᠥ) -->
- <to>ö</to>
- </rule>
- <rule>
- <from>ᠦ</from> <!-- MONGOLIAN LETTER UE (ᠦ) -->
- <to>ü</to>
- </rule>
- <rule>
- <from>ᠧ</from> <!-- MONGOLIAN LETTER EE (ᠧ) -->
- <to>ē</to>
- </rule>
- <rule>
- <from>ᠩ</from> <!-- MONGOLIAN LETTER ANG (ᠩ) -->
- <to>ng</to>
- </rule>
- <rule>
- <from>ᠨ</from> <!-- MONGOLIAN LETTER NA (ᠨ) -->
- <to>n</to>
- </rule>
- <rule>
- <from>ᠪ</from> <!-- MONGOLIAN LETTER BA (ᠪ) -->
- <to>b</to>
- </rule>
- <rule>
- <from>ᠫ</from> <!-- MONGOLIAN LETTER PA (ᠫ) -->
- <to>p</to>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
- <to>k</to>
- <context>
- <before>i</before>
- </context>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) masculine form -->
- <to>q</to>
- <context>
- <before>᠎?[aou]</before>
- </context>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) masculine form -->
- <to>q</to>
- <context>
- <after>[aou]</after>
- </context>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
- <to>k</to>
- <context>
- <before>[üeö]</before>
- </context>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
- <to>k</to>
- <context>
- <after>[üeö]</after>
- </context>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) masculine form -->
- <to>q</to>
- </rule>
- <rule>
- <from>ᠬ</from> <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
- <to>k</to>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) feminine form -->
- <to>g</to>
- <context>
- <before>i\s</before>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form -->
- <to>ɣ</to>
- <context>
- <before>᠎?[aou]</before>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form -->
- <to>ɣ</to>
- <context>
- <after>[aou]</after>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) feminine form -->
- <to>g</to>
- <context>
- <before>[üeö]</before>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) feminine form() -->
- <to>g</to>
- <context>
- <after>[üeö]</after>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
- <to>ɣ</to>
- <context>
- <after>[aou].</after>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
- <to>ɣ</to>
- <context>
- <after>[aou].i</after>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
- <to>ɣ</to>
- <context>
- <after>[aou]..i</after>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
- <to>ɣ</to>
- <context>
- <before>.[aou]</before>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
- <to>g</to>
- <context>
- <before>.[üeö]</before>
- </context>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) feminine form() -->
- <to>g</to>
- </rule>
- <rule>
- <from>ᠭ</from> <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
- <to>ɣ</to>
- </rule>
- <rule>
- <from>ᠮ</from> <!-- MONGOLIAN LETTER MA (ᠮ) -->
- <to>m</to>
- </rule>
- <rule>
- <from>ᠰ</from> <!-- MONGOLIAN LETTER SA (ᠰ) -->
- <to>s</to>
- </rule>
- <rule>
- <from>ᠱ</from> <!-- MONGOLIAN LETTER SHA (ᠱ) -->
- <to>š</to>
- </rule>
- <rule>
- <from>ᠲ</from> <!-- MONGOLIAN LETTER TA (ᠲ) -->
- <to>t</to>
- </rule>
- <rule>
- <from>ᠳ</from> <!-- MONGOLIAN LETTER DA (ᠳ) -->
- <to>d</to>
- </rule>
- <rule>
- <from>ᠴ</from> <!-- MONGOLIAN LETTER CHA (ᠴ) -->
- <to>č</to>
- </rule>
- <rule>
- <from>ᠵ</from> <!-- MONGOLIAN LETTER JA (ᠵ) -->
- <to>j</to>
- </rule>
- <rule>
- <from>ᠶ</from> <!-- MONGOLIAN LETTER YA (ᠶ) -->
- <to>y</to>
- </rule>
- <rule>
- <from>ᠷ</from> <!-- MONGOLIAN LETTER RA (ᠷ) -->
- <to>r</to>
- </rule>
- <rule>
- <from>ᠸ</from> <!-- MONGOLIAN LETTER WA (ᠸ) -->
- <to>v</to>
- </rule>
- <rule>
- <from>ᠸ</from> <!-- MONGOLIAN LETTER WA (ᠸ) -->
- <to>w</to>
- </rule>
- <rule>
- <from>ᠹ</from> <!-- MONGOLIAN LETTER FA (ᠹ) -->
- <to>f</to>
- </rule>
- <rule>
- <from>ᠺ</from> <!-- MONGOLIAN LETTER KA (ᠺ) -->
- <to>ḳ</to>
- </rule>
- <rule>
- <from>ᠻ</from> <!-- MONGOLIAN LETTER KHA (ᠻ) -->
- <to>ǩ</to>
- </rule>
- <rule>
- <from>ᠿ</from> <!-- MONGOLIAN LETTER ZRA (ᠿ) -->
- <to>ž</to>
- </rule>
- <rule>
- <from>ᡀ</from> <!-- MONGOLIAN LETTER LHA (ᡀ) -->
- <to>lh</to>
- </rule>
- <rule>
- <from>ᠯ</from> <!-- MONGOLIAN LETTER LA (ᠯ) -->
- <to>l</to>
- </rule>
- <rule>
- <from>ᡁ</from> <!-- MONGOLIAN LETTER ZHI (ᡁ) -->
- <to>zh</to>
- </rule>
- <rule>
- <from>ᠽ</from> <!-- MONGOLIAN LETTER ZA (ᠽ) -->
- <to>z</to>
- </rule>
- <rule>
- <from>ᡂ</from> <!-- MONGOLIAN LETTER CHI (ᡂ) -->
- <to>ch</to>
- </rule>
- <rule>
- <from>ᠼ</from> <!-- MONGOLIAN LETTER TSA (ᠼ) -->
- <to>c</to>
- </rule>
- <rule>
- <from>ᠾ</from> <!-- MONGOLIAN LETTER HAA (ᠾ) -->
- <to>h</to>
- </rule>
- <rule>
- <from>᠀</from> <!-- MONGOLIAN BIRGA (᠀) -->
- <to>§</to>
- </rule>
- <rule>
- <from>᠁</from> <!-- MONGOLIAN ELLIPSIS (᠁) -->
- <to>…</to>
- </rule>
- <rule>
- <from>᠂</from> <!-- MONGOLIAN COMMA (᠂) -->
- <to>,</to>
- </rule>
- <rule>
- <from>᠃</from> <!-- MONGOLIAN FULL STOP (᠃) -->
- <to>.</to>
- </rule>
- <rule>
- <from>᠄</from> <!-- MONGOLIAN COLON (᠄) -->
- <to>:</to>
- </rule>
- <rule>
- <from>᠅</from> <!-- MONGOLIAN FOUR DOTS (᠅) -->
- <to>¶</to>
- </rule>
- <rule>
- <from>᠎</from> <!-- MONGOLIAN VOWEL SEPARATOR () -->
- <to>-</to>
- </rule>
- <rule>
- <from>᠐</from> <!-- MONGOLIAN DIGIT ZERO (᠐) -->
- <to>0</to>
- </rule>
- <rule>
- <from>᠑</from> <!-- MONGOLIAN DIGIT ONE (᠑) -->
- <to>1</to>
- </rule>
- <rule>
- <from>᠒</from> <!-- MONGOLIAN DIGIT TWO (᠒) -->
- <to>2</to>
- </rule>
- <rule>
- <from>᠓</from> <!-- MONGOLIAN DIGIT THREE (᠓) -->
- <to>3</to>
- </rule>
- <rule>
- <from>᠔</from> <!-- MONGOLIAN DIGIT FOUR (᠔) -->
- <to>4</to>
- </rule>
- <rule>
- <from>᠕</from> <!-- MONGOLIAN DIGIT FIVE (᠕) -->
- <to>5</to>
- </rule>
- <rule>
- <from>᠖</from> <!-- MONGOLIAN DIGIT SIX (᠖) -->
- <to>6</to>
- </rule>
- <rule>
- <from>᠗</from> <!-- MONGOLIAN DIGIT SEVEN (᠗) -->
- <to>7</to>
- </rule>
- <rule>
- <from>᠘</from> <!-- MONGOLIAN DIGIT EIGHT (᠘) -->
- <to>8</to>
- </rule>
- <rule>
- <from>᠙</from> <!-- MONGOLIAN DIGIT NINE (᠙) -->
- <to>9</to>
- </rule>
- </rules>
-
-</translit>
-
-
-<!--
- vim: sts=4 sw=4 ai et
--->
@@ -3507,339 +3507,6 @@
],
"reverse" => "true"
},
- "common_classical_mon" => {
- "desc" => "Classical Mongolian Script to Latin",
- "name" => "Common Classical MON",
- "id" => "common_classical_mon",
- "rules" => [
- {
- "to" => "a",
- "from" => "\x{1820}"
- },
- {
- "to" => "e",
- "from" => "\x{1821}"
- },
- {
- "to" => "i",
- "from" => "\x{1822}"
- },
- {
- "to" => "o",
- "from" => "\x{1823}"
- },
- {
- "to" => "u",
- "from" => "\x{1824}"
- },
- {
- "to" => "\x{f6}",
- "from" => "\x{1825}"
- },
- {
- "to" => "\x{fc}",
- "from" => "\x{1826}"
- },
- {
- "to" => "\x{113}",
- "from" => "\x{1827}"
- },
- {
- "to" => "ng",
- "from" => "\x{1829}"
- },
- {
- "to" => "n",
- "from" => "\x{1828}"
- },
- {
- "to" => "b",
- "from" => "\x{182a}"
- },
- {
- "to" => "p",
- "from" => "\x{182b}"
- },
- {
- "to" => "k",
- "from" => "\x{182c}",
- "context" => {
- "before" => "i"
- }
- },
- {
- "to" => "q",
- "from" => "\x{182c}",
- "context" => {
- "before" => "\x{180e}?[aou]"
- }
- },
- {
- "to" => "q",
- "from" => "\x{182c}",
- "context" => {
- "after" => "[aou]"
- }
- },
- {
- "to" => "k",
- "from" => "\x{182c}",
- "context" => {
- "before" => "[\x{fc}e\x{f6}]"
- }
- },
- {
- "to" => "k",
- "from" => "\x{182c}",
- "context" => {
- "after" => "[\x{fc}e\x{f6}]"
- }
- },
- {
- "to" => "q",
- "from" => "\x{182c}"
- },
- {
- "to" => "k",
- "from" => "\x{182c}"
- },
- {
- "to" => "g",
- "from" => "\x{182d}",
- "context" => {
- "before" => "i\\s"
- }
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}",
- "context" => {
- "before" => "\x{180e}?[aou]"
- }
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}",
- "context" => {
- "after" => "[aou]"
- }
- },
- {
- "to" => "g",
- "from" => "\x{182d}",
- "context" => {
- "before" => "[\x{fc}e\x{f6}]"
- }
- },
- {
- "to" => "g",
- "from" => "\x{182d}",
- "context" => {
- "after" => "[\x{fc}e\x{f6}]"
- }
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}",
- "context" => {
- "after" => "[aou]."
- }
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}",
- "context" => {
- "after" => "[aou].i"
- }
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}",
- "context" => {
- "after" => "[aou]..i"
- }
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}",
- "context" => {
- "before" => ".[aou]"
- }
- },
- {
- "to" => "g",
- "from" => "\x{182d}",
- "context" => {
- "before" => ".[\x{fc}e\x{f6}]"
- }
- },
- {
- "to" => "g",
- "from" => "\x{182d}"
- },
- {
- "to" => "\x{263}",
- "from" => "\x{182d}"
- },
- {
- "to" => "m",
- "from" => "\x{182e}"
- },
- {
- "to" => "s",
- "from" => "\x{1830}"
- },
- {
- "to" => "\x{161}",
- "from" => "\x{1831}"
- },
- {
- "to" => "t",
- "from" => "\x{1832}"
- },
- {
- "to" => "d",
- "from" => "\x{1833}"
- },
- {
- "to" => "\x{10d}",
- "from" => "\x{1834}"
- },
- {
- "to" => "j",
- "from" => "\x{1835}"
- },
- {
- "to" => "y",
- "from" => "\x{1836}"
- },
- {
- "to" => "r",
- "from" => "\x{1837}"
- },
- {
- "to" => "v",
- "from" => "\x{1838}"
- },
- {
- "to" => "w",
- "from" => "\x{1838}"
- },
- {
- "to" => "f",
- "from" => "\x{1839}"
- },
- {
- "to" => "\x{1e33}",
- "from" => "\x{183a}"
- },
- {
- "to" => "\x{1e9}",
- "from" => "\x{183b}"
- },
- {
- "to" => "\x{17e}",
- "from" => "\x{183f}"
- },
- {
- "to" => "lh",
- "from" => "\x{1840}"
- },
- {
- "to" => "l",
- "from" => "\x{182f}"
- },
- {
- "to" => "zh",
- "from" => "\x{1841}"
- },
- {
- "to" => "z",
- "from" => "\x{183d}"
- },
- {
- "to" => "ch",
- "from" => "\x{1842}"
- },
- {
- "to" => "c",
- "from" => "\x{183c}"
- },
- {
- "to" => "h",
- "from" => "\x{183e}"
- },
- {
- "to" => "\x{a7}",
- "from" => "\x{1800}"
- },
- {
- "to" => "\x{2026}",
- "from" => "\x{1801}"
- },
- {
- "to" => ",",
- "from" => "\x{1802}"
- },
- {
- "to" => ".",
- "from" => "\x{1803}"
- },
- {
- "to" => ":",
- "from" => "\x{1804}"
- },
- {
- "to" => "\x{b6}",
- "from" => "\x{1805}"
- },
- {
- "to" => "-",
- "from" => "\x{180e}"
- },
- {
- "to" => 0,
- "from" => "\x{1810}"
- },
- {
- "to" => 1,
- "from" => "\x{1811}"
- },
- {
- "to" => 2,
- "from" => "\x{1812}"
- },
- {
- "to" => 3,
- "from" => "\x{1813}"
- },
- {
- "to" => 4,
- "from" => "\x{1814}"
- },
- {
- "to" => 5,
- "from" => "\x{1815}"
- },
- {
- "to" => 6,
- "from" => "\x{1816}"
- },
- {
- "to" => 7,
- "from" => "\x{1817}"
- },
- {
- "to" => 8,
- "from" => "\x{1818}"
- },
- {
- "to" => 9,
- "from" => "\x{1819}"
- }
- ],
- "reverse" => "true"
- },
"din_1460_bul" => {
"desc" => "DIN 1460:1982, Cyrillic to Latin, Bulgarian",
"name" => "DIN 1460 BUL",
@@ -4524,6 +4191,206 @@
],
"reverse" => "false"
},
+ "common_ara" => {
+ "desc" => "Common Romanization of Arabic",
+ "name" => "Common ARA",
+ "id" => "common_ara",
+ "rules" => [
+ {
+ "to" => "th",
+ "from" => "\x{62b}"
+ },
+ {
+ "to" => "kh",
+ "from" => "\x{62e}"
+ },
+ {
+ "to" => "sh",
+ "from" => "\x{634}"
+ },
+ {
+ "to" => "gh",
+ "from" => "\x{63a}"
+ },
+ {
+ "to" => "'e",
+ "from" => "\x{639}"
+ },
+ {
+ "to" => "'e",
+ "from" => "\x{626}"
+ },
+ {
+ "to" => "'e",
+ "from" => "\x{624}"
+ },
+ {
+ "to" => "a",
+ "from" => "\x{627}"
+ },
+ {
+ "to" => "a",
+ "from" => "\x{623}"
+ },
+ {
+ "to" => "a",
+ "from" => "\x{622}"
+ },
+ {
+ "to" => "a",
+ "from" => "\x{649}"
+ },
+ {
+ "to" => "e",
+ "from" => "\x{625}"
+ },
+ {
+ "to" => "b",
+ "from" => "\x{628}"
+ },
+ {
+ "to" => "t",
+ "from" => "\x{62a}"
+ },
+ {
+ "to" => "j",
+ "from" => "\x{62c}"
+ },
+ {
+ "to" => "h",
+ "from" => "\x{62d}"
+ },
+ {
+ "to" => "d",
+ "from" => "\x{62f}"
+ },
+ {
+ "to" => "d",
+ "from" => "\x{630}"
+ },
+ {
+ "to" => "d",
+ "from" => "\x{636}"
+ },
+ {
+ "to" => "r",
+ "from" => "\x{631}"
+ },
+ {
+ "to" => "z",
+ "from" => "\x{632}"
+ },
+ {
+ "to" => "z",
+ "from" => "\x{638}"
+ },
+ {
+ "to" => "s",
+ "from" => "\x{633}"
+ },
+ {
+ "to" => "s",
+ "from" => "\x{635}"
+ },
+ {
+ "to" => "t",
+ "from" => "\x{637}"
+ },
+ {
+ "to" => "f",
+ "from" => "\x{641}"
+ },
+ {
+ "to" => "q",
+ "from" => "\x{642}"
+ },
+ {
+ "to" => "k",
+ "from" => "\x{643}"
+ },
+ {
+ "to" => "l",
+ "from" => "\x{644}"
+ },
+ {
+ "to" => "m",
+ "from" => "\x{645}"
+ },
+ {
+ "to" => "n",
+ "from" => "\x{646}"
+ },
+ {
+ "to" => "h",
+ "from" => "\x{647}"
+ },
+ {
+ "to" => "h",
+ "from" => "\x{629}"
+ },
+ {
+ "to" => "w",
+ "from" => "\x{648}"
+ },
+ {
+ "to" => "y",
+ "from" => "\x{64a}"
+ },
+ {
+ "to" => "'",
+ "from" => "\x{621}"
+ },
+ {
+ "to" => "?",
+ "from" => "\x{61f}"
+ },
+ {
+ "to" => ",",
+ "from" => "\x{60c}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{640}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{64b}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{64c}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{64d}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{64e}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{64f}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{650}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{651}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{652}"
+ },
+ {
+ "to" => "",
+ "from" => "\x{200f}"
+ }
+ ],
+ "reverse" => "false"
+ },
"gost_7.79_rus" => {
"desc" => "GOST 7.79:2000, Cyrillic to Latin, Russian",
"name" => "GOST 7.79 RUS",
@@ -59,6 +59,4 @@
</translit>
-<!--
- vim: sts=4 sw=4 ai et
--->
+<!-- vim: set sts=4 sw=4 ts=4 ai et ft=xml: -->
@@ -2,28 +2,26 @@
<!--
-
- Basic document type definition for transliteration tables, "translit".
+
+ Basic document type definition for transliteration tables.
Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
- Copyright (C) 2009 Lingua-Systems Software GmbH
+ Copyright (C) 2009-2014 Lingua-Systems Software GmbH
-->
<!ELEMENT translit (name, desc, reverse, rules)>
-<!ELEMENT name (#PCDATA)>
-<!ELEMENT desc (#PCDATA)>
+<!ELEMENT name (#PCDATA)>
+<!ELEMENT desc (#PCDATA)>
<!ELEMENT reverse (#PCDATA)>
-<!ELEMENT rules (rule+)>
-<!ELEMENT rule (from, to, context?)>
-<!ELEMENT from (#PCDATA)>
-<!ELEMENT to (#PCDATA)>
+<!ELEMENT rules (rule+)>
+<!ELEMENT rule (from, to, context?)>
+<!ELEMENT from (#PCDATA)>
+<!ELEMENT to (#PCDATA)>
<!ELEMENT context ((before|after),(before|after)?)>
<!ELEMENT before (#PCDATA)>
-<!ELEMENT after (#PCDATA)>
+<!ELEMENT after (#PCDATA)>
-<!--
- vim: sts=2 enc=utf-8
--->
+<!-- vim: set sts=4 ts=4 sw=4 ai et: -->
@@ -2,10 +2,9 @@
#
# Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
-# Copyright (C) 2009 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
#
-
use strict;
use warnings;
@@ -15,30 +14,26 @@ use XML::LibXML;
use Data::Dumper;
use Getopt::Long;
-
my $VERSION = '0.5';
-
my %tables;
-
# set default options
my %opt = (
output => "tables.dump",
verbose => 0,
);
-
# parse commandline options
-show_help(1) unless GetOptions(
- "output|o=s" => \$opt{output},
- "verbose|v" => \$opt{verbose},
- "help|h" => \$opt{help}
-);
-show_help(1) if scalar(@ARGV) == 0; # No XML file(s) given
+show_help(1)
+ unless GetOptions(
+ "output|o=s" => \$opt{output},
+ "verbose|v" => \$opt{verbose},
+ "help|h" => \$opt{help}
+ );
+show_help(1) if scalar(@ARGV) == 0; # No XML file(s) given
show_help(0) if $opt{help};
-
my $xmlparser = new XML::LibXML();
# Set parser options
@@ -48,86 +43,78 @@ $xmlparser->expand_entities(1);
$xmlparser->keep_blanks(1);
$xmlparser->line_numbers(1);
-
# Treat everything else in @ARGV as a filename
foreach my $file (@ARGV) {
print "Parsing $file..." if $opt{verbose};
- my %counts = (rules => 0, contexts => 0);
+ my %counts = ( rules => 0, contexts => 0 );
my $ds;
my $doc = $xmlparser->parse_file($file)
- or die "Error parsing $file: $!\n";
+ or die "Error parsing $file: $!\n";
# Retrieve meta-documentation from XML document first
- foreach my $meta (qw/name desc reverse/)
- {
+ foreach my $meta (qw/name desc reverse/) {
my @nodes = $doc->findnodes("/translit/$meta");
- die "#/translit/$meta != 1" if (scalar(@nodes) != 1);
+ die "#/translit/$meta != 1" if ( scalar(@nodes) != 1 );
$ds->{$meta} = $nodes[0]->to_literal();
}
-
# Perform some basic meta data checks
- die "Name undefined.\n" unless $ds->{name};
- die "Description undefined.\n" unless $ds->{desc};
- die "Reversibility undefined.\n" unless $ds->{reverse};
+ die "Name undefined.\n" unless $ds->{name};
+ die "Description undefined.\n" unless $ds->{desc};
+ die "Reversibility undefined.\n" unless $ds->{reverse};
# Check <reverse> tag contains valid data.
# TODO: move this to the DTD
die "Reversibility: '$ds->{reverse}' -- Should be 'true' or 'false'.\n"
- unless $ds->{reverse} =~ /^(true|false)$/;
+ unless $ds->{reverse} =~ /^(true|false)$/;
# Set the table's identifier
- $ds->{id} = lc($ds->{name});
+ $ds->{id} = lc( $ds->{name} );
$ds->{id} =~ s/\s/_/g;
-
# Retrieve all rules, extract their data and store it to an appropriate
# data structure
- foreach my $rule ($doc->findnodes("/translit/rules/rule"))
- {
+ foreach my $rule ( $doc->findnodes("/translit/rules/rule") ) {
my @nodes;
my $rule_ds;
-
# Retrieve "from" and "to" literals
- foreach my $n (qw/from to/)
- {
+ foreach my $n (qw/from to/) {
@nodes = $rule->findnodes("./$n");
- die "#/translit/rules/rules/$n != 1 " .
- "(at line " . $rule->line_number() . ")\n"
- if (scalar(@nodes) != 1);
+ die "#/translit/rules/rules/$n != 1 "
+ . "(at line "
+ . $rule->line_number() . ")\n"
+ if ( scalar(@nodes) != 1 );
$rule_ds->{$n} = $nodes[0]->to_literal();
}
-
# Retrieve rule's "context"
@nodes = $rule->findnodes("./context");
- die "#/translit/rules/rule/context > 1 " .
- "(at line " . $rule->line_number() . ")\n"
- if (scalar(@nodes) > 1);
+ die "#/translit/rules/rule/context > 1 "
+ . "(at line "
+ . $rule->line_number() . ")\n"
+ if ( scalar(@nodes) > 1 );
# Process rule's "context" if necessary
- if (scalar(@nodes))
- {
- foreach my $context (qw/before after/)
- {
+ if ( scalar(@nodes) ) {
+ foreach my $context (qw/before after/) {
@nodes = $rule->findnodes("./context/$context");
- die "#/translit/rules/rule/context/$context > 1 " .
- "(at line " . $rule->line_number() . ")\n"
- if (scalar(@nodes) > 1);
+ die "#/translit/rules/rule/context/$context > 1 "
+ . "(at line "
+ . $rule->line_number() . ")\n"
+ if ( scalar(@nodes) > 1 );
# Copy the context to the rule's data structure
- if (scalar(@nodes))
- {
+ if ( scalar(@nodes) ) {
$rule_ds->{context}->{$context} = $nodes[0]->to_literal();
}
}
@@ -137,26 +124,23 @@ foreach my $file (@ARGV) {
$counts{rules}++;
-
die $rule_ds->{name} . ": from==to -> " . $rule_ds->{from} . "\n"
- if ($rule_ds->{from} eq $rule_ds->{to});
+ if ( $rule_ds->{from} eq $rule_ds->{to} );
- push @{$ds->{rules}}, $rule_ds;
+ push @{ $ds->{rules} }, $rule_ds;
}
-
# Copy transliteration structure over to the final hash
- $tables{$ds->{id}} = $ds;
+ $tables{ $ds->{id} } = $ds;
print " ($ds->{id}: rules=$counts{rules}, contexts=$counts{contexts})\n"
- if $opt{verbose};
+ if $opt{verbose};
- undef($ds); # free memory
+ undef($ds); # free memory
}
-
# Configure Data::Dumper
-my $dumper = new Data::Dumper([ \%tables ], [ qw/*tables/ ]);
+my $dumper = new Data::Dumper( [ \%tables ], [qw/*tables/] );
$dumper->Purity(0);
$dumper->Useqq(1);
$dumper->Indent(1);
@@ -166,25 +150,22 @@ open FH, ">$opt{output}" or die "$opt{output}: $!\n";
print FH $dumper->Dump();
close(FH);
-print scalar(keys(%tables)),
- " transliteration table(s) dumped to $opt{output}.\n"
- if $opt{verbose};
+print scalar( keys(%tables) ),
+ " transliteration table(s) dumped to $opt{output}.\n"
+ if $opt{verbose};
-
-sub show_help
-{
+sub show_help {
my $retval = shift();
print STDERR
- "xml2dump v$VERSION -- Copyright 2007-2008 by Alex Linke ",
- "<alinke\@lingua-systems.com>\n\n",
- "usage: $0 [-v -h] -o FILE XML-FILE(s)\n\n",
- "\t--output -o FILE set output file (default: transtbl.dump)\n",
- "\t--verbose -v be verbose\n",
- "\t--help -h show this help\n";
+ "xml2dump v$VERSION -- Copyright 2007-2008 by Alex Linke ",
+ "<alinke\@lingua-systems.com>\n\n",
+ "usage: $0 [-v -h] -o FILE XML-FILE(s)\n\n",
+ "\t--output -o FILE set output file (default: transtbl.dump)\n",
+ "\t--verbose -v be verbose\n",
+ "\t--help -h show this help\n";
exit($retval);
}
-
-# vim: sw=4 sts=4 enc=utf-8 ai et
+# vim: sw=4 sts=4 ts=4 ai et