Search the CPAN - metacpan.org

Changes	1 16
MANIFEST	2 4
META.yml	10 18
Makefile.PL	10 8
README	1 1
developer-manual__eng.pdf	--
lib/Lingua/Translit/Tables.pm	56 56
lib/Lingua/Translit.pm	118 68
t/01_Lingua-Translit-Tables.t	3 3
t/02_Lingua-Translit.t	3 3
t/17_tr_Common_Classical_MON.t	96 0
t/28_tr_ALA-LC_RUS.t	0 39
t/29_tr_Common_ARA.t	0 35
tools/substitute_tables.pl	7 5
translit	66 43
xml/Makefile	2 2
xml/ala-lc_rus.xml	0 436
xml/common_ara.xml	0 279
xml/common_classical_mon.xml	362 0
xml/tables.dump	333 200
xml/template.xml	3 1
xml/translit.dtd	13 11
xml/xml2dump.pl	70 51
23 files changed (This is a version diff)	1156 1279

Changes

@@ -1,3 +1,18 @@
+0.21  -- 2014-10-24
+
+    * Added "Common ARA" Arabic transliteration. Thanks to Ahmed Elsheshtawy
+      for suggesting this transliteration and his help implementing it!
+
+0.20  -- 2014-05-22
+
+    * lib/Lingua/Translit/Tables.pm: Added sub to handle Perl's
+      "Unicode Bug", see perlunicode for details (v0.10). This fixes the
+      errors reported by Perl v5.18.0.
+    * MANIFEST: Added missing "ALA-LC RUS" files to distribution.
+    * xml/: Removed "Common Classical MON" transliteration due to errors
+      and the contributor's lack of time to fix these.
+    * Code cleanups
+
 0.19  -- 2011-04-15
 
     * Added "ALA-LC RUS" transliteration (ALA-LC:1997). Thanks to Dmitry Smal
@@ -151,4 +166,4 @@
         * ISO 843
         * DIN 5008
 
-# vim: sw=4 sts=4 ai et ft=changelog
+# vim: set sw=4 sts=4 ts=4 ai et ft=changelog:

MANIFEST

@@ -3,6 +3,7 @@ MANIFEST
 MANIFEST.SKIP
 lib/Lingua/Translit.pm
 lib/Lingua/Translit/Tables.pm
+xml/ala-lc_rus.xml
 xml/tables.dump
 xml/Makefile
 xml/translit.dtd
@@ -18,7 +19,7 @@ xml/common_ces.xml
 xml/common_slv.xml
 xml/common_slk.xml
 xml/common_pol.xml
-xml/common_classical_mon.xml
+xml/common_ara.xml
 xml/din_1460_bul.xml
 xml/din_1460_rus.xml
 xml/din_1460_ukr.xml
@@ -37,7 +38,6 @@ t/13_tr_Greeklish.t
 t/14_tr_DIN_31634.t
 t/15_tr_Common_RON.t
 t/16_tr_Common_CES.t
-t/17_tr_Common_Classical_MON.t
 t/18_tr_DIN_1460_BUL.t
 t/19_tr_Streamlined_System_BUL.t
 t/20_tr_Common_SLK.t
@@ -48,6 +48,8 @@ t/24_tr_DIN_1460_UKR.t
 t/25_tr_GOST_RUS_OLD.t
 t/26_tr_GOST_RUS.t
 t/27_tr_GOST_UKR.t
+t/28_tr_ALA-LC_RUS.t
+t/29_tr_Common_ARA.t
 tools/substitute_tables.pl
 Changes
 README

META.yml

@@ -1,13 +1,21 @@
 --- #YAML:1.0
-name:                Lingua-Translit
-version:             0.19
-abstract:            transliterates text between writing systems
-license:             ~
-author:              
+name:               Lingua-Translit
+version:            0.21
+abstract:           transliterates text between writing systems
+author:
     - Alex Linke <alinke@lingua-systems.com>
-generated_by:        ExtUtils::MakeMaker version 6.42
-distribution_type:   module
-requires:     
+license:            unknown
+distribution_type:  module
+configure_requires:
+    ExtUtils::MakeMaker:  0
+build_requires:
+    ExtUtils::MakeMaker:  0
+requires:  {}
+no_index:
+    directory:
+        - t
+        - inc
+generated_by:       ExtUtils::MakeMaker version 6.57_05
 meta-spec:
-    url:     http://module-build.sourceforge.net/META-spec-v1.3.html
-    version: 1.3
+    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
+    version:  1.4

Makefile.PL

@@ -7,15 +7,14 @@ use ExtUtils::MakeMaker;
 
 
 WriteMakefile(
-    NAME	    =>  "Lingua::Translit",
+    NAME            =>  "Lingua::Translit",
     VERSION_FROM    =>  "lib/Lingua/Translit.pm",
     ABSTRACT_FROM   =>  "lib/Lingua/Translit.pm",
-    AUTHOR	    =>  'Alex Linke <alinke@lingua-systems.com>',
-    EXE_FILES	    =>  [qw/translit/],
-    PL_FILES	    =>	{
-	    'tools/substitute_tables.pl' =>
-		'blib/lib/Lingua/Translit/Tables.pm'
-	},
+    AUTHOR          =>  'Alex Linke <alinke@lingua-systems.com>',
+    EXE_FILES       =>  [ qw/translit/ ],
+    PL_FILES        =>  {
+        'tools/substitute_tables.pl' => 'blib/lib/Lingua/Translit/Tables.pm'
+    },
 );
 
 
@@ -24,9 +23,8 @@ package MY;
 sub postamble
 {
     # include target to rebuild tables
-    return  "tables:\n\t\$(MAKE) -C xml tables\n\n" .
-	    "manual:\n\t\$(MAKE) -C xml/manual\n";
+    return  "tables:\n\t\$(MAKE) -C xml tables\n";
 }
 
 
-# vim: sts=4 enc=utf-8
+# vim: sts=4 sw=4 ts=4 ai et

README

@@ -35,7 +35,7 @@ if you have any suggestions and contributions.
 COPYRIGHT AND LICENSE
 
 Copyright (C) 2007-2008 Alex Linke and Rona Linke
-Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 
 This module is free software. It may be used, redistributed
 and/or modified under the terms of either the GPL v2 or the

developer-manual__eng.pdf

diff --git a/var/tmp/source/ALINKE/Lingua-Translit-0.19/Lingua-Translit-0.19/developer-manual__eng.pdf b/var/tmp/source/ALINKE/Lingua-Translit-0.21/Lingua-Translit-0.21/developer-manual__eng.pdf
index 9882b86e..6598730d 100644
Binary files a/var/tmp/source/ALINKE/Lingua-Translit-0.19/Lingua-Translit-0.19/developer-manual__eng.pdf and b/var/tmp/source/ALINKE/Lingua-Translit-0.21/Lingua-Translit-0.21/developer-manual__eng.pdf differ

lib/Lingua/Translit/Tables.pm

@@ -1,26 +1,22 @@
 package Lingua::Translit::Tables;
 
-
 #
 # Copyright (C) 2007-2008 ...
 #   Alex Linke <alinke@lingua-systems.com>
 #   Rona Linke <rlinke@lingua-systems.com>
-# Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 #
 
-
 use strict;
 use warnings;
+use utf8;
 
 require 5.008;
 
-
-our $VERSION = '0.09';
-
+our $VERSION = '0.10';
 
 use Carp;
 
-
 =pod
 
 =head1 NAME
@@ -75,42 +71,58 @@ Import translit_list_supported(). (Convenience tag)
 
 =cut
 
-
 require Exporter;
 
-our @ISA        =   qw/Exporter/;
-our @EXPORT     =   qw//;           # Export nothing by default
-our @EXPORT_OK  =   qw/translit_supported translit_reverse_supported
-                       translit_list_supported/;
+our @ISA    = qw/Exporter/;
+our @EXPORT = qw//;           # Export nothing by default
+our @EXPORT_OK = qw/translit_supported translit_reverse_supported
+  translit_list_supported/;
 
 our %EXPORT_TAGS = (
-    checks  => [qw/translit_supported translit_reverse_supported/],
-    list    => [qw/translit_list_supported/],
-    all     => [@EXPORT_OK]
+    checks => [qw/translit_supported translit_reverse_supported/],
+    list   => [qw/translit_list_supported/],
+    all    => [@EXPORT_OK]
 );
 
-
-# For convenience, the tables are initialized at the bottom of this file
+# For convenience, the tables are initialized at the bottom of this file.
 our %tables;
 
-
-# used internally to retrieve a reference to a single transliteration table
-sub _get_table_reference
-{
+# Used internally to retrieve a reference to a single transliteration table.
+sub _get_table_reference {
     my $name = shift();
 
     return unless $name;
 
     $name = _get_table_id($name);
 
-    foreach my $table (keys %tables)
-    {
-        return $tables{$table} if ($table =~ /^$name$/i);
+    foreach my $table ( keys %tables ) {
+        return _handle_perl_unicode_bug( $tables{$table} )
+          if $table =~ /^$name$/i;
     }
 
     return;
 }
 
+# Handle the "Unicode Bug" affecting code points in the Latin-1 block.
+#
+# Have a look at perlunicode (section "The 'Unicode Bug'") for details.
+sub _handle_perl_unicode_bug {
+    my $tbl = shift();
+
+    foreach my $rule ( @{ $tbl->{rules} } ) {
+        utf8::upgrade( $rule->{from} );
+        utf8::upgrade( $rule->{to} );
+
+        if ( defined( $rule->{context} ) ) {
+            utf8::upgrade( $rule->{context}->{before} )
+              if defined $rule->{context}->{before};
+            utf8::upgrade( $rule->{context}->{after} )
+              if defined $rule->{context}->{after};
+        }
+    }
+
+    return $tbl;
+}
 
 =head1 ROUTINES
 
@@ -120,12 +132,10 @@ Returns true (1), iff I<translit_name> is supported. False (0) otherwise.
 
 =cut
 
-sub translit_supported
-{
-    return (_get_table_reference(_get_table_id($_[0])) ? 1 : 0);
+sub translit_supported {
+    return ( _get_table_reference( _get_table_id( $_[0] ) ) ? 1 : 0 );
 }
 
-
 =head2 translit_reverse_supported(I<translit_name>)
 
 Returns true (1), iff I<translit_name> is supported and allows reverse
@@ -133,16 +143,14 @@ transliteration. False (0) otherwise.
 
 =cut
 
-sub translit_reverse_supported
-{
-    my $table = _get_table_reference(_get_table_id($_[0]));
+sub translit_reverse_supported {
+    my $table = _get_table_reference( _get_table_id( $_[0] ) );
 
     croak("Failed to retrieve table for $_[0].") unless ($table);
 
-    return (($table->{reverse} =~ /^true$/) ? 1 : 0);
+    return ( ( $table->{reverse} =~ /^true$/ ) ? 1 : 0 );
 }
 
-
 =head2 B<translit_list_supported()>
 
 Prints a list of all supported transliterations to STDOUT, providing the
@@ -156,18 +164,15 @@ The same information is provided in this document as well:
 
 =cut
 
-sub translit_list_supported
-{
-    foreach my $table (sort keys %tables)
-    {
+sub translit_list_supported {
+    foreach my $table ( sort keys %tables ) {
         my $t = $tables{$table};
         print "$t->{name}, ",
-            ($t->{reverse} eq "false" ? "not " : ""),
-            "reversible, $t->{desc}\n";
+          ( $t->{reverse} eq "false" ? "not " : "" ),
+          "reversible, $t->{desc}\n";
     }
 }
 
-
 =head1 SUPPORTED TRANSLITERATIONS
 
 =over 4
@@ -218,9 +223,9 @@ I<Common SLK>, not reversible, Slovak without diacritics
 
 I<Common SLV>, not reversible, Slovenian without diacritics
 
-=item Mongolian
+=item Arabic
 
-I<Common Classical MON>, reversible, Classical Mongolian to Latin
+I<Common ARA>, not reversible, Common Romanization of Arabic
 
 =back
 
@@ -230,7 +235,7 @@ In case you want to add your own transliteration tables to
 L<Lingua::Translit>, have a look at the developer manual included in the
 distribution.
 An online version is available at
-L<http://www.lingua-systems.com/downloads/Lingua-Translit/>.
+L<http://www.lingua-systems.com/translit/downloads/>.
 
 A template of a transliteration table is provided as well
 (F<xml/template.xml>) so you can easily start developing.
@@ -246,7 +251,7 @@ Please report bugs to perl@lingua-systems.com.
 
 L<Lingua::Translit>
 
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/>
+L<http://www.lingua-systems.com/translit/>
 
 
 =head1 CREDITS
@@ -254,12 +259,12 @@ L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/>
 Thanks to Dr. Daniel Eiwen, Romanisches Seminar, Universitaet Koeln for his
 help on Romanian transliteration.
 
-Thanks to Bayanzul Lodoysamba <baynaa@users.sourceforge.net> for contributing
-the "Common Classical Mongolian" transliteration table.
-
 Thanks to Dmitry Smal and Rusar Publishing for contributing the "ALA-LC RUS"
 transliteration table.
 
+Thanks to Ahmed Elsheshtawy for his help implementing the "Common ARA" Arabic
+transliteration.
+
 =head1 AUTHORS
 
 Alex Linke <alinke@lingua-systems.com>
@@ -270,7 +275,7 @@ Rona Linke <rlinke@lingua-systems.com>
 
 Copyright (C) 2007-2008 Alex Linke and Rona Linke
 
-Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 
 This module is free software. It may be used, redistributed
 and/or modified under the terms of either the GPL v2 or the
@@ -278,11 +283,9 @@ Artistic license.
 
 =cut
 
-
 # Get a table's identifier (based on the table's name)
 #   e.g. "Common DEU" -> "common_deu"
-sub _get_table_id
-{
+sub _get_table_id {
     my $name = shift();
 
     return "" unless $name;
@@ -292,13 +295,10 @@ sub _get_table_id
     return lc($name);
 }
 
-
 # For convenience, the next line is automatically substituted with the set
 # of transliteration tables at build time.
-%tables; # PLACEHOLDER
-
+%tables;    # PLACEHOLDER
 
 1;
 
-
-# vim: sts=4 sw=4 ai et
+# vim: sts=4 sw=4 ts=4 ai et

lib/Lingua/Translit.pm

@@ -1,27 +1,23 @@
 package Lingua::Translit;
 
-
 #
 # Copyright (C) 2007-2008 ...
 #   Alex Linke <alinke@lingua-systems.com>
 #   Rona Linke <rlinke@lingua-systems.com>
-# Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 #
 
-
 use strict;
 use warnings;
 
 require 5.008;
 
-use Carp   qw/croak/;
+use Carp qw/croak/;
 use Encode qw/encode decode/;
 
 use Lingua::Translit::Tables;
 
-
-our $VERSION = '0.19';
-
+our $VERSION = '0.21';
 
 =pod
 
@@ -32,11 +28,11 @@ Lingua::Translit - transliterates text between writing systems
 =head1 SYNOPSIS
 
   use Lingua::Translit;
- 
+
   my $tr = new Lingua::Translit("ISO 843");
- 
+
   my $text_tr = $tr->translit("character oriented string");
- 
+
   if ($tr->can_reverse()) {
     $text_tr = $tr->translit_reverse("character oriented string");
   }
@@ -82,8 +78,7 @@ Initializes an object with the specific transliteration table, e.g. "ISO 9".
 
 =cut
 
-sub new
-{
+sub new {
     my $class = shift();
     my $name  = shift();
 
@@ -93,8 +88,7 @@ sub new
     croak("No transliteration name given.") unless $name;
 
     # Stay compatible with programs that use Lingua::Translit < 0.05
-    if ($name =~ /^DIN 5008$/i)
-    {
+    if ( $name =~ /^DIN 5008$/i ) {
         $name = "Common DEU";
     }
 
@@ -110,20 +104,19 @@ sub new
     croak("$name table: missing 'rules'")   unless defined $table->{rules};
 
     # Copy over the table's data
-    $self->{name}   = $table->{name};
-    $self->{desc}   = $table->{desc};
-    $self->{rules}  = $table->{rules};
+    $self->{name}  = $table->{name};
+    $self->{desc}  = $table->{desc};
+    $self->{rules} = $table->{rules};
 
     # Set a truth value of the transliteration's reversibility according to
     # the natural language string in the original transliteration table
-    $self->{reverse} = ($table->{reverse} =~ /^true$/i) ? 1 : 0;
+    $self->{reverse} = ( $table->{reverse} =~ /^true$/i ) ? 1 : 0;
 
     undef($table);
 
     return bless $self, $class;
 }
 
-
 =head2 translit(I<"character oriented string">)
 
 Transliterates the given text according to the object's transliteration
@@ -132,71 +125,55 @@ Returns the transliterated text.
 
 =cut
 
-sub translit
-{
+sub translit {
     my $self = shift();
     my $text = shift();
 
-    my $utf8_flag_on = Encode::is_utf8($text);
-
-    unless ($utf8_flag_on)
-    {
-        $text = decode("UTF-8", $text);
-    }
-
     # Return if no input was given
     return unless $text;
 
-    # Copy over the input string. It will be modified directly.
-    my $tr_text = $text;
+    my $utf8_flag_on = Encode::is_utf8($text);
+
+    unless ($utf8_flag_on) {
+        $text = decode( "UTF-8", $text );
+    }
 
-    foreach my $rule (@{$self->{rules}})
-    {
-        if (defined $rule->{context})
-        {
+    foreach my $rule ( @{ $self->{rules} } ) {
+        if ( defined $rule->{context} ) {
             my $c = $rule->{context};
 
             # single context rules
-            if (defined $c->{before}   && !defined $c->{after})
-            {
-                $tr_text =~ s/\Q$rule->{from}\E(?=$c->{before})/$rule->{to}/g;
+            if ( defined $c->{before} && !defined $c->{after} ) {
+                $text =~ s/$rule->{from}(?=$c->{before})/$rule->{to}/g;
             }
-            elsif (defined $c->{after} && !defined $c->{before})
-            {
-                $tr_text =~ s/(?<=$c->{after})\Q$rule->{from}\E/$rule->{to}/g;
+            elsif ( defined $c->{after} && !defined $c->{before} ) {
+                $text =~ s/(?<=$c->{after})$rule->{from}/$rule->{to}/g;
             }
 
             # double context rules: logical "inbetween"
-            elsif (defined $c->{before} && defined $c->{after})
-            {
-            $tr_text =~
-                s/
-                (?<=$c->{after})\Q$rule->{from}\E(?=$c->{before})
+            elsif ( defined $c->{before} && defined $c->{after} ) {
+                $text =~ s/
+                (?<=$c->{after})$rule->{from}(?=$c->{before})
                 /$rule->{to}/gx;
             }
 
-            else
-            {
+            else {
                 croak("incomplete rule context");
             }
         }
-        else
-        {
-            $tr_text =~ s/\Q$rule->{from}\E/$rule->{to}/g;
+        else {
+            $text =~ s/$rule->{from}/$rule->{to}/g;
         }
     }
 
-    unless ($utf8_flag_on)
-    {
-        return encode("UTF-8", $tr_text);
+    unless ($utf8_flag_on) {
+        return encode( "UTF-8", $text );
     }
-    else
-    {
-        return $tr_text;
+    else {
+        return $text;
     }
 }
 
-
 =head2 translit_reverse(I<"character oriented string">)
 
 Transliterates the given text according to the object's transliteration
@@ -208,74 +185,58 @@ Returns the transliterated text.
 
 =cut
 
-sub translit_reverse
-{
+sub translit_reverse {
     my $self = shift();
     my $text = shift();
 
-    my $utf8_flag_on = Encode::is_utf8($text);
-
-    unless ($utf8_flag_on)
-    {
-        $text = decode("UTF-8", $text);
-    }
-
     # Return if no input was given
     return unless $text;
 
     # Is this transliteration reversible?
     croak("$self->{name} cannot be reversed") unless $self->{reverse};
 
-    # Copy over the input string. It will be modified directly.
-    my $tr_text = $text;
+    my $utf8_flag_on = Encode::is_utf8($text);
+
+    unless ($utf8_flag_on) {
+        $text = decode( "UTF-8", $text );
+    }
 
-    foreach my $rule (@{$self->{rules}})
-    {
-        if (defined $rule->{context})
-        {
+    foreach my $rule ( @{ $self->{rules} } ) {
+        if ( defined $rule->{context} ) {
             my $c = $rule->{context};
 
             # single context rules
-            if (defined $c->{before} && !defined $c->{after})
-            {
-                $tr_text =~ s/\Q$rule->{to}\E(?=$c->{before})/$rule->{from}/g;
+            if ( defined $c->{before} && !defined $c->{after} ) {
+                $text =~ s/$rule->{to}(?=$c->{before})/$rule->{from}/g;
             }
-            elsif (defined $c->{after} && !defined $c->{before})
-            {
-                $tr_text =~ s/(?<=$c->{after})\Q$rule->{to}\E/$rule->{from}/g;
+            elsif ( defined $c->{after} && !defined $c->{before} ) {
+                $text =~ s/(?<=$c->{after})$rule->{to}/$rule->{from}/g;
             }
 
             # double context rules: logical "inbetween"
-            elsif (defined $c->{before} && defined $c->{after})
-            {
-                $tr_text =~
-                    s/
-                    (?<=$c->{after})\Q$rule->{to}\E(?=$c->{before})
+            elsif ( defined $c->{before} && defined $c->{after} ) {
+                $text =~ s/
+                    (?<=$c->{after})$rule->{to}(?=$c->{before})
                     /$rule->{from}/gx;
             }
 
-            else
-            {
+            else {
                 croak("incomplete rule context");
             }
         }
-        else
-        {
-            $tr_text =~ s/\Q$rule->{to}\E/$rule->{from}/g;
+        else {
+            $text =~ s/$rule->{to}/$rule->{from}/g;
         }
     }
 
-    unless ($utf8_flag_on)
-    {
-        return encode("UTF-8", $tr_text);
+    unless ($utf8_flag_on) {
+        return encode( "UTF-8", $text );
     }
-    else
-    {
-        return $tr_text;
+    else {
+        return $text;
     }
 }
 
-
 =head2 can_reverse()
 
 Returns true (1), iff reverse transliteration is possible.
@@ -283,24 +244,20 @@ False (0) otherwise.
 
 =cut
 
-sub can_reverse
-{
+sub can_reverse {
     return $_[0]->{reverse};
 }
 
-
 =head2 name()
 
 Returns the name of the chosen transliteration table, e.g. "ISO 9".
 
 =cut
 
-sub name
-{
+sub name {
     return $_[0]->{name};
 }
 
-
 =head2 desc()
 
 Returns a description for the transliteration,
@@ -308,12 +265,10 @@ e.g. "ISO 9:1995, Cyrillic to Latin".
 
 =cut
 
-sub desc
-{
+sub desc {
     return $_[0]->{desc};
 }
 
-
 =head1 SUPPORTED TRANSLITERATIONS
 
 =over 4
@@ -364,9 +319,9 @@ I<Common SLK>, not reversible, Slovak without diacritics
 
 I<Common SLV>, not reversible, Slovenian without diacritics
 
-=item Mongolian
+=item Arabic
 
-I<Common Classical MON>, reversible, Classical Mongolian to Latin
+I<Common ARA>, not reversible, Common Romanization of Arabic
 
 =back
 
@@ -376,7 +331,7 @@ In case you want to add your own transliteration tables to
 L<Lingua::Translit>, have a look at the developer manual included in the
 distribution.
 An online version is available at
-L<http://www.lingua-systems.com/downloads/Lingua-Translit/>.
+L<http://www.lingua-systems.com/translit/downloads/>.
 
 A template of a transliteration table is provided as well
 (F<xml/template.xml>) so you can easily start developing.
@@ -405,22 +360,19 @@ L<Lingua::Translit::Tables>, L<Encode>, L<perlunicode>
 
 L<translit(1)>
 
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/>
-
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/online-transliteration.html>
-provides an online frontend for L<Lingua::Translit>.
+L<http://www.lingua-systems.com/translit/>
 
 =head1 CREDITS
 
 Thanks to Dr. Daniel Eiwen, Romanisches Seminar, Universitaet Koeln for his
 help on Romanian transliteration.
 
-Thanks to Bayanzul Lodoysamba <baynaa@users.sourceforge.net> for contributing
-the "Common Classical Mongolian" transliteration table.
-
 Thanks to Dmitry Smal and Rusar Publishing for contributing the "ALA-LC RUS"
 transliteration table.
 
+Thanks to Ahmed Elsheshtawy for his help implementing the "Common ARA" Arabic
+transliteration.
+
 =head1 AUTHORS
 
 Alex Linke <alinke@lingua-systems.com>
@@ -431,7 +383,7 @@ Rona Linke <rlinke@lingua-systems.com>
 
 Copyright (C) 2007-2008 Alex Linke and Rona Linke
 
-Copyright (C) 2009-2011 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 
 This module is free software. It may be used, redistributed
 and/or modified under the terms of either the GPL v2 or the
@@ -439,8 +391,6 @@ Artistic license.
 
 =cut
 
-
 1;
 
-
-# vim: sts=4 sw=4 ai et
+# vim: sts=4 sw=4 ts=4 ai et

t/01_Lingua-Translit-Tables.t

@@ -1,12 +1,12 @@
 use strict;
-use Test::More tests => (12 + (13 * 4));
+use Test::More tests => (12 + (12 * 4));
 
 my $truth;
 
 my @check_support = (
     "ISO 843", "Common DEU", "ISO 9", "Greeklish", "DIN 31634", "Common RON",
-    "Common CES", "Common Classical MON", "DIN 1460 BUL",
-    "Streamlined System BUL", "Common SLV", "Common SLK", "Common POL"
+    "Common CES", "DIN 1460 BUL", "Streamlined System BUL", "Common SLV",
+    "Common SLK", "Common POL"
 );

t/02_Lingua-Translit.t

@@ -1,11 +1,11 @@
 use strict;
-use Test::More tests => (8 + (4*13));
+use Test::More tests => (8 + (12 * 4));
 
 my $truth;
 my @check_support = (
     "ISO 843", "Common DEU", "ISO 9", "Greeklish", "DIN 31634", "Common RON",
-    "Common CES", "Common Classical MON", "DIN 1460 BUL",
-    "Streamlined System BUL", "Common SLV", "Common SLK", "Common POL"
+    "Common CES", "DIN 1460 BUL", "Streamlined System BUL", "Common SLV",
+    "Common SLK", "Common POL"
 );
 
 my $num_str = "1234567890";

t/17_tr_Common_Classical_MON.t

@@ -1,96 +0,0 @@
-use strict;
-use Test::More tests => 13;
-
-my $name	=   "Common Classical MON";
-
-# "My Native Land" by D.Natsagdorj from http://www.linguamongolia.co.uk/.
-
-my $input	=   "ᠬᠡᠨᠲᠡᠢ ᠂ ᠬᠠᠩᠭᠠᠢ ᠂ ᠰᠣᠶᠣᠨ᠎ᠤ ᠥᠨᠳᠦᠷ ᠰᠠᠶᠢᠬᠠᠨ ᠨᠢᠷᠤᠭᠤᠨ᠎ᠤᠳ " .
-		    "ᠬᠣᠶᠢᠲᠤ ᠵᠦᠭ᠎ᠦᠨ ᠴᠢᠮᠡᠭ ᠪᠣᠯᠤᠭᠰᠠᠨ ᠣᠢ ᠬᠥᠪᠴᠢ᠎ᠶᠢᠨ ᠠᠭᠤᠯᠠᠨ᠎ᠤᠳ " .
-		    "ᠮᠡᠨᠡᠨ ᠂ ᠱᠠᠷᠭ᠎ᠠ ᠂ ᠨᠣᠮᠢᠨ᠎ᠤ ᠥᠷᠭᠡᠨ ᠶᠡᠬᠡ ᠭᠣᠪᠢ᠎ᠤᠳ " .
-		    "ᠡᠮᠦᠨ᠎ᠡ ᠵᠦᠭ᠎ᠦᠨ ᠮᠠᠩᠯᠠᠢ ᠪᠣᠯᠤᠭᠰᠠᠨ ᠡᠯᠡᠰᠦᠨ ᠮᠠᠩᠬᠠᠨ ᠳᠠᠯᠠᠢ᠎ᠤᠳ " .
-		    "ᠡᠨᠡ ᠪᠣᠯ ᠮᠢᠨᠤ ᠲᠥᠷᠦᠭᠰᠡᠨ ᠨᠤᠲᠤᠭ ᠮᠣᠩᠭᠣᠯ᠎ᠤᠨ ᠰᠠᠶᠢᠬᠠᠨ ᠣᠷᠣᠨ ᠃";
-my $output_ok	=   "kentei , qangɣai , soyon-u öndür sayiqan niruɣun-ud " .
-                    "qoyitu jüg-ün čimeg boluɣsan oi köbči-yin aɣulan-ud " .
-                    "menen , šarɣ-a , nomin-u örgen yeke ɣobi-ud " .
-                    "emün-e jüg-ün manglai boluɣsan elesün mangqan dalai-ud " .
-                    "ene bol minu törügsen nutuɣ mongɣol-un sayiqan oron .";
-
-# A phrase from "Secret History of Mongols" by transcription of B.Sumyaabaatar.
-
-my $txt_1	=   "ᠬᠠᠷᠴᠤ᠎ᠶᠢᠨ ᠬᠥᠪᠡᠭᠦᠨ ᠪᠣᠷᠵᠢᠭᠢᠳᠠᠢ᠎ᠮᠡᠷᠭᠡᠨ᠂ ᠮᠣᠩᠭᠣᠯᠵᠢᠨ᠎ᠭᠣᠣ᠎ᠠ ᠭᠡᠷᠭᠡᠢᠲᠦ ᠠᠵᠤᠭᠤ᠃ ᠪᠣᠷᠵᠢᠭᠢᠳᠠᠢ᠎ᠮᠡᠷᠭᠡᠨ᠎ᠤ ᠬᠥᠪᠡᠭᠦᠨ ᠲᠣᠷᠣᠭᠣᠯᠵᠢᠨ᠎ᠪᠠᠶᠠᠨ᠂ ᠪᠣᠷᠣᠭᠴᠢᠨ᠎ᠭᠣᠣ᠎ᠠ ᠭᠡᠷᠭᠡᠢᠲᠦ᠂ ᠪᠣᠷᠣᠯᠳᠠᠢ ᠰᠤᠶᠠᠯᠪᠢ ᠵᠠᠯᠠᠭᠤᠲᠤ᠂ ᠳᠠᠶᠢᠷ ᠪᠣᠷᠣ ᠬᠣᠶᠠᠷ ᠬᠦᠯᠦᠭᠦᠳ ᠠᠭᠲᠠᠰᠲᠤ ᠪᠦᠯᠡᠭᠡ᠃ ᠲᠣᠷᠣᠭᠣᠯᠵᠢᠨ᠎ᠤ ᠬᠥᠪᠡᠭᠦᠨ ᠳᠤᠸᠠ᠎ᠰᠣᠬᠣᠷ ᠳᠣᠪᠤᠨ᠎ᠮᠡᠷᠭᠡᠨ ᠬᠣᠶᠠᠷ ᠪᠦᠯᠡᠭᠡ᠃";
-
-my $txt_1_ok	=   "qarču-yin köbegün borjiɣidai-mergen, mongɣoljin-ɣoo-a gergeitü ajuɣu. borjiɣidai-mergen-u köbegün toroɣoljin-bayan, boroɣčin-ɣoo-a gergeitü, boroldai suyalbi jalaɣutu, dayir boro qoyar külügüd aɣtastu bülege. toroɣoljin-u köbegün duva-soqor dobun-mergen qoyar bülege.";
-
-my $txt_2	=   "ᠤᠭᠤᠷᠬᠠᠢ ᠪᠠᠶᠠᠯᠢᠭ᠎ᠤᠨ ᠣᠬᠢ ᠣᠯᠠᠨ ᠠᠭᠤᠯᠠ ᠳᠠᠪᠠᠭᠠᠨ᠎ᠤᠳ";
-my $txt_2_ok	=   "uɣurqai bayaliɣ-un oki olan aɣula dabaɣan-ud";
-
-my $txt_3	=   "ᠬᠦᠮᠦᠨ ᠪᠦᠭᠦᠳᠡ ᠴᠢᠮᠠᠶᠢᠭᠢ ᠬᠦᠯᠢᠶᠡᠵᠦ᠂ ᠪᠣᠳᠠᠰ ᠪᠦᠭᠦᠳᠡ ᠴᠢᠮᠠᠶᠢᠭᠢ ᠮᠥᠷᠦᠭᠡᠳᠡᠮᠦᠢ";
-my $txt_3_ok	=   "kümün bügüde čimayigi küliyejü, bodas bügüde čimayigi mörügedemüi";
-
-my $txt_4	=   "ᠪᠠᠢᠭᠰᠠᠭᠠᠷ᠂ ᠲᠣᠭᠲᠠᠨᠢᠭᠰᠠᠨ᠂ ᠵᠢᠭᠰᠠᠭᠠᠯ᠎ᠤᠨ";
-my $txt_4_ok	=   "baiɣsaɣar, toɣtaniɣsan, jiɣsaɣal-un";
-
-my $txt_5       =   "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙ᠧᠫᠱᠸᠹᠺᠻᠼᠽᠿᡀᡁᡂᠾ᠀᠁᠄᠅";
-my $txt_5_ok    =   "0123456789ēpšvfḳǩczžlhzhchh§…:¶";
-
-
-
-
-use Lingua::Translit;
-
-my $tr = new Lingua::Translit($name);
-
-
-my $output = $tr->translit($input);
-
-# 1
-is($tr->can_reverse(), 1, "$name: is reversible");
-
-# 2
-is($output, $output_ok, "$name: transliteration");
-
-# 3
-$output = $tr->translit_reverse($output);
-is($output,$input, "$name: transliteration (reverse)");
-
-# 4
-my $o = $tr->translit($txt_1);
-is($o, $txt_1_ok, "$name: Short text #1");
-
-# 5
-$o = $tr->translit_reverse($o);
-is($o, $txt_1, "$name: Short text #1 (reverse)");
-
-# 6
-$o = $tr->translit($txt_2);
-is($o, $txt_2_ok, "$name: Short text #2");
-
-# 7
-$o = $tr->translit_reverse($o);
-is($o, $txt_2, "$name: Short text #2 (reverse)");
-
-# 8
-$o = $tr->translit($txt_3);
-is($o, $txt_3_ok, "$name: Short text #3");
-
-# 9
-$o = $tr->translit_reverse($o);
-is($o, $txt_3, "$name: Short text #3 (reverse)");
-
-# 10
-$o = $tr->translit($txt_4);
-is($o, $txt_4_ok, "$name: Short text #4");
-
-# 11
-$o = $tr->translit_reverse($o);
-is($o, $txt_4, "$name: Short text #4 (reverse)");
-
-# 12
-$o = $tr->translit($txt_5);
-is($o, $txt_5_ok, "$name: Short text #5");
-
-# 13
-$o = $tr->translit_reverse($o);
-is($o, $txt_5, "$name: Short text #5 (reverse)");
-

t/28_tr_ALA-LC_RUS.t

@@ -0,0 +1,39 @@
+use strict;
+use Test::More tests => 4;
+
+my $name        =   "ALA-LC RUS";
+my $reversible  =   0;
+
+my $upper       =   "AБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ";
+my $upper_ok    =   "ABVGDEËZhZIĬKLMNOPRSTUFKhTSChShShch″Y′ĖIUIA";
+
+my $lower       =   "aбвгдеёжзийклмнопрстуфхцчшщъыьэюя";
+my $lower_ok    =   "abvgdeëzhziĭklmnoprstufkhtschshshch″y′ėiuia";
+
+my $context     =   "труъ ТРУЪ";
+my $context_ok  =   "tru TRU";
+
+
+use Lingua::Translit;
+
+my $tr = new Lingua::Translit($name);
+
+my $output;
+
+
+# 1
+is($tr->can_reverse(), $reversible, "$name: reversibility");
+
+# 2
+$output = $tr->translit($upper);
+is($output, $upper_ok, "$name: upper transliteration");
+
+# 3
+$output = $tr->translit($lower);
+is($output, $lower_ok, "$name: lower transliteration");
+
+# 4
+$output = $tr->translit($context);
+is($output, $context_ok, "$name: transliteration (context-sensitive)");
+
+# vim: sts=4 sw=4 ai et

t/29_tr_Common_ARA.t

@@ -0,0 +1,35 @@
+use strict;
+use Test::More tests => 3;
+
+my $name        =   "Common ARA";
+my $reversible  =   0;
+
+my $input       =   "اخبار اليوم"; # "News Today"
+my $output_ok   =   "akhbar alywm";
+
+my $udohr       =   "يولد جميع الناس أحراراً متساوين في الكرامة والحقوق، " .
+                    "وقد وهبوا عقلاً وضميراً وعليهم أن يعامل بعضهم بعضاً " .
+                    "بروح الإخاء.";
+my $udohr_ok    =   "ywld jmy'e alnas ahrara mtsawyn fy alkramh walhqwq, " .
+                    "wqd whbwa 'eqla wdmyra w'elyhm an y'eaml b'edhm " .
+                    "b'eda brwh alekha'.";
+
+use Lingua::Translit;
+
+my $tr = Lingua::Translit->new( $name );
+
+
+my $output = $tr->translit( $input );
+
+# 1
+is( $tr->can_reverse(), $reversible, "$name: reversibility" );
+
+# 2
+is( $output, $output_ok, "$name: transliteration (short)" );
+
+$output = $tr->translit( $udohr );
+
+# 3
+is( $output, $udohr_ok, "$name: transliteration (UDOHR)" );
+
+# vim: set sts=4 sw=4 ts=4 ai et ft=perl:

tools/substitute_tables.pl

@@ -2,14 +2,14 @@
 
 #
 # Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
-# Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 #
 
 use strict;
 use IO::File;
 
 my $tbl_file = 'xml/tables.dump';
-my $infile   = $ARGV[0] || die "usage: $0 file";
+my $infile = $ARGV[0] || die "usage: $0 file";
 
 my $fh = new IO::File();
 
@@ -25,12 +25,10 @@ $fh->open($tbl_file) or die "$tbl_file: $!\n";
 my $tbls = <$fh>;
 $fh->close();
 
-if ($in_content =~ s/\n\%tables;\s+# PLACEHOLDER\s*\n/\n$tbls\n/)
-{
+if ( $in_content =~ s/\n\%tables;\s+# PLACEHOLDER\s*\n/\n$tbls\n/ ) {
     print "$infile: substituted tables: " . length($tbls) . " bytes.\n";
 }
-else
-{
+else {
     print "$infile: no substitution.\n";
     exit 1;
 }
@@ -42,4 +40,4 @@ $fh->open("> $infile") or die "$infile: $!\n";
 print $fh $in_content;
 $fh->close();
 
-# vim: sts=4 sw=4 enc=utf-8 ai et
+# vim: sts=4 sw=4 ts=4 ai et

translit

@@ -1,14 +1,12 @@
 #!/usr/bin/perl -w
 
-
 #
 # Copyright (C) 2007-2008 ...
 #   Alex Linke <alinke@lingua-systems.com>
 #   Rona Linke <rlinke@lingua-systems.com>
-# Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 #
 
-
 use strict;
 use Getopt::Long;
 
@@ -17,10 +15,8 @@ require 5.008;
 use Lingua::Translit;
 use Lingua::Translit::Tables qw/:all/;
 
-
 my $VERSION = '0.4';
 
-
 =pod
 
 =head1 NAME
@@ -102,92 +98,82 @@ my %opt = (
     outfile => "",
     reverse => 0,
     list    => 0,
-    verbose => 0,   # off
+    verbose => 0,    # off
     help    => 0
 );
 
-show_help(1) unless GetOptions(
-    "trans|t=s"     => \$opt{trans},
-    "infile|i=s"    => \$opt{infile},
-    "outfile|o=s"   => \$opt{outfile},
-    "reverse|r"     => \$opt{reverse},
-    "list|l"        => \$opt{list},
-    "verbose|v"     => \$opt{verbose},
-    "help|h"        => \$opt{help}
-);
-show_help(0)    if $opt{help};
-show_list()     if $opt{list};
-show_help(1)    unless $opt{trans};
+show_help(1)
+  unless GetOptions(
+    "trans|t=s"   => \$opt{trans},
+    "infile|i=s"  => \$opt{infile},
+    "outfile|o=s" => \$opt{outfile},
+    "reverse|r"   => \$opt{reverse},
+    "list|l"      => \$opt{list},
+    "verbose|v"   => \$opt{verbose},
+    "help|h"      => \$opt{help}
+  );
+show_help(0) if $opt{help};
+show_list()  if $opt{list};
+show_help(1) unless $opt{trans};
 
 # Assure the requested transliteration is supported...
-die "$opt{trans} is not supported.\n" unless translit_supported($opt{trans});
+die "$opt{trans} is not supported.\n" unless translit_supported( $opt{trans} );
 
 # ...and reverse transliteration is supported, too - if requested
 die "$opt{trans} cannot be reversed.\n"
-    if ($opt{reverse} && ! translit_reverse_supported($opt{trans}));
-
+  if ( $opt{reverse} && !translit_reverse_supported( $opt{trans} ) );
 
 # If no input file was specified, use STDIN as a fallback. This way, translit
 # may also be used in pipes!
 my $in;
 
-if ($opt{infile})
-{
+if ( $opt{infile} ) {
     print STDERR "Reading input from $opt{infile}...\n" if $opt{verbose};
 
     open IN, "$opt{infile}" or die "$opt{infile}: $!\n";
 
     $in = *IN;
 }
-else
-{
+else {
     print STDERR "Reading input from STDIN...\n" if $opt{verbose};
 
     $in = *STDIN;
 }
 
-
 # If no output file was specified, use STDOUT as a fallback.
 my $out;
 
-if ($opt{outfile})
-{
+if ( $opt{outfile} ) {
     print STDERR "Writing output to $opt{outfile}...\n" if $opt{verbose};
 
     open OUT, "> $opt{outfile}" or die "$opt{outfile}: $!\n";
 
     $out = *OUT;
 }
-else
-{
+else {
     print STDERR "Writing output to STDOUT...\n" if $opt{verbose};
 
     $out = *STDOUT;
 }
 
-
 # Slurp in all the input and close filehandle
 local $/;
 my $text = <$in>;
 close($in);
 
-
 # Transliterate
-my $tr = new Lingua::Translit($opt{trans});
+my $tr = new Lingua::Translit( $opt{trans} );
 my $text_tr;
 
-unless ($opt{reverse})
-{
+unless ( $opt{reverse} ) {
     print STDERR "Transliterating according to ", $tr->name(), "...\n"
-        if $opt{verbose};
+      if $opt{verbose};
 
     $text_tr = $tr->translit($text);
 }
-else
-{
-    print STDERR "Transliterating according to ", $tr->name(),
-        " (reverse)...\n"
-        if $opt{verbose};
+else {
+    print STDERR "Transliterating according to ", $tr->name(), " (reverse)...\n"
+      if $opt{verbose};
 
     $text_tr = $tr->translit_reverse($text);
 }
@@ -196,38 +182,33 @@ else
 print $out $text_tr;
 close($out);
 
-
-sub show_help
-{
+sub show_help {
     my $retval = shift();
 
     print "translit v$VERSION  --  ",
-        "(c) 2009-2010 Lingua-Systems Software GmbH\n\n",
-        "usage: $0 -i FILE -o FILE -t NAME -r -l -v -h\n\n",
-        "  --infile   -i  FILE       read input from FILE\n",
-        "  --outfile  -o  FILE       write output to FILE\n",
-        "  --trans    -t  NAME       use transliteration NAME\n",
-        "  --reverse  -r             transliterate in reverse direction\n",
-        "  --list     -l             list all supported transliterations\n\n",
-        "  --verbose  -v             print verbose status messages\n",
-        "  --help     -h             show this help\n\n",
-        "Read translit(1) for details.\n";
+      "(c) 2009-2014 Lingua-Systems Software GmbH\n\n",
+      "usage: $0 -i FILE -o FILE -t NAME -r -l -v -h\n\n",
+      "  --infile   -i  FILE       read input from FILE\n",
+      "  --outfile  -o  FILE       write output to FILE\n",
+      "  --trans    -t  NAME       use transliteration NAME\n",
+      "  --reverse  -r             transliterate in reverse direction\n",
+      "  --list     -l             list all supported transliterations\n\n",
+      "  --verbose  -v             print verbose status messages\n",
+      "  --help     -h             show this help\n\n",
+      "Read translit(1) for details.\n";
 
     exit($retval);
 }
 
-
-sub show_list
-{
-    print "Transliterations supported by Lingua::Translit v" .
-        $Lingua::Translit::VERSION . ":\n\n";
+sub show_list {
+    print "Transliterations supported by Lingua::Translit v"
+      . $Lingua::Translit::VERSION . ":\n\n";
 
     translit_list_supported();
 
     exit(0);
 }
 
-
 =head1 RESTRICTIONS
 
 The input has to be UTF-8 encoded.
@@ -242,9 +223,6 @@ Please report bugs to perl@lingua-systems.com.
 
 L<Lingua::Translit>, L<Lingua::Translit::Tables>
 
-L<http://www.lingua-systems.com/transliteration/Lingua-Translit-Perl-module/online-transliteration.html>
-provides an online frontend for L<Lingua::Translit>.
-
 =head1 AUTHORS
 
 Alex Linke <alinke@lingua-systems.com>
@@ -255,7 +233,7 @@ Rona Linke <rlinke@lingua-systems.com>
 
 Copyright (C) 2007-2008 Alex Linke and Rona Linke
 
-Copyright (C) 2009-2010 Lingua-Systems Software GmbH
+Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 
 This program is free software. It may be used, redistributed
 and/or modified under the terms of either the GPL v2 or the
@@ -263,5 +241,4 @@ Artistic license.
 
 =cut
 
-
-# vim: sts=4 sw=4 enc=utf-8 ai et
+# vim: sts=4 sw=4 ts=4 ai et

xml/Makefile

@@ -10,8 +10,8 @@ TABLES		:=	din_31634.xml \
 			streamlined_system_bul.xml \
 			greeklish.xml \
 			common_deu.xml common_ron.xml common_ces.xml \
-				common_classical_mon.xml common_slk.xml \
-				common_slv.xml common_pol.xml \
+				common_slk.xml common_slv.xml common_pol.xml \
+				common_ara.xml \
 			gost_7-79_rus_old.xml gost_7-79_rus.xml \
 				gost_7-79_ukr.xml \
 			ala-lc_rus.xml

xml/ala-lc_rus.xml

@@ -0,0 +1,436 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE translit SYSTEM "translit.dtd">
+
+
+<!--
+
+  Transliteration definitions for ALA-LC Romanization table for Russian.
+  This table is used by the Library of Congress and the American Library
+  Association.
+
+  Reference table: http://www.loc.gov/catdir/cpso/roman.html
+
+  Dmitry Smal <mialinx@gmail.com>
+
+  Copyright 2010 (C) Rusar Publishing
+
+-->
+
+
+<translit>
+
+    <name>ALA-LC RUS</name>
+    <desc>ALA-LC:1997, Cyrillic to Latin, Russian</desc>
+    <reverse>false</reverse>
+
+    <rules>
+
+        <rule>
+            <from>А</from>
+            <to>A</to>
+        </rule>
+
+        <rule>
+            <from>Б</from>
+            <to>B</to>
+        </rule>
+
+        <rule>
+            <from>В</from>
+            <to>V</to>
+        </rule>
+
+        <rule>
+            <from>Г</from>
+            <to>G</to>
+        </rule>
+
+        <rule>
+            <from>Д</from>
+            <to>D</to>
+        </rule>
+
+        <rule>
+            <from>Е</from>
+            <to>E</to>
+        </rule>
+
+        <rule>
+            <from>Ё</from>
+            <to>&#x00CB;</to>       <!-- latin capital letter e with diaeresis -->
+        </rule>
+
+        <rule>
+            <from>Ж</from>
+            <to>Zh</to>
+        </rule>
+
+        <rule>
+            <from>З</from>
+            <to>Z</to>
+        </rule>
+
+        <rule>
+            <from>И</from>
+            <to>I</to>
+        </rule>
+
+        <rule>
+            <from>&#x0406;</from>   <!-- cyrillic capital letter byelorussian-ukrainian i -->
+            <to>&#x012A;</to>       <!-- latin capital letter i with macron -->
+        </rule>
+
+        <rule>
+            <from>Й</from>
+            <to>&#x012C;</to>       <!-- latin capital letter i with breve -->
+        </rule>
+
+        <rule>
+            <from>К</from>
+            <to>K</to>
+        </rule>
+
+        <rule>
+            <from>Л</from>
+            <to>L</to>
+        </rule>
+
+        <rule>
+            <from>М</from>
+            <to>M</to>
+        </rule>
+
+        <rule>
+            <from>Н</from>
+            <to>N</to>
+        </rule>
+
+        <rule>
+            <from>О</from>
+            <to>O</to>
+        </rule>
+
+        <rule>
+            <from>П</from>
+            <to>P</to>
+        </rule>
+
+        <rule>
+            <from>Р</from>
+            <to>R</to>
+        </rule>
+
+        <rule>
+            <from>С</from>
+            <to>S</to>
+        </rule>
+
+        <rule>
+            <from>Т</from>
+            <to>T</to>
+        </rule>
+
+        <rule>
+            <from>У</from>
+            <to>U</to>
+        </rule>
+
+        <rule>
+            <from>Ф</from>
+            <to>F</to>
+        </rule>
+
+        <rule>
+            <from>Х</from>
+            <to>Kh</to>
+        </rule>
+
+        <rule>
+            <from>Ц</from>
+            <to>TS</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>Ч</from>
+            <to>Ch</to>
+        </rule>
+
+        <rule>
+            <from>Ч</from>
+            <to>Ch</to>
+        </rule>
+
+        <rule>
+            <from>Ш</from>
+            <to>Sh</to>
+        </rule>
+
+        <rule>
+            <from>Щ</from>
+            <to>Shch</to>
+        </rule>
+
+        <rule>
+            <from>Ъ</from>
+            <to></to>
+            <context>
+                <before>\b</before> <!-- letter is disregarded in romanization when found at the end of a word -->
+            </context>
+        </rule>
+
+        <rule>
+            <from>Ъ</from>
+            <to>&#x2033;</to>       <!-- double prime -->
+        </rule>
+
+        <rule>
+            <from>Ы</from>
+            <to>Y</to>
+        </rule>
+
+        <rule>
+            <from>Ь</from>
+            <to>&#x2032;</to>       <!-- prime -->
+        </rule>
+
+        <rule>
+            <from>&#x0462;</from>   <!-- cyrillic capital yat -->
+            <to>IE</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>Э</from>
+            <to>&#x0116;</to>       <!-- latin capital letter e with dot above -->
+        </rule>
+
+        <rule>
+            <from>Ю</from>
+            <to>IU</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>Я</from>
+            <to>IA</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>&#x0466;</from>   <!-- cyrillic capital letter little yus -->
+            <to>&#x0118;</to>       <!-- latin capital letter e with ogonek -->
+        </rule>
+
+        <rule>
+            <from>&#x04E8;</from>   <!-- cyrillic capital letter barred o -->
+            <to>&#x1E1E;</to>       <!-- latin capital letter f with dot above -->
+        </rule>
+
+        <rule>
+            <from>&#x0474;</from>   <!-- cyrillic capital letter izhitsa -->
+            <to>&#x1E8E;</to>       <!-- latin capital letter y with dot above -->
+        </rule>
+
+        <rule>
+            <from>а</from>
+            <to>a</to>
+        </rule>
+
+        <rule>
+            <from>б</from>
+            <to>b</to>
+        </rule>
+
+        <rule>
+            <from>в</from>
+            <to>v</to>
+        </rule>
+
+        <rule>
+            <from>г</from>
+            <to>g</to>
+        </rule>
+
+        <rule>
+            <from>д</from>
+            <to>d</to>
+        </rule>
+
+        <rule>
+            <from>е</from>
+            <to>e</to>
+        </rule>
+
+        <rule>
+            <from>ё</from>
+            <to>&#x00EB;</to>       <!-- latin small letter e with diaeresis -->
+        </rule>
+
+        <rule>
+            <from>ж</from>
+            <to>zh</to>
+        </rule>
+
+        <rule>
+            <from>з</from>
+            <to>z</to>
+        </rule>
+
+        <rule>
+            <from>и</from>
+            <to>i</to>
+        </rule>
+
+        <rule>
+            <from>&#x0456;</from>   <!-- cyrillic small letter byelorussian-ukrainian i -->
+            <to>&#x012B;</to>       <!-- latin small letter i with macron -->
+        </rule>
+
+        <rule>
+            <from>й</from>
+            <to>&#x012D;</to>       <!-- latin small letter i with breve -->
+        </rule>
+
+        <rule>
+            <from>к</from>
+            <to>k</to>
+        </rule>
+
+        <rule>
+            <from>л</from>
+            <to>l</to>
+        </rule>
+
+        <rule>
+            <from>м</from>
+            <to>m</to>
+        </rule>
+
+        <rule>
+            <from>н</from>
+            <to>n</to>
+        </rule>
+
+        <rule>
+            <from>о</from>
+            <to>o</to>
+        </rule>
+
+        <rule>
+            <from>п</from>
+            <to>p</to>
+        </rule>
+
+        <rule>
+            <from>р</from>
+            <to>r</to>
+        </rule>
+
+        <rule>
+            <from>с</from>
+            <to>s</to>
+        </rule>
+
+        <rule>
+            <from>т</from>
+            <to>t</to>
+        </rule>
+
+        <rule>
+            <from>у</from>
+            <to>u</to>
+        </rule>
+
+        <rule>
+            <from>ф</from>
+            <to>f</to>
+        </rule>
+
+        <rule>
+            <from>х</from>
+            <to>kh</to>
+        </rule>
+
+        <rule>
+            <from>ц</from>
+            <to>ts</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>ч</from>
+            <to>ch</to>
+        </rule>
+
+        <rule>
+            <from>ш</from>
+            <to>sh</to>
+        </rule>
+
+        <rule>
+            <from>щ</from>
+            <to>shch</to>
+        </rule>
+
+        <rule>
+            <from>ъ</from>
+            <to></to>
+            <context>
+                <before>\b</before> <!-- letter is disregarded in romanization when found at the end of a word -->
+            </context>
+        </rule>
+
+        <rule>
+            <from>ъ</from>
+            <to>&#x2033;</to>       <!-- double prime -->
+        </rule>
+
+        <rule>
+            <from>ы</from>
+            <to>y</to>
+        </rule>
+
+        <rule>
+            <from>ь</from>
+            <to>&#x2032;</to>       <!-- prime -->
+        </rule>
+
+        <rule>
+            <from>&#x0463;</from>   <!-- cyrillic small yat -->
+            <to>ie</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>э</from>
+            <to>&#x0117;</to>       <!-- latin small letter e with dot above -->
+        </rule>
+
+        <rule>
+            <from>ю</from>
+            <to>iu</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>я</from>
+            <to>ia</to>             <!-- need ligature -->
+        </rule>
+
+        <rule>
+            <from>&#x0467;</from>   <!-- cyrillic small letter little yus -->
+            <to>&#x0119;</to>       <!-- latin small letter e with ogonek -->
+        </rule>
+
+        <rule>
+            <from>&#x04E9;</from>   <!-- cyrillic small letter barred o -->
+            <to>&#x1E1F;</to>       <!-- latin small letter f with dot above -->
+        </rule>
+
+        <rule>
+            <from>&#x0475;</from>   <!-- cyrillic small letter izhitsa -->
+            <to>&#x1E8F;</to>       <!-- latin small letter y with dot above -->
+        </rule>
+
+    </rules>
+
+</translit>
+
+<!--
+  vim: sts=4 sw=4 ai et
+-->

xml/common_ara.xml

@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE translit SYSTEM "translit.dtd">
+
+
+<!--
+
+  Transliteration definitions for "Common ARA".
+
+  Copyright (C) 2014 Lingua-Systems Software GmbH
+
+  Loosely based on the transliteration table found at
+  http://www.arabic-keyboard.org/arabic/arabic-transliteration.php
+  and suggestions by Ahmed Elsheshtawy.
+
+-->
+
+
+<translit>
+
+    <name>Common ARA</name>
+    <desc>Common Romanization of Arabic</desc>
+    <reverse>false</reverse>
+
+    <rules>
+
+        <!-- Section #1 -->
+
+        <rule>
+            <from>&#x062B;</from> <!-- ARABIC LETTER THEH -->
+            <to>th</to>
+        </rule>
+
+        <rule>
+            <from>&#x062E;</from> <!-- ARABIC LETTER KHAH -->
+            <to>kh</to>
+        </rule>
+
+        <rule>
+            <from>&#x0634;</from> <!-- ARABIC LETTER SHEEN -->
+            <to>sh</to>
+        </rule>
+
+        <rule>
+            <from>&#x063A;</from> <!-- ARABIC LETTER GHAIN -->
+            <to>gh</to>
+        </rule>
+
+        <rule>
+            <from>&#x0639;</from> <!-- ARABIC LETTER AIN -->
+            <to>'e</to>
+        </rule>
+
+        <rule>
+            <from>&#x0626;</from> <!-- ARABIC LETTER YEH WITH HAMZA ABOVE -->
+            <to>'e</to>
+        </rule>
+
+        <rule>
+            <from>&#x0624;</from> <!-- ARABIC LETTER WAW WITH HAMZA ABOVE -->
+            <to>'e</to>
+        </rule>
+
+
+        <!-- Section #2 -->
+
+        <rule>
+            <from>&#x0627;</from> <!-- ARABIC LETTER ALEF -->
+            <to>a</to>
+        </rule>
+
+        <rule>
+            <from>&#x0623;</from> <!-- ARABIC LETTER ALEF W. HAMZA ABOVE -->
+            <to>a</to>
+        </rule>
+
+        <rule>
+            <from>&#x0622;</from> <!-- ARABIC LETTER ALEF W. MADDA ABOVE -->
+            <to>a</to>
+        </rule>
+
+        <rule>
+            <from>&#x0649;</from> <!-- ARABIC LETTER ALEF MAKSURA -->
+            <to>a</to>
+        </rule>
+
+        <rule>
+            <from>&#x0625;</from> <!-- ARABIC LETTER ALEF W. HAMZA BELOW -->
+            <to>e</to>
+        </rule>
+
+        <rule>
+            <from>&#x0628;</from> <!-- ARABIC LETTER BEH -->
+            <to>b</to>
+        </rule>
+
+        <rule>
+            <from>&#x062A;</from> <!-- ARABIC LETTER TEH -->
+            <to>t</to>
+        </rule>
+
+        <rule>
+            <from>&#x062C;</from> <!-- ARABIC LETTER JEEM -->
+            <to>j</to>
+        </rule>
+
+        <rule>
+            <from>&#x062D;</from> <!-- ARABIC LETTER HAH -->
+            <to>h</to>
+        </rule>
+
+        <rule>
+            <from>&#x062F;</from> <!-- ARABIC LETTER DAL -->
+            <to>d</to>
+        </rule>
+
+        <rule>
+            <from>&#x0630;</from> <!-- ARABIC LETTER THAL -->
+            <to>d</to>
+        </rule>
+
+        <rule>
+            <from>&#x0636;</from> <!-- ARABIC LETTER DAD -->
+            <to>d</to>
+        </rule>
+
+        <rule>
+            <from>&#x0631;</from> <!-- ARABIC LETTER REH -->
+            <to>r</to>
+        </rule>
+
+        <rule>
+            <from>&#x0632;</from> <!-- ARABIC LETTER ZAIN -->
+            <to>z</to>
+        </rule>
+
+        <rule>
+            <from>&#x0638;</from> <!-- ARABIC LETTER ZAH -->
+            <to>z</to>
+        </rule>
+
+        <rule>
+            <from>&#x0633;</from> <!-- ARABIC LETTER SEEN -->
+            <to>s</to>
+        </rule>
+
+        <rule>
+            <from>&#x0635;</from> <!-- ARABIC LETTER SAD -->
+            <to>s</to>
+        </rule>
+
+        <rule>
+            <from>&#x0637;</from> <!-- ARABIC LETTER TAH -->
+            <to>t</to>
+        </rule>
+
+        <rule>
+            <from>&#x0641;</from> <!-- ARABIC LETTER FEH -->
+            <to>f</to>
+        </rule>
+
+        <rule>
+            <from>&#x0642;</from> <!-- ARABIC LETTER QAF -->
+            <to>q</to>
+        </rule>
+
+        <rule>
+            <from>&#x0643;</from> <!-- ARABIC LETTER KAF -->
+            <to>k</to>
+        </rule>
+
+        <rule>
+            <from>&#x0644;</from> <!-- ARABIC LETTER LAM -->
+            <to>l</to>
+        </rule>
+
+        <rule>
+            <from>&#x0645;</from> <!-- ARABIC LETTER MEEM -->
+            <to>m</to>
+        </rule>
+
+        <rule>
+            <from>&#x0646;</from> <!-- ARABIC LETTER NOON -->
+            <to>n</to>
+        </rule>
+
+        <rule>
+            <from>&#x0647;</from> <!-- ARABIC LETTER HEH -->
+            <to>h</to>
+        </rule>
+
+        <rule>
+            <from>&#x0629;</from> <!-- ARABIC LETTER TEH MARBUTA -->
+            <to>h</to>
+        </rule>
+
+        <rule>
+            <from>&#x0648;</from> <!-- ARABIC LETTER WAW -->
+            <to>w</to>
+        </rule>
+
+        <rule>
+            <from>&#x064A;</from> <!-- ARABIC LETTER YEH -->
+            <to>y</to>
+        </rule>
+
+        <rule>
+            <from>&#x0621;</from> <!-- ARABIC LETTER HAMZA -->
+            <to>'</to>
+        </rule>
+
+        <rule>
+            <from>&#x061F;</from> <!-- ARABIC QUESTION MARK -->
+            <to>?</to>
+        </rule>
+
+        <rule>
+            <from>&#x060C;</from> <!-- ARABIC COMMA -->
+            <to>,</to>
+        </rule>
+
+
+        <!-- Section #3 -->
+
+        <rule>
+            <from>&#x0640;</from> <!-- ARABIC TATWEEL -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x064B;</from> <!-- ARABIC FATHATAN -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x064C;</from> <!-- ARABIC DAMMATAN -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x064D;</from> <!-- ARABIC KASRATAN -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x064E;</from> <!-- ARABIC FATHA -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x064F;</from> <!-- ARABIC DAMMA -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x0650;</from> <!-- ARABIC KASRA -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x0651;</from> <!-- ARABIC SHADDA -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x0652;</from> <!-- ARABIC SUKUN -->
+            <to></to>
+        </rule>
+
+        <rule>
+            <from>&#x200F;</from> <!-- RIGHT-TO-LEFT MARK -->
+            <to></to>
+        </rule>
+
+    </rules>
+
+</translit>
+
+<!-- vim: set sts=4 sw=4 ts=4 ai et ft=xml: -->

xml/common_classical_mon.xml

@@ -1,362 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!DOCTYPE translit SYSTEM "translit.dtd">
-
-
-<!--
-
-  Transliteration definitions for the common transliteration of Classical
-  Mongolian Script to Latin.
-
-  Copyright 2008 Bayanzul Lodoysamba <baynaa@users.sourceforge.net>
-
--->
-
-
-<translit>
-
-    <!-- meta data -->
-
-    <name>Common Classical MON</name>
-
-    <desc>Classical Mongolian Script to Latin</desc>
-
-    <reverse>true</reverse>
-
-
-    <!-- set of transliteration rules -->
-
-    <rules>
-        <rule>
-            <from>&#x1820;</from>   <!-- MONGOLIAN LETTER A (ᠠ) -->
-            <to>a</to>
-        </rule>
-        <rule>
-            <from>&#x1821;</from>   <!-- MONGOLIAN LETTER E (ᠡ) -->
-            <to>e</to>
-        </rule>
-        <rule>
-            <from>&#x1822;</from>   <!-- MONGOLIAN LETTER I (ᠢ) -->
-            <to>i</to>
-        </rule>
-        <rule>
-            <from>&#x1823;</from>   <!-- MONGOLIAN LETTER O (ᠣ) -->
-            <to>o</to>
-        </rule>
-        <rule>
-            <from>&#x1824;</from>   <!-- MONGOLIAN LETTER U (ᠤ) -->
-            <to>u</to>
-        </rule>
-        <rule>
-            <from>&#x1825;</from>   <!-- MONGOLIAN LETTER OE (ᠥ) -->
-            <to>&#x00f6;</to>
-        </rule>
-        <rule>
-            <from>&#x1826;</from>   <!-- MONGOLIAN LETTER UE (ᠦ) -->
-            <to>&#x00fc;</to>
-        </rule>
-        <rule>
-            <from>&#x1827;</from>   <!-- MONGOLIAN LETTER EE (ᠧ) -->
-            <to>&#x0113;</to>
-        </rule>
-        <rule>
-            <from>&#x1829;</from>   <!-- MONGOLIAN LETTER ANG (ᠩ) -->
-            <to>ng</to>
-        </rule>
-        <rule>
-            <from>&#x1828;</from>   <!-- MONGOLIAN LETTER NA (ᠨ) -->
-            <to>n</to>
-        </rule>
-        <rule>
-            <from>&#x182A;</from>   <!-- MONGOLIAN LETTER BA (ᠪ) -->
-            <to>b</to>
-        </rule>
-        <rule>
-            <from>&#x182B;</from>   <!-- MONGOLIAN LETTER PA (ᠫ) -->
-            <to>p</to>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
-            <to>k</to>
-            <context>
-                <before>i</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) masculine form -->
-            <to>q</to>
-            <context>
-                <before>&#x180E;?[aou]</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) masculine form -->
-            <to>q</to>
-            <context>
-                <after>[aou]</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
-            <to>k</to>
-            <context>
-                <before>[&#xfc;e&#xf6;]</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
-            <to>k</to>
-            <context>
-                <after>[&#xfc;e&#xf6;]</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) masculine form -->
-            <to>q</to>
-        </rule>
-        <rule>
-            <from>&#x182C;</from>   <!-- MONGOLIAN LETTER QA (ᠬ) feminine form -->
-            <to>k</to>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) feminine form -->
-            <to>g</to>
-            <context>
-                <before>i\s</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form -->
-            <to>&#x0263;</to>
-            <context>
-                <before>&#x180E;?[aou]</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form -->
-            <to>&#x0263;</to>
-            <context>
-                <after>[aou]</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) feminine form -->
-            <to>g</to>
-            <context>
-                <before>[&#xfc;e&#xf6;]</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) feminine form() -->
-            <to>g</to>
-            <context>
-                <after>[&#xfc;e&#xf6;]</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
-            <to>&#x0263;</to>
-            <context>
-                <after>[aou].</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
-            <to>&#x0263;</to>
-            <context>
-                <after>[aou].i</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
-            <to>&#x0263;</to>
-            <context>
-                <after>[aou]..i</after>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
-            <to>&#x0263;</to>
-            <context>
-                <before>.[aou]</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
-            <to>g</to>
-            <context>
-                <before>.[&#xfc;e&#xf6;]</before>
-            </context>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) feminine form() -->
-            <to>g</to>
-        </rule>
-        <rule>
-            <from>&#x182D;</from>   <!-- MONGOLIAN LETTER GA (ᠭ) masculine form() -->
-            <to>&#x0263;</to>
-        </rule>
-        <rule>
-            <from>&#x182E;</from>   <!-- MONGOLIAN LETTER MA (ᠮ) -->
-            <to>m</to>
-        </rule>
-        <rule>
-            <from>&#x1830;</from>   <!-- MONGOLIAN LETTER SA (ᠰ) -->
-            <to>s</to>
-        </rule>
-        <rule>
-            <from>&#x1831;</from>   <!-- MONGOLIAN LETTER SHA (ᠱ) -->
-            <to>&#x0161;</to>
-        </rule>
-        <rule>
-            <from>&#x1832;</from>   <!-- MONGOLIAN LETTER TA (ᠲ) -->
-            <to>t</to>
-        </rule>
-        <rule>
-            <from>&#x1833;</from>   <!-- MONGOLIAN LETTER DA (ᠳ) -->
-            <to>d</to>
-        </rule>
-        <rule>
-            <from>&#x1834;</from>   <!-- MONGOLIAN LETTER CHA (ᠴ) -->
-            <to>&#x010d;</to>
-        </rule>
-        <rule>
-            <from>&#x1835;</from>   <!-- MONGOLIAN LETTER JA (ᠵ) -->
-            <to>j</to>
-        </rule>
-        <rule>
-            <from>&#x1836;</from>   <!-- MONGOLIAN LETTER YA (ᠶ) -->
-            <to>y</to>
-        </rule>
-        <rule>
-            <from>&#x1837;</from>   <!-- MONGOLIAN LETTER RA (ᠷ) -->
-            <to>r</to>
-        </rule>
-        <rule>
-            <from>&#x1838;</from>   <!-- MONGOLIAN LETTER WA (ᠸ) -->
-            <to>v</to>
-        </rule>
-        <rule>
-            <from>&#x1838;</from>   <!-- MONGOLIAN LETTER WA (ᠸ) -->
-            <to>w</to>
-        </rule>
-        <rule>
-            <from>&#x1839;</from>   <!-- MONGOLIAN LETTER FA (ᠹ) -->
-            <to>f</to>
-        </rule>
-        <rule>
-            <from>&#x183A;</from>   <!-- MONGOLIAN LETTER KA (ᠺ) -->
-            <to>&#x1e33;</to>
-        </rule>
-        <rule>
-            <from>&#x183B;</from>   <!-- MONGOLIAN LETTER KHA (ᠻ) -->
-            <to>&#x01e9;</to>
-        </rule>
-        <rule>
-            <from>&#x183F;</from>   <!-- MONGOLIAN LETTER ZRA (ᠿ) -->
-            <to>&#x017e;</to>
-        </rule>
-        <rule>
-            <from>&#x1840;</from>   <!-- MONGOLIAN LETTER LHA (ᡀ) -->
-            <to>lh</to>
-        </rule>
-        <rule>
-            <from>&#x182F;</from>   <!-- MONGOLIAN LETTER LA (ᠯ) -->
-            <to>l</to>
-        </rule>
-        <rule>
-            <from>&#x1841;</from>   <!-- MONGOLIAN LETTER ZHI (ᡁ) -->
-            <to>zh</to>
-        </rule>
-        <rule>
-            <from>&#x183D;</from>   <!-- MONGOLIAN LETTER ZA (ᠽ) -->
-            <to>z</to>
-        </rule>
-        <rule>
-            <from>&#x1842;</from>   <!-- MONGOLIAN LETTER CHI (ᡂ) -->
-            <to>ch</to>
-        </rule>
-        <rule>
-            <from>&#x183C;</from>   <!-- MONGOLIAN LETTER TSA (ᠼ) -->
-            <to>c</to>
-        </rule>
-        <rule>
-            <from>&#x183E;</from>   <!-- MONGOLIAN LETTER HAA (ᠾ) -->
-            <to>h</to>
-        </rule>
-        <rule>
-            <from>&#x1800;</from>   <!-- MONGOLIAN BIRGA (᠀) -->
-            <to>&#x00a7;</to>
-        </rule>
-        <rule>
-            <from>&#x1801;</from>   <!-- MONGOLIAN ELLIPSIS (᠁) -->
-            <to>&#x2026;</to>
-        </rule>
-        <rule>
-            <from>&#x1802;</from>   <!-- MONGOLIAN COMMA (᠂) -->
-            <to>,</to>
-        </rule>
-        <rule>
-            <from>&#x1803;</from>   <!-- MONGOLIAN FULL STOP (᠃) -->
-            <to>.</to>
-        </rule>
-        <rule>
-            <from>&#x1804;</from>   <!-- MONGOLIAN COLON (᠄) -->
-            <to>:</to>
-        </rule>
-        <rule>
-            <from>&#x1805;</from>   <!-- MONGOLIAN FOUR DOTS (᠅) -->
-            <to>&#x00b6;</to>
-        </rule>
-        <rule>
-            <from>&#x180E;</from>   <!-- MONGOLIAN VOWEL SEPARATOR (᠎) -->
-            <to>-</to>
-        </rule>
-        <rule>
-            <from>&#x1810;</from>   <!-- MONGOLIAN DIGIT ZERO (᠐) -->
-            <to>0</to>
-        </rule>
-        <rule>
-            <from>&#x1811;</from>   <!-- MONGOLIAN DIGIT ONE (᠑) -->
-            <to>1</to>
-        </rule>
-        <rule>
-            <from>&#x1812;</from>   <!-- MONGOLIAN DIGIT TWO (᠒) -->
-            <to>2</to>
-        </rule>
-        <rule>
-            <from>&#x1813;</from>   <!-- MONGOLIAN DIGIT THREE (᠓) -->
-            <to>3</to>
-        </rule>
-        <rule>
-            <from>&#x1814;</from>   <!-- MONGOLIAN DIGIT FOUR (᠔) -->
-            <to>4</to>
-        </rule>
-        <rule>
-            <from>&#x1815;</from>   <!-- MONGOLIAN DIGIT FIVE (᠕) -->
-            <to>5</to>
-        </rule>
-        <rule>
-            <from>&#x1816;</from>   <!-- MONGOLIAN DIGIT SIX (᠖) -->
-            <to>6</to>
-        </rule>
-        <rule>
-            <from>&#x1817;</from>   <!-- MONGOLIAN DIGIT SEVEN (᠗) -->
-            <to>7</to>
-        </rule>
-        <rule>
-            <from>&#x1818;</from>   <!-- MONGOLIAN DIGIT EIGHT (᠘) -->
-            <to>8</to>
-        </rule>
-        <rule>
-            <from>&#x1819;</from>   <!-- MONGOLIAN DIGIT NINE (᠙) -->
-            <to>9</to>
-        </rule>
-    </rules>
-
-</translit>
-
-
-<!--
-  vim: sts=4 sw=4 ai et
--->

xml/tables.dump

@@ -3507,339 +3507,6 @@
     ],
     "reverse" => "true"
   },
-  "common_classical_mon" => {
-    "desc" => "Classical Mongolian Script to Latin",
-    "name" => "Common Classical MON",
-    "id" => "common_classical_mon",
-    "rules" => [
-      {
-        "to" => "a",
-        "from" => "\x{1820}"
-      },
-      {
-        "to" => "e",
-        "from" => "\x{1821}"
-      },
-      {
-        "to" => "i",
-        "from" => "\x{1822}"
-      },
-      {
-        "to" => "o",
-        "from" => "\x{1823}"
-      },
-      {
-        "to" => "u",
-        "from" => "\x{1824}"
-      },
-      {
-        "to" => "\x{f6}",
-        "from" => "\x{1825}"
-      },
-      {
-        "to" => "\x{fc}",
-        "from" => "\x{1826}"
-      },
-      {
-        "to" => "\x{113}",
-        "from" => "\x{1827}"
-      },
-      {
-        "to" => "ng",
-        "from" => "\x{1829}"
-      },
-      {
-        "to" => "n",
-        "from" => "\x{1828}"
-      },
-      {
-        "to" => "b",
-        "from" => "\x{182a}"
-      },
-      {
-        "to" => "p",
-        "from" => "\x{182b}"
-      },
-      {
-        "to" => "k",
-        "from" => "\x{182c}",
-        "context" => {
-          "before" => "i"
-        }
-      },
-      {
-        "to" => "q",
-        "from" => "\x{182c}",
-        "context" => {
-          "before" => "\x{180e}?[aou]"
-        }
-      },
-      {
-        "to" => "q",
-        "from" => "\x{182c}",
-        "context" => {
-          "after" => "[aou]"
-        }
-      },
-      {
-        "to" => "k",
-        "from" => "\x{182c}",
-        "context" => {
-          "before" => "[\x{fc}e\x{f6}]"
-        }
-      },
-      {
-        "to" => "k",
-        "from" => "\x{182c}",
-        "context" => {
-          "after" => "[\x{fc}e\x{f6}]"
-        }
-      },
-      {
-        "to" => "q",
-        "from" => "\x{182c}"
-      },
-      {
-        "to" => "k",
-        "from" => "\x{182c}"
-      },
-      {
-        "to" => "g",
-        "from" => "\x{182d}",
-        "context" => {
-          "before" => "i\\s"
-        }
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}",
-        "context" => {
-          "before" => "\x{180e}?[aou]"
-        }
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}",
-        "context" => {
-          "after" => "[aou]"
-        }
-      },
-      {
-        "to" => "g",
-        "from" => "\x{182d}",
-        "context" => {
-          "before" => "[\x{fc}e\x{f6}]"
-        }
-      },
-      {
-        "to" => "g",
-        "from" => "\x{182d}",
-        "context" => {
-          "after" => "[\x{fc}e\x{f6}]"
-        }
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}",
-        "context" => {
-          "after" => "[aou]."
-        }
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}",
-        "context" => {
-          "after" => "[aou].i"
-        }
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}",
-        "context" => {
-          "after" => "[aou]..i"
-        }
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}",
-        "context" => {
-          "before" => ".[aou]"
-        }
-      },
-      {
-        "to" => "g",
-        "from" => "\x{182d}",
-        "context" => {
-          "before" => ".[\x{fc}e\x{f6}]"
-        }
-      },
-      {
-        "to" => "g",
-        "from" => "\x{182d}"
-      },
-      {
-        "to" => "\x{263}",
-        "from" => "\x{182d}"
-      },
-      {
-        "to" => "m",
-        "from" => "\x{182e}"
-      },
-      {
-        "to" => "s",
-        "from" => "\x{1830}"
-      },
-      {
-        "to" => "\x{161}",
-        "from" => "\x{1831}"
-      },
-      {
-        "to" => "t",
-        "from" => "\x{1832}"
-      },
-      {
-        "to" => "d",
-        "from" => "\x{1833}"
-      },
-      {
-        "to" => "\x{10d}",
-        "from" => "\x{1834}"
-      },
-      {
-        "to" => "j",
-        "from" => "\x{1835}"
-      },
-      {
-        "to" => "y",
-        "from" => "\x{1836}"
-      },
-      {
-        "to" => "r",
-        "from" => "\x{1837}"
-      },
-      {
-        "to" => "v",
-        "from" => "\x{1838}"
-      },
-      {
-        "to" => "w",
-        "from" => "\x{1838}"
-      },
-      {
-        "to" => "f",
-        "from" => "\x{1839}"
-      },
-      {
-        "to" => "\x{1e33}",
-        "from" => "\x{183a}"
-      },
-      {
-        "to" => "\x{1e9}",
-        "from" => "\x{183b}"
-      },
-      {
-        "to" => "\x{17e}",
-        "from" => "\x{183f}"
-      },
-      {
-        "to" => "lh",
-        "from" => "\x{1840}"
-      },
-      {
-        "to" => "l",
-        "from" => "\x{182f}"
-      },
-      {
-        "to" => "zh",
-        "from" => "\x{1841}"
-      },
-      {
-        "to" => "z",
-        "from" => "\x{183d}"
-      },
-      {
-        "to" => "ch",
-        "from" => "\x{1842}"
-      },
-      {
-        "to" => "c",
-        "from" => "\x{183c}"
-      },
-      {
-        "to" => "h",
-        "from" => "\x{183e}"
-      },
-      {
-        "to" => "\x{a7}",
-        "from" => "\x{1800}"
-      },
-      {
-        "to" => "\x{2026}",
-        "from" => "\x{1801}"
-      },
-      {
-        "to" => ",",
-        "from" => "\x{1802}"
-      },
-      {
-        "to" => ".",
-        "from" => "\x{1803}"
-      },
-      {
-        "to" => ":",
-        "from" => "\x{1804}"
-      },
-      {
-        "to" => "\x{b6}",
-        "from" => "\x{1805}"
-      },
-      {
-        "to" => "-",
-        "from" => "\x{180e}"
-      },
-      {
-        "to" => 0,
-        "from" => "\x{1810}"
-      },
-      {
-        "to" => 1,
-        "from" => "\x{1811}"
-      },
-      {
-        "to" => 2,
-        "from" => "\x{1812}"
-      },
-      {
-        "to" => 3,
-        "from" => "\x{1813}"
-      },
-      {
-        "to" => 4,
-        "from" => "\x{1814}"
-      },
-      {
-        "to" => 5,
-        "from" => "\x{1815}"
-      },
-      {
-        "to" => 6,
-        "from" => "\x{1816}"
-      },
-      {
-        "to" => 7,
-        "from" => "\x{1817}"
-      },
-      {
-        "to" => 8,
-        "from" => "\x{1818}"
-      },
-      {
-        "to" => 9,
-        "from" => "\x{1819}"
-      }
-    ],
-    "reverse" => "true"
-  },
   "din_1460_bul" => {
     "desc" => "DIN 1460:1982, Cyrillic to Latin, Bulgarian",
     "name" => "DIN 1460 BUL",
@@ -4524,6 +4191,206 @@
     ],
     "reverse" => "false"
   },
+  "common_ara" => {
+    "desc" => "Common Romanization of Arabic",
+    "name" => "Common ARA",
+    "id" => "common_ara",
+    "rules" => [
+      {
+        "to" => "th",
+        "from" => "\x{62b}"
+      },
+      {
+        "to" => "kh",
+        "from" => "\x{62e}"
+      },
+      {
+        "to" => "sh",
+        "from" => "\x{634}"
+      },
+      {
+        "to" => "gh",
+        "from" => "\x{63a}"
+      },
+      {
+        "to" => "'e",
+        "from" => "\x{639}"
+      },
+      {
+        "to" => "'e",
+        "from" => "\x{626}"
+      },
+      {
+        "to" => "'e",
+        "from" => "\x{624}"
+      },
+      {
+        "to" => "a",
+        "from" => "\x{627}"
+      },
+      {
+        "to" => "a",
+        "from" => "\x{623}"
+      },
+      {
+        "to" => "a",
+        "from" => "\x{622}"
+      },
+      {
+        "to" => "a",
+        "from" => "\x{649}"
+      },
+      {
+        "to" => "e",
+        "from" => "\x{625}"
+      },
+      {
+        "to" => "b",
+        "from" => "\x{628}"
+      },
+      {
+        "to" => "t",
+        "from" => "\x{62a}"
+      },
+      {
+        "to" => "j",
+        "from" => "\x{62c}"
+      },
+      {
+        "to" => "h",
+        "from" => "\x{62d}"
+      },
+      {
+        "to" => "d",
+        "from" => "\x{62f}"
+      },
+      {
+        "to" => "d",
+        "from" => "\x{630}"
+      },
+      {
+        "to" => "d",
+        "from" => "\x{636}"
+      },
+      {
+        "to" => "r",
+        "from" => "\x{631}"
+      },
+      {
+        "to" => "z",
+        "from" => "\x{632}"
+      },
+      {
+        "to" => "z",
+        "from" => "\x{638}"
+      },
+      {
+        "to" => "s",
+        "from" => "\x{633}"
+      },
+      {
+        "to" => "s",
+        "from" => "\x{635}"
+      },
+      {
+        "to" => "t",
+        "from" => "\x{637}"
+      },
+      {
+        "to" => "f",
+        "from" => "\x{641}"
+      },
+      {
+        "to" => "q",
+        "from" => "\x{642}"
+      },
+      {
+        "to" => "k",
+        "from" => "\x{643}"
+      },
+      {
+        "to" => "l",
+        "from" => "\x{644}"
+      },
+      {
+        "to" => "m",
+        "from" => "\x{645}"
+      },
+      {
+        "to" => "n",
+        "from" => "\x{646}"
+      },
+      {
+        "to" => "h",
+        "from" => "\x{647}"
+      },
+      {
+        "to" => "h",
+        "from" => "\x{629}"
+      },
+      {
+        "to" => "w",
+        "from" => "\x{648}"
+      },
+      {
+        "to" => "y",
+        "from" => "\x{64a}"
+      },
+      {
+        "to" => "'",
+        "from" => "\x{621}"
+      },
+      {
+        "to" => "?",
+        "from" => "\x{61f}"
+      },
+      {
+        "to" => ",",
+        "from" => "\x{60c}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{640}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{64b}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{64c}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{64d}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{64e}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{64f}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{650}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{651}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{652}"
+      },
+      {
+        "to" => "",
+        "from" => "\x{200f}"
+      }
+    ],
+    "reverse" => "false"
+  },
   "gost_7.79_rus" => {
     "desc" => "GOST 7.79:2000, Cyrillic to Latin, Russian",
     "name" => "GOST 7.79 RUS",

xml/template.xml

@@ -59,6 +59,4 @@
 
 </translit>
 
-<!--
-  vim: sts=4 sw=4 ai et
--->
+<!-- vim: set sts=4 sw=4 ts=4 ai et ft=xml: -->

xml/translit.dtd

@@ -2,28 +2,26 @@
 
 
 <!--
-  
-  Basic document type definition for transliteration tables, "translit".
+
+  Basic document type definition for transliteration tables.
 
   Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
-  Copyright (C) 2009 Lingua-Systems Software GmbH
+  Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 
 -->
 
 
 <!ELEMENT translit  (name, desc, reverse, rules)>
-<!ELEMENT name	    (#PCDATA)>
-<!ELEMENT desc	    (#PCDATA)>
+<!ELEMENT name      (#PCDATA)>
+<!ELEMENT desc      (#PCDATA)>
 <!ELEMENT reverse   (#PCDATA)>
-<!ELEMENT rules	    (rule+)>
-<!ELEMENT rule	    (from, to, context?)>
-<!ELEMENT from	    (#PCDATA)>
-<!ELEMENT to	    (#PCDATA)>
+<!ELEMENT rules     (rule+)>
+<!ELEMENT rule      (from, to, context?)>
+<!ELEMENT from      (#PCDATA)>
+<!ELEMENT to        (#PCDATA)>
 <!ELEMENT context   ((before|after),(before|after)?)>
 <!ELEMENT before    (#PCDATA)>
-<!ELEMENT after	    (#PCDATA)>
+<!ELEMENT after     (#PCDATA)>
 
 
-<!--
-  vim: sts=2 enc=utf-8
--->
+<!-- vim: set sts=4 ts=4 sw=4 ai et: -->

xml/xml2dump.pl

@@ -2,10 +2,9 @@
 
 #
 # Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
-# Copyright (C) 2009 Lingua-Systems Software GmbH
+# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
 #
 
-
 use strict;
 use warnings;
 
@@ -15,30 +14,26 @@ use XML::LibXML;
 use Data::Dumper;
 use Getopt::Long;
 
-
 my $VERSION = '0.5';
 
-
 my %tables;
 
-
 # set default options
 my %opt = (
     output  => "tables.dump",
     verbose => 0,
 );
 
-
 # parse commandline options
-show_help(1) unless GetOptions(
-    "output|o=s"    => \$opt{output},
-    "verbose|v"     => \$opt{verbose},
-    "help|h"        => \$opt{help}
-);
-show_help(1) if scalar(@ARGV) == 0;   # No XML file(s) given
+show_help(1)
+  unless GetOptions(
+    "output|o=s" => \$opt{output},
+    "verbose|v"  => \$opt{verbose},
+    "help|h"     => \$opt{help}
+  );
+show_help(1) if scalar(@ARGV) == 0;    # No XML file(s) given
 show_help(0) if $opt{help};
 
-
 my $xmlparser = new XML::LibXML();
 
 # Set parser options
@@ -48,86 +43,78 @@ $xmlparser->expand_entities(1);
 $xmlparser->keep_blanks(1);
 $xmlparser->line_numbers(1);
 
-
 # Treat everything else in @ARGV as a filename
 foreach my $file (@ARGV) {
     print "Parsing $file..." if $opt{verbose};
 
-    my %counts = (rules => 0, contexts => 0);
+    my %counts = ( rules => 0, contexts => 0 );
 
     my $ds;
 
     my $doc = $xmlparser->parse_file($file)
-        or die "Error parsing $file: $!\n";
+      or die "Error parsing $file: $!\n";
 
     # Retrieve meta-documentation from XML document first
-    foreach my $meta (qw/name desc reverse/)
-    {
+    foreach my $meta (qw/name desc reverse/) {
         my @nodes = $doc->findnodes("/translit/$meta");
 
-        die "#/translit/$meta != 1" if (scalar(@nodes) != 1);
+        die "#/translit/$meta != 1" if ( scalar(@nodes) != 1 );
 
         $ds->{$meta} = $nodes[0]->to_literal();
     }
 
-
     # Perform some basic meta data checks
-    die "Name undefined.\n"             unless $ds->{name};
-    die "Description undefined.\n"      unless $ds->{desc};
-    die "Reversibility undefined.\n"    unless $ds->{reverse};
+    die "Name undefined.\n"          unless $ds->{name};
+    die "Description undefined.\n"   unless $ds->{desc};
+    die "Reversibility undefined.\n" unless $ds->{reverse};
 
     # Check <reverse> tag contains valid data.
     # TODO: move this to the DTD
     die "Reversibility: '$ds->{reverse}' -- Should be 'true' or 'false'.\n"
-        unless $ds->{reverse} =~ /^(true|false)$/;
+      unless $ds->{reverse} =~ /^(true|false)$/;
 
     # Set the table's identifier
-    $ds->{id} = lc($ds->{name});
+    $ds->{id} = lc( $ds->{name} );
     $ds->{id} =~ s/\s/_/g;
 
-
     # Retrieve all rules, extract their data and store it to an appropriate
     # data structure
-    foreach my $rule ($doc->findnodes("/translit/rules/rule"))
-    {
+    foreach my $rule ( $doc->findnodes("/translit/rules/rule") ) {
         my @nodes;
         my $rule_ds;
 
-
         # Retrieve "from" and "to" literals
-        foreach my $n (qw/from to/)
-        {
+        foreach my $n (qw/from to/) {
             @nodes = $rule->findnodes("./$n");
 
-            die "#/translit/rules/rules/$n != 1 " .
-                "(at line " . $rule->line_number() . ")\n"
-                if (scalar(@nodes) != 1);
+            die "#/translit/rules/rules/$n != 1 "
+              . "(at line "
+              . $rule->line_number() . ")\n"
+              if ( scalar(@nodes) != 1 );
 
             $rule_ds->{$n} = $nodes[0]->to_literal();
         }
 
-
         # Retrieve rule's "context"
         @nodes = $rule->findnodes("./context");
 
-        die "#/translit/rules/rule/context > 1 " .
-            "(at line " . $rule->line_number() . ")\n"
-            if (scalar(@nodes) > 1);
+        die "#/translit/rules/rule/context > 1 "
+          . "(at line "
+          . $rule->line_number() . ")\n"
+          if ( scalar(@nodes) > 1 );
 
         # Process rule's "context" if necessary
-        if (scalar(@nodes))
-        {
-            foreach my $context (qw/before after/)
-            {
+        if ( scalar(@nodes) ) {
+            foreach my $context (qw/before after/) {
                 @nodes = $rule->findnodes("./context/$context");
 
-                die "#/translit/rules/rule/context/$context > 1 " .
-                    "(at line " . $rule->line_number() . ")\n"
-                    if (scalar(@nodes) > 1);
+                die "#/translit/rules/rule/context/$context > 1 "
+                  . "(at line "
+                  . $rule->line_number() . ")\n"
+                  if ( scalar(@nodes) > 1 );
 
                 # Copy the context to the rule's data structure
-                if (scalar(@nodes))
-                {
+                if ( scalar(@nodes) ) {
                     $rule_ds->{context}->{$context} = $nodes[0]->to_literal();
                 }
             }
@@ -137,26 +124,23 @@ foreach my $file (@ARGV) {
 
         $counts{rules}++;
 
-
         die $rule_ds->{name} . ": from==to -> " . $rule_ds->{from} . "\n"
-            if ($rule_ds->{from} eq $rule_ds->{to});
+          if ( $rule_ds->{from} eq $rule_ds->{to} );
 
-        push @{$ds->{rules}}, $rule_ds;
+        push @{ $ds->{rules} }, $rule_ds;
     }
 
-
     # Copy transliteration structure over to the final hash
-    $tables{$ds->{id}} = $ds;
+    $tables{ $ds->{id} } = $ds;
 
     print " ($ds->{id}: rules=$counts{rules}, contexts=$counts{contexts})\n"
-        if $opt{verbose};
+      if $opt{verbose};
 
-    undef($ds); # free memory
+    undef($ds);    # free memory
 }
 
-
 # Configure Data::Dumper
-my $dumper = new Data::Dumper([ \%tables ], [ qw/*tables/ ]);
+my $dumper = new Data::Dumper( [ \%tables ], [qw/*tables/] );
 $dumper->Purity(0);
 $dumper->Useqq(1);
 $dumper->Indent(1);
@@ -166,25 +150,22 @@ open FH, ">$opt{output}" or die "$opt{output}: $!\n";
 print FH $dumper->Dump();
 close(FH);
 
-print scalar(keys(%tables)),
-    " transliteration table(s) dumped to $opt{output}.\n"
-    if $opt{verbose};
+print scalar( keys(%tables) ),
+  " transliteration table(s) dumped to $opt{output}.\n"
+  if $opt{verbose};
 
-
-sub show_help
-{
+sub show_help {
     my $retval = shift();
 
     print STDERR
-        "xml2dump v$VERSION -- Copyright 2007-2008 by Alex Linke ",
-        "<alinke\@lingua-systems.com>\n\n",
-        "usage: $0  [-v -h]  -o FILE  XML-FILE(s)\n\n",
-        "\t--output  -o  FILE     set output file (default: transtbl.dump)\n",
-        "\t--verbose -v           be verbose\n",
-        "\t--help    -h           show this help\n";
+      "xml2dump v$VERSION -- Copyright 2007-2008 by Alex Linke ",
+      "<alinke\@lingua-systems.com>\n\n",
+      "usage: $0  [-v -h]  -o FILE  XML-FILE(s)\n\n",
+      "\t--output  -o  FILE     set output file (default: transtbl.dump)\n",
+      "\t--verbose -v           be verbose\n",
+      "\t--help    -h           show this help\n";
 
     exit($retval);
 }
 
-
-# vim: sw=4 sts=4 enc=utf-8 ai et
+# vim: sw=4 sts=4 ts=4 ai et

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)