lib/Lingua/JA/Moji.pm - metacpan.org

package Lingua::JA::Moji;

require Exporter;
@ISA = qw(Exporter);

use warnings;
use strict;

our $VERSION = '0.38';

use Carp;
use Convert::Moji qw/make_regex length_one unambiguous/;
use utf8;

our @EXPORT_OK = qw/
                    InHankakuKatakana
                    InKana
                    InWideAscii
                    ascii2wide
                    bracketed2kanji
                    braille2kana
                    circled2kana
                    circled2kanji
                    cyrillic2katakana
                    hira2kata
                    hw2katakana
                    is_hiragana
                    is_kana
                    is_romaji
                    is_romaji_semistrict
                    is_romaji_strict
                    is_voiced
                    kana2braille
                    kana2circled
                    kana2cyrillic
                    kana2hangul
                    kana2hw
                    kana2katakana
                    kana2morse
                    kana2romaji
                    kana_order
                    kana_to_large
                    kanji2bracketed
                    kanji2circled
                    kata2hira
                    katakana2hw
                    katakana2syllable
                    morse2kana
                    new2old_kanji
                    normalize_romaji
                    old2new_kanji
                    romaji2hiragana
                    romaji2kana
                    romaji_styles
                    romaji_vowel_styles
		    square2katakana
		    katakana2square
                    wide2ascii
		    nigori_first
		   /;

our %EXPORT_TAGS = (
    'all' => \@EXPORT_OK,
);

# Load a specified convertor from the shared directory.

sub load_convertor
{
    my ($in, $out) = @_;
    my $filename = $in."2".$out;
    my $file = getdistfile ($filename);
    if (! $file || ! -f $file) {
	croak "Could not find distribution file '$filename'";
    }
    my $convertor = Convert::Moji::load_convertor ($file);
    return $convertor;
}

sub add_boilerplate
{
    my ($code, $name) = @_;
    $code =<<EOSUB;
sub convert_$name
{
    my (\$conv,\$input,\$convert_type) = \@_;
    $code
    return \$input;
}
EOSUB
    $code .= "\\\&".__PACKAGE__."::convert_$name;";
    return $code;
}

sub ambiguous_reverse
{
    my ($table) = @_;
    my %inverted;
    for (keys %$table) {
	my $val = $table->{$_};
	push @{$inverted{$val}}, $_;
    }
    return \%inverted;
}

# Callback

sub split_match
{
    my ($conv, $input, $convert_type) = @_;
    if (!$convert_type) {
        $convert_type = "all";
    }
    my @input = split '', $input;
    my @output;
    for (@input) {
	my $in = $conv->{out2in}->{$_};
	# No conversion defined.
	if (! $in) {
	    push @output, $_;
	    next;
	}
	# Unambigous case
	if (@{$in} == 1) {
	    push @output, $in->[0];
	    next;
	}
	if ($convert_type eq 'all') {
	    push @output, $in;
	}
        elsif ($convert_type eq 'first') {
	    push @output, $in->[0];
	}
        elsif ($convert_type eq 'random') {
	    my $pos = int rand @$in;
	    push @output, $in->[$pos];
	}
    }
    return \@output;
}

sub make_convertors
{
    my ($in, $out, $table) = @_;
    my $conv = {};
    if (!$table) {
	$table = load_convertor ($in, $out);
    }
    $conv->{in2out} = $table;
    my @keys = keys %{$table};
    my @values = values %{$table};
    my $sub_in2out;
    my $sub_out2in;
    if (length_one(@keys)) {
	my $lhs = join '', @keys;

	# Improvement: one way tr/// for the ambiguous case lhs/rhs only.

	if (length_one(@values) && unambiguous($table)) {
	    # can use tr///;
	    my $rhs = join '', @values;
	    $sub_in2out = "\$input =~ tr/$lhs/$rhs/;";
	    $sub_out2in = "\$input =~ tr/$rhs/$lhs/;";
	}
        else {
	    $sub_in2out = "\$input =~ s/([$lhs])/\$conv->{in2out}->{\$1}/eg;";
	    my $rhs = make_regex (@values);
	    if (unambiguous($conv->{in2out})) {
		my %out2in_table = reverse %{$conv->{in2out}};
		$conv->{out2in} = \%out2in_table;
		$sub_out2in = "\$input =~ s/($rhs)/\$conv->{out2in}->{\$1}/eg;";
	    }
            else {
		$conv->{out2in} = ambiguous_reverse ($conv->{in2out});
		$sub_out2in = "\$input = \$conv->split_match (\$input, \$convert_type);";
	    }
	}
    }
    else {
	my $lhs = make_regex (@keys);
	$sub_in2out = "\$input =~ s/($lhs)/\$conv->{in2out}->{\$1}/eg;";
	my $rhs = make_regex (@values);
	if (unambiguous($conv->{in2out})) {
	    my %out2in_table = reverse %{$conv->{in2out}};
	    $conv->{out2in} = \%out2in_table;
	    $sub_out2in = "    \$input =~ s/($rhs)/\$conv->{out2in}->{\$1}/eg;";
	}
    }
    $sub_in2out = add_boilerplate ($sub_in2out, "${in}2$out");
    my $sub1 = eval $sub_in2out;
    $conv->{in2out_sub} = $sub1;
    if ($sub_out2in) {
	$sub_out2in = add_boilerplate ($sub_out2in, "${out}2$in");
	my $sub2 = eval $sub_out2in;
	if ($@) {
	    print "Errors are ",$@,"\n";
	    print "\$sub2 = ",$sub2,"\n";
	}
	$conv->{out2in_sub} = $sub2;
    }
    bless $conv;
    return $conv;
}

sub convert
{
    my ($conv, $input) = @_;
    return &{$conv->{in2out_sub}}($conv, $input);
}

sub invert
{
    my ($conv, $input, $convert_type) = @_;
    return &{$conv->{out2in_sub}}($conv, $input, $convert_type);
}


# Kana ordered by consonant. Adds bogus "q" gyou for small vowels and
# "x" gyou for youon (ya, yu, yo) to the usual ones.

my @gyou = (
    a => [qw/ア イ ウ エ オ/],
    q => [qw/ァ ィ ゥ ェ ォ/],
    k => [qw/カ キ ク ケ コ/],
    g => [qw/ガ ギ グ ゲ ゴ/],
    s => [qw/サ シ ス セ ソ/],
    z => [qw/ザ ジ ズ ゼ ゾ/],
    t => [qw/タ チ ツ テ ト/],
    d => [qw/ダ ヂ ヅ デ ド/],
    n => [qw/ナ ニ ヌ ネ ノ/],
    h => [qw/ハ ヒ フ ヘ ホ/],
    b => [qw/バ ビ ブ ベ ボ/],
    p => [qw/パ ピ プ ペ ポ/],
    m => [qw/マ ミ ム メ モ/],
    y => [qw/ヤ    ユ    ヨ/],
    xy => [qw/ャ    ュ    ョ/],
    r => [qw/ラ リ ル レ ロ/],
    w => [qw/ワ ヰ    ヱ ヲ/],
    v => [qw/ヴ/],
);

my %gyou = @gyou;

sub kana_order
{
    # I don't know if it's necessary to copy the array or not, but I don't
    # want to take a chance messing up the array.
    my @copy = @gyou;
    return \@copy;
}

# Kana => consonant mapping.

my %siin;

for my $consonant (keys %gyou) {
    for my $kana (@{$gyou{$consonant}}) {
        if ($consonant eq 'a') {
            $siin{$kana} = '';
        }
	else {
            $siin{$kana} = $consonant;
        }
    }
}

# Vowel => kana mapping.

my %dan = (a => [qw/ア カ ガ サ ザ タ ダ ナ ハ バ パ マ ヤ ラ ワ ャ ァ/],
	   i => [qw/イ キ ギ シ ジ チ ヂ ニ ヒ ビ ピ ミ リ ヰ ィ/],
	   u => [qw/ウ ク グ ス ズ ツ ヅ ヌ フ ブ プ ム ユ ル ュ ゥ ヴ/],
	   e => [qw/エ ケ ゲ セ ゼ テ デ ネ ヘ ベ ペ メ レ ヱ ェ/],
	   o => [qw/オ コ ゴ ソ ゾ ト ド ノ ホ ボ ポ モ ヨ ロ ヲ ョ ォ/]);

# Kana => vowel mapping

my %boin;

# List of kana with a certain vowel.

my %vowelclass;

for my $vowel (keys %dan) {
    my @kana_list = @{$dan{$vowel}};
    for my $kana (@kana_list) {
	$boin{$kana} = $vowel;
    }
    $vowelclass{$vowel} = join '', @kana_list;
}

# Kana gyou which can be preceded by a sokuon (small tsu).

# Added d to the list for ウッド BKB 2010-07-20 23:27:07
# Added z for "badge" etc.
# Added g for ドッグ etc.

my @takes_sokuon_gyou = qw/s t k p d z g/;
my @takes_sokuon = (map {@{$gyou{$_}}} @takes_sokuon_gyou);
my $takes_sokuon = join '', @takes_sokuon;

# N

# Kana gyou which need an apostrophe when preceded by an "n" kana.

my $need_apostrophe = join '', (map {@{$gyou{$_}}} qw/a y/);

# Gyou which turn an "n" into an "m" in some kinds of romanization

my $need_m = join '', (map {@{$gyou{$_}}} qw/p b m/);

# YOUON

# Small ya, yu, yo.

my $youon = join '', (@{$gyou{xy}});
my %youon = qw/a ャ u ュ o ョ ou ョ/;

# HEPBURN

# Hepburn irregular romanization

my %hepburn = qw/シ sh ツ ts チ ch ジ j ヅ z ヂ j フ f/;

# Hepburn map from vowel to list of kana with that vowel.

my %hep_vowel = (i => 'シチジヂ', u => 'ヅツフ');
my $hep_list = join '', keys %hepburn;

# Hepburn irregular romanization of ッチ as "tch".

my %hepburn_sokuon = qw/チ t/;
my $hep_sok_list = join '', keys %hepburn_sokuon;

# Hepburn variants for the youon case.

my %hepburn_youon = qw/シ sh チ ch ジ j ヂ j/;
my $is_hepburn_youon = join '', keys %hepburn_youon;

# Kunrei romanization

my %kunrei = qw/ヅ z ヂ z/;

my $kun_list = join '', keys %kunrei;

my %kunrei_youon = qw/ヂ z/;
my $is_kunrei_youon = join '', keys %kunrei_youon;

# LONG VOWELS

# Long vowels, another bugbear of Japanese romanization.

my @aiueo = qw/a i u e o ou/;

# Various ways to display the long vowels.

my %chouonhyouki;
@{$chouonhyouki{circumflex}}{@aiueo} = qw/â  î  û  ê  ô  ô/;
@{$chouonhyouki{macron}}{@aiueo}     = qw/ā  ii  ū  ē  ō  ō/;
@{$chouonhyouki{wapuro}}{@aiueo}     = qw/aa ii uu ee oo ou/;
@{$chouonhyouki{passport}}{@aiueo}   = qw/a  i  u  e  oh oh/;
@{$chouonhyouki{none}}{@aiueo}       = qw/a  ii  u  e  o  o/;

my $vowel_re = qr/[aeiouâêîôûāēōū]/i;
my $no_u_vowel_re = qr/[aeioâêîôāēō]/i;
my $u_re = qr/[uūû]/i;

sub kana2romaji
{
    my ($input, $options) = @_;
    $input = kana2katakana ($input);
    if (! $options) {
        $options = {};
    }
    # Parse the options
    my $debug = $options->{debug};
    my $kunrei;
    my $hepburn;
    my $passport;
    my $common;
    if ($options->{style}) {
        my $style = $options->{style};
        if ($style eq 'kunrei') {
            $kunrei   = 1;
        }
        if ($style eq 'passport') {
            $passport = 1;
        }
        if ($style eq 'hepburn') {
            $hepburn  = 1;
        }
        if ($style eq 'common') {
            $hepburn  = 1;
	    $common = 1;
        }
        if (!$kunrei && !$passport && !$hepburn && $style ne "nihon" &&
	    $style ne 'nippon') {
            croak "Unknown romanization style '$options->{style}'";
        }
    }
    my $wapuro;
    if ($options->{wapuro}) {
        $wapuro = 1;
    }
    my $use_m = 0;
    if ($hepburn || $passport) {
	$use_m = 1;
    }
    if (defined $options->{use_m}) {
	$use_m = $options->{use_m}
    }
    my $ve_type = 'circumflex'; # type of vowel extension to use.
    if ($hepburn) {
	$ve_type = 'macron';
    }
    if ($wapuro) {
        $ve_type = 'wapuro';
    }
    if ($passport) {
	$hepburn = 1;
	$ve_type = 'passport';
	$use_m = 1;
    }
    if ($options->{ve_type}) {
	$ve_type = $options->{ve_type};
    }
    if (! $chouonhyouki{$ve_type}) {
	print STDERR "Warning: unrecognized long vowel type '$ve_type'\n";
	$ve_type = 'circumflex';
    }
    my $wo;
    if ($options->{wo}) {
	$wo = 1;
    }
    # Start of conversion

    # 撥音 (ん)
    $input =~ s/ン(?=[$need_apostrophe])/n\'/g;
    if ($use_m) {
	$input =~ s/ン(?=[$need_m])/m/g;
    }
    $input =~ s/ン/n/g;
    # 促音 (っ)
    if ($hepburn) {
	$input =~ s/ッ([$hep_sok_list])/$hepburn_sokuon{$1}$1/g;
    }
    $input =~ s/ッ([$takes_sokuon])/$siin{$1}$1/g;
    # 長音 (ー)
    for my $vowel (@aiueo) {
	my $ve = $chouonhyouki{$ve_type}->{$vowel};
	my $vowelclass;
	my $vowel_kana;
	if ($vowel eq 'ou') {
	    $vowelclass = $vowelclass{o};
	    $vowel_kana = 'ウ';
	} else {
	    $vowelclass = $vowelclass{$vowel};
	    $vowel_kana = $dan{$vowel}->[0];
	}
	# 長音 (ー) + 拗音 (きょ)
	my $y = $youon{$vowel};
	if ($y) {
	    if ($hepburn) {
		$input =~ s/([$is_hepburn_youon])${y}[ー$vowel_kana]/$hepburn_youon{$1}$ve/g;
	    }
	    $input =~ s/([$vowelclass{i}])${y}[ー$vowel_kana]/$siin{$1}y$ve/g;
	}
	if ($hepburn && $hep_vowel{$vowel}) {
	    $input =~ s/([$hep_vowel{$vowel}])[ー$vowel_kana]/$hepburn{$1}$ve/g;
	}
	$input =~ s/${vowel_kana}[ー$vowel_kana]/$ve/g;
	$input =~ s/([$vowelclass])[ー$vowel_kana]/$siin{$1}$ve/g; 
    }
    # 拗音 (きょ)
    if ($hepburn) {
	$input =~ s/([$is_hepburn_youon])([$youon])/$hepburn_youon{$1}$boin{$2}/g;
    }
    elsif ($kunrei) {
	$input =~ s/([$is_kunrei_youon])([$youon])/$kunrei_youon{$1}y$boin{$2}/g;
    }
    $input =~ s/([$vowelclass{i}])([$youon])/$siin{$1}y$boin{$2}/g;
    # その他
    if ($wo) {
	$input =~ s/ヲ/wo/g;
	$input =~ s/([アイウエオ])/$boin{$1}/g;
    }
    else {
	$input =~ s/([アイウエオヲ])/$boin{$1}/g;
    }
    $input =~ s/([ァィゥェォ])/q$boin{$1}/g;
    if ($hepburn) {
	$input =~ s/([$hep_list])/$hepburn{$1}$boin{$1}/g;
    }
    elsif ($kunrei) {
	$input =~ s/([$kun_list])/$kunrei{$1}$boin{$1}/g;
    }
    $input =~ s/([カ-ヂツ-ヱヴ])/$siin{$1}$boin{$1}/g;
    $input =~ s/q($vowel_re)/x$1/g;
    if ($common) {
	# Convert kana + small vowel into thingumibob, if there is a
	# consonant before.
	$input =~ s/([^\Waiueo])$vowel_re[x]($vowel_re)/$1$2/;
	# Convert u + small kana into w + vowel
	$input =~ s/($vowel_re|\b)ux([iue])/$1w$2/i;
    }
    return $input;
}

sub romaji2hiragana
{
    my ($input, $options) = @_;
    if (! $options) {
        $options = {};
    }
    my $katakana = romaji2kana ($input, {wapuro => 1, %$options});
    return kata2hira ($katakana);
}

sub romaji_styles
{
    my ($check) = @_;
    my @styles = (
    {
        abbrev    => "hepburn",
        full_name => "Hepburn",
    }, {
        abbrev    => 'nihon',
        full_name => 'Nihon-shiki',
    }, {
        abbrev    => 'kunrei',
        full_name => 'Kunrei-shiki',
    }, {
	abbrev => 'common',
	full_name => 'common',
    });
    if (! defined ($check)) {
        return (@styles);
    }
    else {
        for my $style (@styles) {
            if ($check eq $style->{abbrev}) {
                return 1;
            }
        }
        return;
    }
}

# Check whether this vowel style is allowed.

sub romaji_vowel_styles
{
    my ($check) = @_;
    my @styles = (
    {
        abbrev    => "macron",
        full_name => "Macron",
    },
    {
        abbrev    => 'circumflex',
        full_name => 'Circumflex',
    },
    {
        abbrev    => 'wapuro',
        full_name => 'Wapuro',
    },
    {
        abbrev    => 'passport',
        full_name => 'Passport',
    },
    {
        abbrev    => 'none',
        full_name => "Do not indicate",
    },
    );
    if (! defined ($check)) {
        return (@styles);
    }
    else {
        for (@styles) {
            if ($check eq $_->{abbrev}) {
                return 1;
            }
        }
        return;
    }
}

my $romaji2katakana;
my $romaji_regex;

my %longvowels;
@longvowels{qw/â  î  û  ê  ô/}  = qw/aー iー uー eー oー/;
@longvowels{qw/ā  ī  ū  ē  ō/}  = qw/aー iー uー eー oー/;
my $longvowels = join '|', sort {length($a)<=>length($b)} keys %longvowels;

sub romaji2kana
{
    my ($input, $options) = @_;
    if (! defined $romaji2katakana) {
	$romaji2katakana = load_convertor ('romaji', 'katakana');
	$romaji_regex = make_regex (keys %$romaji2katakana);
    }
    # Set to true if we want long o to be オウ rather than オー
    my $wapuro;
    # Set to true if we want gumma to be ぐっま and onnna to be おんな.
    my $ime;
    if ($options) {
	$wapuro = $options->{wapuro};
	$ime = $options->{ime};
    }

    if (! defined $input) {
        return;
    }
    $input = lc $input;
    # Deal with long vowels
    if ($wapuro) {
        $input =~ s/[âā]/aa/g;
        $input =~ s/[îī]/ii/g;
        $input =~ s/[êē]/ee/g;
	$input =~ s/[ûū]/uu/g;
	$input =~ s/[ôō]/ou/g;
    }
    else {
	$input =~ s/($longvowels)/$longvowels{$1}/g;
        # Doubled vowels to chouon
        $input =~ s/([aiueo])\1/$1ー/g;
    }
    # Deal with double consonants
    # danna -> だんな
    if ($ime) {
	# IME romaji rules:
	# Allow double n for ん:
	# gunnma -> グンマ, dannna -> ダンナ
	$input =~ s/n{1,2}(?=[nm][aiueo])/ン/g;
	# Substitute sokuon for mm + vowel:
	# gumma -> グッマ
	$input =~ s/m(?=[nm][aiueo])/ッ/g;
    }
    else {
	# Usual romaji rules: Don't allow double n for ん, change
	# gumma to グンマ.
	$input =~ s/[nm](?=[nm][aiueo])/ン/g;
    }
    # shimbun -> しんぶん
    $input =~ s/m(?=[pb]y?[aiueo])/ン/g;
    # tcha -> っちゃ
    $input =~ s/t(?=ch[aiueo])/ッ/g;
    # ccha -> っちゃ
    $input =~ s/c(?=ch[aiueo])/ッ/g;
    # kkya -> っきゃ etc.
    $input =~ s/([kstfhmrgzdbpjqvwy])(?=\1y?[aiueo])/ッ/g;
    # kkya -> っきゃ etc.
    $input =~ s/ttsu/ッツ/g;
    # ssha -> っしゃ
    $input =~ s/([s])(?=\1h[aiueo])/ッ/g;
    # Passport romaji,
    # oh{consonant} -> oo
    if (! $ime) {
	# IMEs do not recognize passport romaji.
	if ($wapuro) {
	    $input =~ s/oh(?=[ksthmrgzdbp])/オウ/g;
	}
	else {
	    $input =~ s/oh(?=[ksthmrgzdbp])/オー/g;
	}
    }
    # All the special cases have been dealt with, now substitute all
    # the kana.
    $input =~ s/($romaji_regex)/$romaji2katakana->{$1}/g;
    return $input;
}

sub is_voiced
{
    my ($sound) = @_;
    if (is_kana ($sound)) {
        $sound = kana2romaji ($sound);
    }
    elsif (my $romaji = is_romaji ($sound)) {
        # Normalize to nihon shiki so that we don't have to worry
        # about ch, j, ts, etc. at the start of the sound.
        $sound = $romaji;
    }
    if ($sound =~ /^[aiueogzbpmnry]/) {
        return 1;
    }
    else {
        return;
    }
}

sub is_romaji
{
    my ($romaji) = @_;
    # Test that $romaji contains only characters which may be
    # romanized Japanese.
    if ($romaji =~ /[^\sa-zāīūēōâîûêô'-]/i) {
        return;
    }
    my $kana = romaji2kana ($romaji, {wapuro => 1});
    if ($kana =~ /^[ア-ンッー\s]+$/) {
        return kana2romaji ($kana, {wapuro => 1});
    }
    return;
}


sub is_romaji_semistrict
{
    my ($romaji) = @_;
    if (! is_romaji ($romaji)) {
	return;
    }
    if ($romaji =~ /
		       # Don't allow small vowels, small tsu, or fya,
		       # fye etc.
		       (fy|l|x|v)y?($vowel_re|ts?u|wa|ka|ke)
		   |
		       # Don't allow hyi, hye, yi, ye.
		       [zh]?y[ieêîē]
		   |
		       # Don't allow tye
		       ty[eêē]
		   |
		       # Don't allow wh-, kw-, gw-, dh-, etc.
		       (wh|kw|gw|dh|thy)$vowel_re
		   |
		       # Don't allow "t'i"
		       [dt]'(i|y?$u_re)
		   |
		       # Don't allow dwu, twu
		       [dt](w$u_re)
		   |
		       hwy$u_re
		   |
		       # Don't allow "wi" or "we".
		       w(i|e)
		   |
		       # Don't allow some non-Japanese double consonants.
		       (?:rr|yy)
		   |
		       # Don't allow 'thi'
		       thi
		   /ix) {
        return;
    }
    return 1;
}

sub is_romaji_strict
{
    my ($romaji) = @_;
    if (! is_romaji ($romaji)) {
	return;
    }
    if ($romaji =~ /
		       # Don't allow small vowels, small tsu, or fya,
		       # fye etc.
		       (fy|l|x|v)y?($vowel_re|ts?u|wa|ka|ke)
		   |
		       # Don't allow hyi, hye, yi, ye.
		       [zh]?y[ieêîē]
		   |
		       # Don't allow tye
		       ty[eêē]
		   |
		       # Don't allow wh-, kw-, gw-, dh-, etc.
		       (wh|kw|gw|dh|thy)$vowel_re
		   |
		       # Don't allow tsa, tsi, tse, tso, fa, fe, fi, fo.
		       (ts|f)$no_u_vowel_re
		   |
		       # Don't allow "t'i"
		       [dt]'(i|y?$u_re)
		   |
		       # Don't allow dwu, twu
		       [dt](w$u_re)
		   |
		       hwy$u_re
		   |
		       # Don't allow "wi" or "we".
		       w(i|e)
		   |
		       # Don't allow 'je', 'che', 'she'
		       (?:[cs]h|j)e
		   |
		       # Don't allow some non-Japanese double consonants.
		       (?:rr|yy)
		   |
		       # Don't allow 'thi'
		       thi
		   /ix) {
        return;
    }
    return 1;
}

sub hira2kata
{
    my (@input) = @_;
    if (!@input) {
        return;
    }
    for (@input) {tr/ぁ-んゔ/ァ-ンヴ/}
    return wantarray ? @input : "@input";
}

sub kata2hira
{
    my (@input) = @_;
    for (@input) {tr/ァ-ンヴ/ぁ-んゔ/}
    return wantarray ? @input : "@input";
}

# Make the list of dakuon stuff.

sub make_dak_list
{
    my @dak_list;
    for (@_) {
	push @dak_list, @{$gyou{$_}};
	push @dak_list, hira2kata (@{$gyou{$_}});
    }
    return @dak_list;
}

my $strip_daku;

sub load_strip_daku
{
    if (!$strip_daku) {
	my %dakuten;
	@dakuten{(make_dak_list (qw/g d z b/))} = 
	    map {$_."゛"} (make_dak_list (qw/k t s h/));
	@dakuten{(make_dak_list ('p'))} = map {$_."゜"} (make_dak_list ('h'));
	my $dakuten = join '', keys %dakuten;
	$strip_daku = make_convertors ("ten_joined", "ten_split", \%dakuten);
    }
}

my %dakuten;
@dakuten{(make_dak_list (qw/g d z b/))} = 
    map {$_."゛"} (make_dak_list (qw/k t s h/));
@dakuten{(make_dak_list ('p'))} = map {$_."゜"} (make_dak_list ('h'));

sub kana2hw2
{
    my $conv = Convert::Moji->new (["oneway", "tr", "あ-ん", "ア-ン"],
				   ["file",
				    getdistfile ("katakana2hw_katakana")]);
    return $conv;
}

my $kata2hw;

sub make_kata2hw
{
   if (!$kata2hw) {
       $kata2hw = make_convertors ('katakana','hw_katakana');
   }
}

my $kana2hw;

sub kana2hw
{
   my ($input) = @_;
   if (! $kana2hw) {
       $kana2hw = kana2hw2 ();
   }
   return $kana2hw->convert ($input);
}

sub katakana2hw
{
    my ($input) = @_;
    make_kata2hw ();
    return $kata2hw->convert ($input);
}

sub hw2katakana
{
    my ($input) = @_;
    if (!$kata2hw) {
        $kata2hw = make_convertors ('katakana','hw_katakana');
    }
    return $kata2hw->invert ($input);
}

sub InHankakuKatakana
{
    return <<'END';
+utf8::Katakana
&utf8::InHalfwidthAndFullwidthForms
END
}

sub wide2ascii
{
    my ($input) = @_;
    $input =~ tr/\x{3000}\x{FF01}-\x{FF5E}/ -~/;
    return $input;
}

sub ascii2wide
{
    my ($input) = @_;
    $input =~ tr/ -~/\x{3000}\x{FF01}-\x{FF5E}/;
    return $input;
}

sub InWideAscii
{
    return <<'END';
FF01 FF5E
3000
END
}

my $kana2morse;

sub load_kana2morse
{
    if (!$kana2morse) {
	$kana2morse = make_convertors ('katakana', 'morse');
    }
}

sub kana2morse
{
    my ($input) = @_;
    load_kana2morse;
    $input = hira2kata ($input);
    $input =~ tr/ァィゥェォャュョッ/アイウエオヤユヨツ/;
    load_strip_daku;
    $input = $strip_daku->convert ($input);
    $input = join ' ', (split '', $input);
    $input = $kana2morse->convert ($input);
    return $input;
}


sub getdistfile
{
    my ($filename) = @_;
    my $dir = __FILE__;
    $dir =~ s!\.pm$!/!;
    my $file = "$dir$filename.txt";
    return $file;
}

sub morse2kana
{
    my ($input) = @_;
    load_kana2morse;
    my @input = split ' ',$input;
    for (@input) {
	$_ = $kana2morse->invert ($_);
    }
    $input = join '',@input;
    $input = $strip_daku->invert ($input);
    return $input;
}

my $kana2braille;

sub load_kana2braille
{
    if (!$kana2braille) {
	$kana2braille = make_convertors ('katakana', 'braille');
    }
}

my %nippon2kana;

for my $k (keys %gyou) {
    for my $ar (@{$gyou{$k}}) {
	my $vowel = $boin{$ar};
	my $nippon = $k.$vowel;
	$nippon2kana{$nippon} = $ar;
    }
}

sub is_kana
{
    my ($may_be_kana) = @_;
    if ($may_be_kana =~ /^[あ-んア-ン]+$/) {
        return 1;
    }
    return;
}

sub is_hiragana
{
    my ($may_be_kana) = @_;
    if ($may_be_kana =~ /^[あ-ん]+$/) {
        return 1;
    }
    return;
}

sub kana2katakana
{
    my ($input) = @_;
    $input = hira2kata($input);
    if ($input =~ /\p{InHankakuKatakana}/) {
	$input = hw2katakana($input);
    }
    return $input;
}

sub kana2braille
{
    my ($input) = @_;
    load_kana2braille;
    $input = kana2katakana ($input);
    load_strip_daku;
    $input = $strip_daku->convert ($input);
    $input =~ s/(.)゛([ャュョ])/'⠘'.$nippon2kana{$siin{$1}.$boin{$2}}/eg;
    $input =~ s/(.)゜([ャュョ])/'⠨'.$nippon2kana{$siin{$1}.$boin{$2}}/eg;
    $input =~ s/(.)([ャュョ])/'⠈'.$nippon2kana{$siin{$1}.$boin{$2}}/eg;
    $input =~ s/([$vowelclass{o}])ウ/$1ー/g;
    $input = $kana2braille->convert ($input);
    $input =~ s/(.)([⠐⠠])/$2$1/g;
    return $input;
}

sub braille2kana
{
    my ($input) = @_;
    load_kana2braille;
    $input =~ s/([⠐⠠])(.)/$2$1/g;
    $input = $kana2braille->invert ($input);
    $input =~ s/⠘(.)/$nippon2kana{$siin{$1}.'i'}.'゛'.$youon{$boin{$1}}/eg;
    $input =~ s/⠨(.)/$nippon2kana{$siin{$1}.'i'}.'゜'.$youon{$boin{$1}}/eg;
    $input =~ s/⠈(.)/$nippon2kana{$siin{$1}.'i'}.$youon{$boin{$1}}/eg;
    $input = $strip_daku->invert ($input);
    return $input;
}

my $circled_conv;

sub load_circled_conv
{
    if (!$circled_conv) {
	$circled_conv = make_convertors ("katakana", "circled");
    }
}

sub kana2circled
{
    my ($input) = @_;
    $input = kana2katakana($input);
    load_strip_daku;
    $input = $strip_daku->convert($input);
    load_circled_conv;
    $input = $circled_conv->convert ($input);
    return $input;
}

sub circled2kana
{
    my ($input) = @_;
    load_circled_conv;
    load_strip_daku;
    $input = $circled_conv->invert ($input);
    $input = $strip_daku->invert ($input);
    return $input;
}

sub normalize_romaji
{
    my ($romaji) = @_;
    my $kana = romaji2kana ($romaji, {ve_type => 'wapuro'});
    $kana =~ s/[っッ]/xtu/g;
    my $romaji_out = kana2romaji ($kana, {ve_type => 'wapuro'});
}

my $new2old_kanji;

sub load_new2old_kanji
{
    $new2old_kanji = Convert::Moji->new (
        ['file', getdistfile ('new_kanji2old_kanji')],
    );
}

sub new2old_kanji
{
    my ($new_kanji) = @_;
    if (! $new2old_kanji) {
        load_new2old_kanji ();
    }
    my $old_kanji = $new2old_kanji->convert ($new_kanji);
    return $old_kanji;
}

sub old2new_kanji
{
    my ($old_kanji) = @_;
    if (! $new2old_kanji) {
        load_new2old_kanji ();
    }
    my $new_kanji = $new2old_kanji->invert ($old_kanji);
    return $new_kanji;
}

my $katakana2cyrillic;

sub load_katakana2cyrillic
{
    $katakana2cyrillic = Convert::Moji->new (['file', getdistfile ('katakana2cyrillic')]);
}

sub kana2cyrillic
{
    my ($kana) = @_;
    my $katakana = kana2katakana ($kana);
    $katakana =~ s/ン([アイウエオヤユヨ])/ンъ$1/g;
    if (! $katakana2cyrillic) {
        load_katakana2cyrillic ();
    }
    my $cyrillic = $katakana2cyrillic->convert ($katakana);
    $cyrillic =~ s/н([пбм])/м$1/g;
    return $cyrillic;
}

sub cyrillic2katakana
{
    my ($cyrillic) = @_;
    # Convert the Cyrillic letters to lower case versions of the
    # letters. This table of conversions was made from the one in
    # Wikipedia at <http://en.wikipedia.org/wiki/Cyrillic_alphabets>
    # using Emacs, the revision being
    # <http://en.wikipedia.org/w/index.php?title=Cyrillic_alphabets&oldid=482154809>.
    # I do not know if it covers the alphabets perfectly.
    $cyrillic =~ tr/АБВГДЕЖЗИЙIКЛМНОПРСТУФХЦЧШЩЬЮЯ/абвгдежзийiклмнопрстуфхцчшщьюя/;
    if (! $katakana2cyrillic) {
        load_katakana2cyrillic ();
    }
    my $katakana = $katakana2cyrillic->invert ($cyrillic);
    $katakana =~ s/м/ン/g; 
    $katakana =~ s/ンъ([アイウエオヤユヨ])/ン$1/g;
    return $katakana;
}

my $first2hangul;
my $rest2hangul;

my $first2hangul_re;
my $rest2hangul_re;

sub load_kana2hangul
{
    $first2hangul = load_convertor ('first', 'hangul');
    $rest2hangul = load_convertor ('rest', 'hangul');
    $first2hangul_re = '\b' . make_regex (keys %$first2hangul);
    $rest2hangul_re = make_regex (keys %$rest2hangul);
}

sub kana2hangul
{
    my ($kana) = @_;
    my $katakana = kana2katakana ($kana);
    if (! $first2hangul) {
        load_kana2hangul ();
        $katakana =~ s/($first2hangul_re)/$first2hangul->{$1}/g;
        $katakana =~ s/($rest2hangul_re)/$rest2hangul->{$1}/g;
    }
    return $katakana;
}

sub kana_to_large
{
    my ($kana) = @_;
    $kana =~ tr/ゃゅょぁぃぅぇぉっゎ/やゆよあいうえおつわ/;
    $kana =~ tr/ャュョァィゥェォッヮ/ヤユヨアイウエオツワ/;
    # Katakana phonetic extensions.
    $kana =~ tr/ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ/クシストヌハヒフヘホムラリルレロ/;
    return $kana;
}

my $circled2kanji;

sub load_circled2kanji
{
    if (! $circled2kanji) {
        $circled2kanji =
        Convert::Moji->new (["file",
                             getdistfile ('circled2kanji')]);
    }
    if (! $circled2kanji) {
        die "ERROR";
    }
}

sub circled2kanji
{
    my ($input) = @_;
    load_circled2kanji ();
    return $circled2kanji->convert ($input);
}

sub kanji2circled
{
    my ($input) = @_;
    load_circled2kanji ();
    return $circled2kanji->invert ($input);
}

my $bracketed2kanji;

sub load_bracketed2kanji
{
    if (! $bracketed2kanji) {
        $bracketed2kanji =
        Convert::Moji->new (["file",
                             getdistfile ('bracketed2kanji')]);
    }
}

sub bracketed2kanji
{
    my ($input) = @_;
    load_bracketed2kanji ();
    return $bracketed2kanji->convert ($input);
}

sub kanji2bracketed
{
    my ($input) = @_;
    load_bracketed2kanji ();
    return $bracketed2kanji->invert ($input);
}

sub InKana
{
    return <<'END';
+utf8::Katakana
+utf8::InKatakana
+utf8::InHiragana
FF9E\tFF9F
FF70
-utf8::IsCn
-30FB
END
    # Explanation of the above gibberish: The funny hex is for dakuten
    # and handakuten half width. The Katakana catches halfwidth
    # katakana, and the InKatakana catches the chouon mark. IsCn means
    # "other, not assigned". 30FB is "Katakana middle dot", which is
    # not kana as far as I know.
}

# お

my $kana2syllable_re = qr/ッ?[アイウエオ-モヤユヨ-ヴ](?:[ャュョァィゥェォ])?ー?ン?/;

sub katakana2syllable
{
    my ($kana) = @_;
    my @pieces;
    while ($kana =~ /($kana2syllable_re)/g) {
        push @pieces, $1;
    }
    return \@pieces;
}

my $square2katakana;

sub load_square2katakana
{
    if (! $square2katakana) {
        $square2katakana =
        Convert::Moji->new (["file",
                             getdistfile ('square-katakana')]);
    }
}

sub square2katakana
{
    load_square2katakana ();
    return $square2katakana->convert (@_);
}

sub katakana2square
{
    load_square2katakana ();
    return $square2katakana->invert (@_);
}

# Turn shima into jima etc.

my %nigori = (qw/
カ ガ
キ ギ
ク グ
ケ ゲ
コ ゴ
サ ザ
シ ジ
ス ズ
セ ゼ
ソ ゾ
タ ダ
チ ヂ
ツ ヅ
テ デ
ト ド
ハ バ
ヒ ビ
フ ブ
ヘ ベ
ホ ボ
/);

my %handaku = (qw/
ハ パ
ヒ ピ
フ プ
ヘ ペ
ホ ポ
/);

sub nigori_first
{
    my ($list) = @_;
    my @nigori;
    for my $kana (@$list) {
	my ($first, $remaining) = split //, $kana, 2;
	my $nf = $nigori{$first};
	if ($nf) {
	    #	print "$kana -> $nf$remaining\n";
	    push @nigori, $nf.$remaining;
	}
	my $hf = $handaku{$first};
	if ($hf) {
	    push @nigori, $hf.$remaining;
	}
    }
    if (@nigori) {
	push @$list, @nigori;
    }
}

1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)