The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Lingua::JA::Kana;
use warnings;
use strict;
use utf8;

our $VERSION = sprintf "%d.%02d", q$Revision: 0.7 $ =~ /(\d+)/g;

use re ();
require Exporter;
use base qw/Exporter/;
our @EXPORT = qw(
 hira2kata hiragana2katakana
 kata2hira katakana2hiragana
 romaji2hiragana romaji2katakana
 kana2romaji
 hankaku2zenkaku zenkaku2hankaku
);

our $USE_REGEXP_ASSEMBLE = do {
    eval 'require Regexp::Assemble';
    $@ ? 0 : 1;
};


our $Re_Vowels     = qr/[aeiou]/i;
our $Re_Consonants = qr/[bcdfghjklpqrstvwxyz]/i; # note the absense of n and m

our %Kata2Hepburn = qw(
  ア   a       イ   i       ウ   u       エ   e       オ   o
  ァ   xa      ィ   xi      ゥ   xu      ェ   xe      ォ   xo
  カ   ka      キ   ki      ク   ku      ケ   ke      コ   ko
  ガ   ga      ギ   gi      グ   gu      ゲ   ge      ゴ   go
  キャ kya                  キュ kyu                  キョ kyo
  ギャ gya                  ギュ gyu                  ギョ gyo
  サ   sa      シ   shi     ス   su      セ   se      ソ   so
  ザ   za      ジ   ji      ズ   zu      ゼ   ze      ゾ   zo
  シャ sha                  シュ shu                  ショ sho
  ジャ ja                   ジュ ju                   ジョ jo
  タ   ta      チ   chi     ツ   tsu     テ   te      ト   to
               ティ ti      トゥ tu
  ダ   da      ディ di      ドゥ du      デ   de      ド   do
               ヂ   dhi     ヅ   dhu
  チャ cha                  チュ chu     チェ che     チョ cho
  ヂャ dha                  ヂュ dhu     ヂェ dhe     ヂョ dho
  ナ   na      ニ   ni      ヌ   nu      ネ   ne      ノ   no
  ニャ nya                  ニュ nyu                  ニョ nyo
  ハ   ha      ヒ   hi      フ   fu      ヘ   he      ホ   ho
  ヒャ hya                  ヒュ hyu                  ヒョ hyo
  バ   ba      ビ   bi      ブ   bu      ベ   be      ボ   bo
  ビャ bya                  ビュ byu                  ビョ byo
  パ   pa      ピ   pi      プ   pu      ペ   pe      ポ   po
  ピャ pya                  ピュ pyu                  ピョ pyo
  ファ fa      フィ fi                   フェ fe      フォ fo
  マ   ma      ミ   mi      ム   mu      メ   me      モ   mo
  ミャ mya                  ミュ myu                  ミョ myo
  ヤ   ya                   ユ   yu      イェ ye      ヨ   yo
  ャ   xya                  ュ   xyu                  ョ   xyo
  ラ   ra      リ   ri      ル   ru      レ   re      ロ   ro
  リャ rya                  リュ ryu                  リョ ryo
  ワ   wa      ヰ   wi                   ヱ   we      ヲ   wo
  ウァ wa      ウィ wi                   ウェ we      ウォ wo
  ヴァ va      ヴィ vi      ヴ   vu      ヴェ ve      ヴォ vo
  ン   n
);

our %Kana2Hepburn =
  ( %Kata2Hepburn, map { katakana2hiragana($_) } %Kata2Hepburn );

our $Re_Kana2Hepburn = do {
    if ($USE_REGEXP_ASSEMBLE) {
        my $ra = Regexp::Assemble->new();
        $ra->add($_) for keys %Kana2Hepburn;
        $ra->re;
    }
    else {
        my $str = join '|', keys %Kana2Hepburn;
        qr/(?:$str)/;
    }
};

our %Romaji2Kata = qw(
  a    ア      i    イ      u    ウ      e    エ      o    オ
  xa   ァ      xi   ィ      xu   ゥ      xe   ェ      xo   ォ
  ka   カ      ki   キ      ku   ク      ke   ケ      ko   コ
  ga   ガ      gi   ギ      gu   グ      ge   ゲ      go   ゴ
  kya  キャ                 kyu キュ                  kyo  キョ
  gya  ギャ                 gyu ギュ                  gyo  ギョ 
  sa   サ      shi  シ      su   ス      se   セ      so   ソ
               si   シ
  za   ザ      ji   ジ      zu   ズ      ze   ゼ      zo   ゾ
               zi   ジ
  sha  シャ                 shu  シュ                 sho  ショ
  ja   ジャ                 ju   ジュ                 jo   ジョ
  sya  シャ                 syu  シュ                 syo  ショ
  ta   タ      chi  チ      tsu  ツ      te   テ      to   ト
                            xtu  ッ 
               ti   ティ    tu   トゥ
  da   ダ      di   ディ    du   ドゥ    de   デ      do   ド
               dhi  ヂ      dhu  ヅ
  cha  チャ                 chu  チュ    che  チェ    cho  チョ
  tya  チャ                 tyu  チュ    tye  チェ    tyo  チョ
  dha  ヂャ                 dhu  ヂュ    dhe  ヂェ    dho  ヂョ
  dya  ヂャ                 tyu  ヂュ    tye  ヂェ    tyo  ヂョ
  na   ナ      ni   ニ      nu   ヌ      ne   ネ      no   ノ
  nya ニャ                  nyu ニュ                  nyo ニョ 
  ha   ハ      hi   ヒ      fu   フ      he   ヘ      ho   ホ
                            hu   フ
  hya  ヒャ                 hyu  ヒュ                 hyo  ヒョ
  ba   バ      bi   ビ      bu   ブ      be   ベ      bo   ボ
  bya  ビャ                 byu  ビュ                 byo  ビョ
  pa   パ      pi   ピ      pu   プ      pe   ペ      po   ポ
  pya  ピャ                 pyu  ピュ                 pyo  ピョ
  fa   ファ    fi   フィ                 fe   フェ    fo   フォ
  ma   マ      mi   ミ      mu   ム      me   メ      mo   モ
  mya ミャ                  myu ミュ                  myo ミョ 
  ya   ヤ                   yu   ユ      ye   イェ    yo   ヨ
  xya  ャ                   xyu  ュ                   xyo  ョ
  ra   ラ      ri   リ      ru   ル      re   レ      ro   ロ
  rya  リャ                 ryu  リュ                 ryo  リョ
  la   ラ      li   リ      lu   ル      le   レ      lo   ロ
  wa   ワ                                             wo   ヲ
               wi   ウィ                 we   ウェ
  va   ヴァ    vi   ヴィ    vu   ヴ      ve   ヴェ    vo   ヴォ
);

our $Re_Romaji2Kata = do {
    if ($USE_REGEXP_ASSEMBLE) {
        my $ra = Regexp::Assemble->new();
        $ra->add($_) for keys %Romaji2Kata;
        my $str = $ra->re;
        if ($] >= 5.009005) {
            my ($pattern, $mod) = re::regexp_pattern($str);
            $str = $pattern;
        } else {
            substr( $str, 0,  8, '' );    # remove '(?-xism:'
            substr( $str, -1, 1, '' );    # and ')';
        }
        qr/$str/i;                    # and recompile with i
    }
    else {
        my $str = join '|', sort {length($b) <=> length($a)} keys %Romaji2Kata;
        qr/(?:$str)/i;
    }
};


our %Kana2Romaji    = %Kana2Hepburn;
our $Re_Kana2Romaji = $Re_Kana2Hepburn;

sub katakana2hiragana{
  my $str = shift;
  $str =~ tr/ァ-ンヴ/ぁ-んゔ/;
  $str;
}

sub hiragana2katakana{
  my $str = shift;
  $str =~ tr/ぁ-んゔ/ァ-ンヴ/;
  $str;
}

{
    no warnings 'once';
    *kata2hira = \&katakana2hiragana;
    *hira2kata = \&hiragana2katakana;
}

sub romaji2katakana{
  my $str = shift;
  # step 1; tta -> ッta
  $str =~ s{ ($Re_Consonants) \1 }{ "ッ$1" }msxgei;
  # step 2;
  $str =~ s{ ($Re_Romaji2Kata) }{ $Romaji2Kata{lc $1} || $1 }msxgei;
  # step 3;
  $str =~ s{ ([ァ-ン])[mn] }{ "$1ン" }msxgei;
  $str;
}

sub romaji2hiragana{ katakana2hiragana(romaji2katakana(shift)) };

sub kana2romaji{
  my $str = shift;
  # step 1;
  $str =~ s{ ($Re_Kana2Romaji) }{ $Kana2Romaji{$1} || $1 }msxge;
  # step 2; ッta -> tta
  $str =~ s{ [っッ]($Re_Consonants) }{ "$1$1" }msxge;
  # step 3; oー -> oo
  $str =~ s{ ($Re_Vowels)ー }{ "$1$1" }msxge;
  $str;
}


if ($0 eq __FILE__){
    warn $USE_REGEXP_ASSEMBLE;
    binmode STDOUT, ':utf8';
    local $\ = "\n";
    warn $Re_Romaji2Kata;
    print romaji2katakana("Dan Kogai");
    print romaji2katakana("shimbashi");
    print romaji2katakana("konnichiwa");
    print romaji2hiragana("Dan Kogai");
    print romaji2hiragana("shimbashi");
    warn $Re_Kana2Romaji;
    print kana2romaji("ダンコガイ");
    print kana2romaji("マイッタ");
    print kana2romaji("シンバシ");
    print romaji2hiragana("ryoukai");   # RT#39590
    print romaji2hiragana("virama");    # RT#45402
}

use Encode;
use Encode::JP::H2Z;
my $eucjp = Encode::find_encoding('eucjp');
sub hankaku2zenkaku { 
    my $str = $eucjp->encode(shift);
    Encode::JP::H2Z::h2z(\$str);
    $eucjp->decode($str);
}

sub zenkaku2hankaku { 
    my $str = $eucjp->encode(shift);
    Encode::JP::H2Z::z2h(\$str);
    $eucjp->decode($str);
}


1; # End of Lingua::JA::Kana
__END__

=head1 NAME

Lingua::JA::Kana - Kata-Romaji related utilities

=head1 VERSION

$Id: Kana.pm,v 0.7 2012/08/06 01:56:17 dankogai Exp $

=head1 SYNOPSIS

    use Lingua::JA::Kana;

    my $hiragana = romaji2hiragana("ohayou");
    my $katakana = romaji2katakana("ohasumi");
    my $romaji   = kana2romaji($str);

=head1 DESCRIPTION

This module is a simple utility to convert katakana, hiragana, and romaji
at ease.  This module makes use of utf8 semantics which is introduced in
Perl 5.8.0 and became stable enough in Perl 5.8.1 so you need Perl 5.8.1
or better.

Also note that strings in this module must be utf8-flagged.  If they are
not, you can use L<Encode> to do so.

  use Encode;
  use Lingua::JA::Kana
  my $romaji = kana2romaji(decode_utf8 $octet);

See L<Encode>, L<perluniintro>, and L<perlunicode> for details.

=head1 EXPORT

This module exports functions below:

=head2 hiragana2katakana

Converts all occurance of hiragana to katakana.

  my $hiragana = hiragana2katakana($str);

=over 2

=item hira2kata

its alias.

=back

=head2 katakana2hiragana

Converts all occurance of katakana to hiragana. C<kata2hira> is an alias thereof.

  my $katakana = katakana2hiragana($str);

=over 2

=item kata2hira

its alias.

=back

=head2 romaji2katakana

Converts all occurance of romaji to katakana.

  my $romaji = romaji2hiragana($str);

=head2 romaji2hiragana

Converts all occurance of romaji to hiragana.

  my $katakana = romaji2hiragana($str);

=head2 kana2romaji

Converts all occurance of kana (both katakana and hiragana) to romaji.

  my $romaji = kana2romaji($str);

=head2 hankaku2zenkaku

Converts all occurance of hankaku to zenkaku.

  my $romaji = hankaku2zenkaku($str);

=head2 zenkaku2hankaku

Converts all occurance  of zenkaku to hankaku.

  my $romaji = zenkaku2hankaku($str);

=head1 INSTALLATION

To install this module, run the following commands:

    perl Makefile.PL
    make
    make test
    make install

=head1 AUTHOR

Dan Kogai, C<< <dankogai at dan.co.jp> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-lingua-ja-kana at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Lingua-JA-Kana>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Lingua::JA::Kana


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Lingua-JA-Kana>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Lingua-JA-Kana>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/Lingua-JA-Kana>

=item * Search CPAN

L<http://search.cpan.org/dist/Lingua-JA-Kana>

=back

=head1 ACKNOWLEDGEMENTS

L<Lingua::JA::Romaji>

=head1 COPYRIGHT & LICENSE

Copyright 2007 Dan Kogai, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.


=cut