#!/home/ben/software/install/bin/perl
use warnings;
use strict;
use Lingua::JA::Moji ':all';
use Text::Fuzzy;
use utf8;
binmode STDOUT, ":utf8";
# Path of the dictionary file to scan; it is decoded as EUC-JP on read.
my $infile = '/home/ben/data/edrdg/edict';
open my $in, "<:encoding(EUC-JP)", $infile
    or die "Cannot open '$infile' for reading: $!";
# Every reading found in the file, normalised through kana2katakana.
my @kana;
while (my $line = <$in>) {
    my $kana;
    # Prefer a kana run enclosed in square brackets; otherwise accept a
    # kana run at the very start of the line.
    # NOTE(review): \p{InKana} is presumably the user-defined property
    # exported by Lingua::JA::Moji - confirm.
    if ($line =~ /\[(\p{InKana}+)\]/) {
        $kana = $1;
    }
    elsif ($line =~ /^(\p{InKana}+)/) {
        $kana = $1;
    }
    # Lines that match neither pattern contribute nothing.
    next unless defined $kana;
    push @kana, kana2katakana ($kana);
}
# A failed close on a read handle is reported but is not fatal.
close $in or warn "Cannot close '$infile': $!";
# Announce how many readings were collected, then run each sample query
# through search () in turn.
printf "Starting fuzzy searches over %d lines.\n", scalar @kana;
for my $query (
    'ウオソウコ',
    'アイウエオカキクケコバビブベボハヒフヘホ',
    'アルベルトアインシュタイン',
    'バババブ',
    'バババブアルベルト',
) {
    search ($query);
}
# End the program once every query has been reported.
exit;
# search ($silly)
#
# Look for the entry of the file-level @kana list that is nearest to the
# string $silly, using Text::Fuzzy with a maximum of $max edits, and print
# the outcome to STDOUT: either the nearest entry together with its
# distance, or a notice that nothing was found within the limit.
#
# All data is passed to printf through %s / %d placeholders rather than
# being interpolated into the format string, so a literal '%' in the query
# or in a dictionary entry cannot be misread as a conversion specifier.
sub search
{
    my ($silly) = @_;
    # Upper bound on the edit distance given to Text::Fuzzy.
    my $max = 10;
    my $search = Text::Fuzzy->new ($silly, max => $max);
    # nearest () yields an index into @kana, or undef when nothing matched.
    my $n = $search->nearest (\@kana);
    if (defined $n) {
        printf "%s nearest is %s (distance %d)\n",
            $silly, $kana[$n], $search->last_distance ();
    }
    else {
        printf "Nothing like '%s' was found within %d edits.\n",
            $silly, $max;
    }
}