use strict;
# http://labs.unoh.net/2007/02/post_65.html to dat/convert-map-utf8.yaml
# perl tools/convert-map-scrape.pl > dat/convert-map-utf8.yaml
use Encode;
use Encode::JP::Mobile 0.09;
use LWP::Simple;
use YAML;
my %files;
for my $file (qw( emoji_e2is.txt emoji_i2es.txt emoji_s2ie.txt )) {
$files{$file} = decode('cp932', get("http://labs.unoh.net/$file"));
}
my $no2uni = {};
for my $file (keys %files) {
for my $line (split /\n/, $files{$file}) {
next unless $line =~ /^%/;
my ($no, $byte) = split "\t", $line;
$file eq 'emoji_i2es.txt' && do {
$no2uni->{$no} = sprintf '%04X', ord decode('x-sjis-docomo', pack 'H*', $byte);
};
$file eq 'emoji_e2is.txt' && do {
$no2uni->{$no} = sprintf '%04X', ord decode('x-sjis-kddi-auto', pack 'H*', $byte);
};
$file eq 'emoji_s2ie.txt' && do {
$no2uni->{$no} = sprintf '%04X', ord decode('x-sjis-softbank', "\x1b\x24$byte\x0f");
};
}
}
my %map;
for my $file (keys %files) {
for my $line (split /\n/, $files{$file}) {
next unless $line =~ /^%/;
chomp $line;
$file eq 'emoji_i2es.txt' && do {
my ($docomo, undef, $kddi, $softbank) = split "\t", $line;
$map{docomo}{ $no2uni->{$docomo} }->{kddi} = get_unicode($kddi);
$map{docomo}{ $no2uni->{$docomo} }->{softbank} = get_unicode($softbank);
};
$file eq 'emoji_e2is.txt' && do {
my ($kddi, undef, $docomo, $softbank) = split "\t", $line;
$map{kddi}{ $no2uni->{$kddi} }->{docomo} = get_unicode($docomo);
$map{kddi}{ $no2uni->{$kddi} }->{softbank} = get_unicode($softbank);
};
$file eq 'emoji_s2ie.txt' && do {
my ($softbank, undef, $docomo, $kddi) = split "\t", $line;
$map{softbank}{ $no2uni->{$softbank} }->{docomo} = get_unicode($docomo);
$map{softbank}{ $no2uni->{$softbank} }->{kddi} = get_unicode($kddi);
};
}
}
sub get_unicode($) {
my $key = shift;
if ($key =~ /^%/) {
$key =~ s/(%[^%]+%)/$no2uni->{$1}/ge;
return +{ type => 'pictogram', unicode => $key };
} else {
return +{ type => 'name', unicode => $key };
}
}
binmode STDOUT, ":utf8";
print YAML::Dump(\%map);