The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
use strict;
use warnings;
use FindBin;
use Path::Class;
use YAML;
use Encode;
use Encode::JP::Mobile;

my @encodings = (
    +{
        name        => 'x-sjis-airh-raw',
        alias       => "x-sjis-airedge-raw",
        carrier     => 'AirHPhone',
        table_maker => \&airh_table,
    },
    +{
        name        => 'x-sjis-docomo-raw',
        alias       => 'x-sjis-imode-raw',
        carrier     => 'DoCoMo',
        table_maker => table_maker_maker( 'docomo-table.yaml' ),
    },
    +{
        name        => 'x-sjis-kddi-auto-raw',
        alias       => 'x-sjis-ezweb-auto-raw',
        carrier     => 'KDDI/AU',
        table_maker => table_maker_maker( 'kddi-table.yaml', 'unicode_auto' ),
    },
    +{
        name        => 'x-sjis-kddi-cp932-raw',
        alias       => 'x-sjis-ezweb-cp932-raw',
        carrier     => 'KDDI/AU',
        table_maker => table_maker_maker( 'kddi-table.yaml' ),
    },
    +{
        name        => 'x-sjis-softbank-auto-raw',
        alias       => 'x-sjis-vodafone-auto-raw',
        carrier     => 'SoftBank',
        table_maker => table_maker_maker( 'softbank-table.yaml', 'unicode', 'sjis_auto' ),
        is_skip     => \&is_skip_softbank_auto,
    },
);

&main;exit;

sub airh_table {
    my $sort_key = shift;

    my @ret;
    my $add_to_ret = sub {
        my $x = shift;
        push @ret,
          +{
            unicode => sprintf('%X', $x ),
            sjis    => unpack( 'H*', encode( 'cp932', chr $x ) ),
          };
    };
    my $map = join "", Encode::JP::Mobile::InAirEdgePictograms(), Encode::JP::Mobile::InDoCoMoPictograms();
    for my $line (split /\n/, $map) {
        if ($line =~ /\t/) {
            my ($min, $max) = map { hex $_ } split /\t/, $line;
            my $i = $min;
            while ($i <= $max) {
                $add_to_ret->($i);
                $i++;
            }
        } else {
            $add_to_ret->(hex $line);
        }
    }
    @ret;
}

sub table_maker_maker {
    my ($file, $unicode_key, $sjis_key) = @_;
    $unicode_key ||= 'unicode';
    $sjis_key ||= 'sjis';

    sub {
        map { +{ unicode => $_->{$unicode_key}, sjis => $_->{$sjis_key} } }
          grep { $_->{$sjis_key} }
          sort { hex( $a->{$unicode_key} ) <=> hex( $b->{$unicode_key} ) }
          @{ YAML::LoadFile( file( $FindBin::Bin, '..', 'dat', $file ) ) };
    };
}

sub is_skip_softbank_auto {
    my $line = shift;

    # x-sjis-softbank-auto ではIBM拡張漢字の領域をつぶして絵文字用につかっている模様。
    # たとえば、U+52AF は IBM EXT では \xFB\x77 で、NEC EXT. では \xEE\x5B と表現できる(see cp932.ucm)
    # このうち、\xFB\x77 の方を絵文字領域として使用しているのだ。
    if ($line =~ /^<U[0-9A-F]+> (\S+) \|\d/) {
        if (in_softbank_pictogram($1)) {
            return 1;
        }
    }
    return;
}

my $sjis_auto_map;
sub in_softbank_pictogram {
    my $sjis = shift;
    $sjis_auto_map ||=
      +{ map { ( uc hexify( $_->{sjis} ) ) => 1 }
          grep { $_->{sjis} }
          table_maker_maker( 'softbank-table.yaml', 'unicode', 'sjis_auto' )->()
      };
    return $sjis_auto_map->{uc $sjis};
}

sub hexify {
    local $_ = shift;
    s/(..)/\\x$1/g;
    $_;
}

sub header {
    my $encoding = shift;

    return <<"...";
<code_set_name> "$encoding->{name}"
<code_set_alias> "$encoding->{alias}"
...
}

sub footer() {
    return <<'...';
END CHARMAP
...
}

sub generate_ucm {
    my $encoding = shift;
    my $cp932 = file($FindBin::Bin, '..', 'ucm', 'cp932.ucm')->openr;

    my $fh = file($FindBin::Bin, '..', 'ucm', "$encoding->{name}.ucm")->openw;
    $fh->print(header($encoding));
    while (<$cp932>) {
        next if /^#/;
        next if /<code_set_name> "cp932"/;
        next if /PRIVATE USE AREA/;
        next if /END CHARMAP/;
        next if $encoding->{is_skip} && $encoding->{is_skip}->($_);
        $fh->print($_);
    }
    $fh->print('<U301C> \x81\x60 |1 # WAVE DUSH', "\n"); # ad-hoc solution for  FULLWIDTH TILDE Problem.
    $fh->print("# below are copied from $encoding->{carrier}'s pictogram map\n");
    for my $row ($encoding->{table_maker}->()) {
        $fh->print(sprintf "<U%s> %s |0 # $encoding->{carrier} Pictogram\n", $row->{'unicode'}, hexify($row->{sjis}));
    }
    $fh->print(footer);
}

sub main {
    for my $encoding (@encodings) {
        generate_ucm($encoding);
    }
}