The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
use strict;
use warnings;
use Path::Class;
use YAML;
use FindBin;

# How to produce 103-111-HTML_2.0.0.txt:
# 1. Download the PDF from http://www2.developers.softbankmobile.co.jp/dp/tool_dl/download.php?docid=120&companyid=
# 2. Convert it to text: xdoc2txt -n 103-111-HTML_2.0.0.pdf > 103-111-HTML_2.0.0.txt
#    (xdoc2txt is available from http://www31.ocn.ne.jp/~h_ishida/xdoc2txt.html)

my $pdf_text_file = shift or die "Usage: softbank-scrape-autosjis.pl 103-111-HTML_2.0.0.txt";
my $pdf_fh =file($pdf_text_file)->openr;

my %map;
while (my $line = <$pdf_fh>) {
    chomp $line;
    next if $line !~ /^&#\d\d\d\d\d;\s*&#x/;

    my @codes = split /\s+/, $line;
    next if @codes != 4;

    my $unicode  = strip_entity_ref_mark($codes[1]);
    my $shiftjis = $codes[3];

    $map{ $unicode } = $shiftjis;
}
close $pdf_fh;

print Dump(\%map);


sub strip_entity_ref_mark {
    local $_ = shift;
    s/(^&#x|;$)//g;
    $_;
}