# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::MobileCarrierJP::EZWeb::PictogramInfo;
use strict;
use warnings;
use CAM::PDF;
use LWP::UserAgent;
use Carp;
use Encode;

# Address of the PDF document that this module downloads and parses.
my $url = 'http://www.au.kddi.com/ezfactory/tec/spec/pdf/typeD.pdf';

# Return the source address wrapped in a newly built array reference.
sub url {
    return [$url];
}

# Download the PDF at $url and hand its body to _process_pdf().
#
# Arguments are ignored, so this works both as a plain function and as a
# class method.
#
# Returns: the value returned by _process_pdf() for the downloaded body.
# Croaks:  when the HTTP request is not successful; the message includes
#          the response status line so the failure cause can be identified.
sub scrape {
    my $ua  = LWP::UserAgent->new(agent => __PACKAGE__);
    my $res = $ua->get($url);

    # Keep the original message as a prefix and append the status line
    # (e.g. "404 Not Found") so failures are distinguishable.
    croak "Can't fetch $url: " . $res->status_line
        unless $res->is_success;

    return _process_pdf($res->content);
}

# Extract pictogram table rows from a PDF document.
#
# Parameters:
#   $content - the PDF document, passed unchanged to CAM::PDF->new.
#
# Returns: an array reference of hash references, one per matched row, with
#          the keys number, name, sjis, unicode, email_jis and email_sjis,
#          sorted ascending by number.
# Croaks:  when CAM::PDF cannot build a document object from $content.
sub _process_pdf {
    my $content = shift;

    # CAM::PDF->new returns nothing on failure; fail here with a readable
    # message instead of dying later on a method call against undef.
    my $doc = CAM::PDF->new($content);
    if (!$doc) {
        my $reason = defined $CAM::PDF::errstr
                   ? $CAM::PDF::errstr
                   : 'unknown error';
        croak "Can't parse PDF: $reason";
    }

    my @res;
    foreach my $p (1 .. $doc->numPages()) {
        # A page without extractable text yields undef; skip it rather than
        # matching against an undefined value.
        my $page_text = $doc->getPageText($p);
        next unless defined $page_text;

        my $text = decode("shift_jis", $page_text);

        # One row: a number, a separator, a name, then sixteen hex digits
        # that are split into four 4-digit codes.
        while ($text =~ m/(\d+)(?: |[abcdef \x{FF43}\x{3000}]+|\x{306A}\x{3057} )([^ ]*) ([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})([0-9A-F]{4})/gs) {
            my %data;
            @data{qw( number name sjis unicode email_jis email_sjis )} = ($1, $2, $3, $4, $5, $6);

            # The name capture may span a line break; join it back together.
            $data{name} =~ s/\n//g;
            push @res, \%data;
        }
    }

    @res = sort { $a->{number} <=> $b->{number} } @res;
    return \@res;
}

1;
__END__

=encoding utf-8

=head1 NAME

WWW::MobileCarrierJP::EZWeb::PictogramInfo - 絵文字(EZWeb)

=head1 SYNOPSIS

    use WWW::MobileCarrierJP::EZWeb::PictogramInfo;
    WWW::MobileCarrierJP::EZWeb::PictogramInfo->scrape();

=head1 AUTHOR

Tokuhiro Matsuno < tokuhirom gmail com >

=head1 THANKS

This code is copied from Encode-JP-Mobile.

miyagawa++


=head1 SEE ALSO

L<WWW::MobileCarrierJP>