lib/WWW/MobileCarrierJP/DoCoMo/UserAgent.pm

package WWW::MobileCarrierJP::DoCoMo::UserAgent;
use WWW::MobileCarrierJP::Declare;
use HTML::Selector::XPath 'selector_to_xpath';
use charnames ':full';

my $URL = 'http://www.nttdocomo.co.jp/service/developer/make/content/spec/useragent/index.html';
sub url { [$URL] }

# 不要なカラム
our @TRASH_XPATHS = (
    '//td[@rowspan]',                     # シリーズ名とか
    '//td[@class="acenter middle"]',      # シリーズ名
    selector_to_xpath('.brownLight'),     # ヘッダ
);

sub scrape {
    my $content = get($URL);
    $content =~ s{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">}{};
    my $result = scraper {
        process '//tr', 'rows[]', scraper {
            my $tree = as_tree($_);

            # 不要なカラムを削除しておく
            for my $xpath (@TRASH_XPATHS) {
                for my $node ($tree->findnodes($xpath)) {
                    $node->delete;
                }
            }

            my @cols = $tree->findnodes('//td');
            return if (scalar @cols <= 1);

            my $model = do {
                local $_ = shift @cols;
                $_ =  $_->as_text;
                s/\N{GREEK SMALL LETTER MU}/MYU/;
                s/\s?（.+//;
                s/\s//g;
                uc $_;
            };

            my $ua = sub {
                for my $col (@cols) {
                    $col = $col->as_HTML;
                    next unless $col =~ /DoCoMo/;

                    $col = (
                        grep {/DoCoMo/}
                        split m[(?:&nbsp;|<br\s*/>|\r|\n)]i, $col
                    )[0]; # XXX: 複数あるときは一番最初のがブラウザ
                    $col =~ s/ *（.+//;
                    $col =~ s/<[^><]+>//g; # remove tags
                    $col =~ s/(&#xA0.*)$//;
                    $col =~ s/^\s*//;
                    $col =~ s/\s*$//;
                    return $col;
                }
            }->();
            debug ("$model - $ua");
            debug ("------------------------------------");

            $tree = $tree->delete;

            result qw/model user_agent/;
            return {model => $model, user_agent => $ua};
        };
    }->scrape( $content )->{rows};

    @$result = grep { %$_ } @$result; # remove empty rows

    return $result;
}

1;
__END__

=encoding utf-8

=head1 NAME

WWW::MobileCarrierJP::DoCoMo::UserAgent - UserAgent(DoCoMo)

=head1 SYNOPSIS

    use WWW::MobileCarrierJP::DoCoMo::UserAgent;
    WWW::MobileCarrierJP::DoCoMo::UserAgent->scrape();

=head1 AUTHOR

Takefumi Kimura

Tokuhiro Matsuno < tokuhirom gmail com >

=head1 SEE ALSO

L<WWW::MobileCarrierJP>,
L<http://www.nttdocomo.co.jp/service/imode/make/content/spec/useragent/index.html>

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)