# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plagger::Plugin::CustomFeed::GoogleNews;
use strict;
use base qw( Plagger::Plugin );

use Plagger::UserAgent;
use Plagger::Util;
use URI;
use URI::QueryParam;

# Plugin registration entry point.
#
# Subscribes this plugin's handle() sub to the 'customfeed.handle' hook on
# the supplied context object.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($self, $context) = @_;

    # Hook name => callback; flattened into register_hook's argument list.
    my @hooks = ('customfeed.handle' => \&handle);

    $context->register_hook($self, @hooks);
}

# Hook callback for 'customfeed.handle'.
#
# Claims a feed when its URL starts with http://news.google.com/ or
# http://news.google.co.jp/ and does not contain output=rss or output=atom.
# A claimed feed is passed to aggregate().
#
#   $self    - the plugin instance
#   $context - passed through to aggregate()
#   $args    - hash ref; $args->{feed} must provide a url() method
#
# Returns 1 when the feed was claimed, otherwise an empty return
# (undef in scalar context, empty list in list context).
sub handle {
    my ($self, $context, $args) = @_;

    my $feed_url = $args->{feed}->url;

    # Not a Google News page: leave it alone.
    return unless $feed_url =~ m!^http://news\.google\.(?:co\.jp|com)/!;

    # The URL already requests RSS/Atom output: this plugin does not claim it.
    return if $feed_url =~ /output=(?:rss|atom)/;

    $self->aggregate($context, $args);
    return 1;
}

# Fetch a Google News HTML page and turn its article links into a feed.
#
#   $self    - the plugin instance (also handed to the user agent's fetch())
#   $context - object providing log() and update()
#   $args    - hash ref; $args->{feed} must provide a url() method
#
# On a fetch error, logs the failure and returns nothing. Otherwise builds
# a Plagger::Feed whose entries carry the title and link of each matched
# anchor, adds it to $context->update, and returns the result of that add().
sub aggregate {
    my ($self, $context, $args) = @_;

    my $url = URI->new($args->{feed}->url);

    # Make sure the "ned" query parameter carries a leading "t"
    # (e.g. ned=jp becomes ned=tjp); a missing value is treated as "us".
    my $edition = $url->query_param('ned') || 'us';
    $edition = "t$edition" if $edition !~ /^t/;
    $url->query_param(ned => $edition);

    $context->log(info => "GET $url");

    my $response = Plagger::UserAgent->new->fetch($url, $self);

    if ($response->is_error) {
        $context->log(error => "GET $url failed: " . $response->status);
        return;
    }

    my $html       = Plagger::Util::decode_content($response);
    my $page_title = Plagger::Util::extract_title($html);

    # The feed links back to the URL as originally configured, not the
    # rewritten one used for fetching.
    my $feed = Plagger::Feed->new;
    $feed->title($page_title);
    $feed->link($args->{feed}->url);

    # Each anchor whose id starts with "r-<digit>" becomes one entry.
    while ($html =~ m!<a href="(http://[^"]*)" id=r-\d[^>]*>(.*?)</a>!g) {
        my $href     = $1;
        my $headline = $2;

        # Drop <b>...</b> wrappers inside the headline, keeping the text.
        $headline =~ s!<b>(.*?)</b>!$1!g;

        my $entry = Plagger::Entry->new;
        $entry->title($headline);
        $entry->link($href);

        $feed->add_entry($entry);
    }

    return $context->update->add($feed);
}

1;

__END__

=head1 NAME

Plagger::Plugin::CustomFeed::GoogleNews - Create Google News custom feed

=head1 SYNOPSIS

  - module: Subscription::Config
    config:
      feed:
        - http://news.google.com/news?ned=jp&rec=0&topic=s
        - http://news.google.co.jp/news?hl=ja&ned=jp&q=%E5%9B%B2%E7%A2%81

  - module: CustomFeed::GoogleNews

=head1 DESCRIPTION

This plugin creates a custom feed off of Google News HTML pages. Use
with EntryFullText plugin to get full content and accurate datetime of
articles.

=head1 AUTHOR

Tatsuhiko Miyagawa

=head1 SEE ALSO

L<Plagger>

=cut