package Plagger::Plugin::Filter::StripRSSAd;
use strict;
use base qw( Plagger::Plugin );
use DirHandle;
sub init {
my $self = shift;
$self->SUPER::init(@_);
Plagger->context->autoload_plugin('Filter::BloglinesContentNormalize');
$self->load_patterns();
}
sub load_patterns {
my $self = shift;
my $dir = $self->assets_dir;
my $dh = DirHandle->new($dir) or Plagger->context->error("$dir: $!");
for my $file (grep -f $_->[0] && $_->[1] =~ /^[\w\-\.]+$/,
map [ File::Spec->catfile($dir, $_), $_ ], sort $dh->read) {
$self->load_pattern(@$file);
}
}
sub load_pattern {
my($self, $file, $base) = @_;
Plagger->context->log(debug => "loading $file");
if ($file =~ /\.yaml$/) {
$self->load_yaml($file, $base);
} else {
$self->load_regexp($file, $base);
}
}
sub load_regexp {
my($self, $file, $base) = @_;
open my $fh, $file or Plagger->context->error("$file: $!");
my $re = join '', <$fh>;
chomp($re);
push @{$self->{pattern}}, { site => $base, re => qr/$re/ };
}
sub load_yaml {
my($self, $file, $base) = @_;
my $pattern = eval { YAML::LoadFile($file) }
or Plagger->context->error("$file: $@");
push @{$self->{pattern}}, { site => $base, %$pattern };
}
sub register {
my($self, $context) = @_;
$context->register_hook(
$self,
'update.entry.fixup' => \&update,
);
}
sub update {
my($self, $context, $args) = @_;
my $body = $args->{entry}->body;
for my $pattern (@{ $self->{pattern} }) {
if (my $re = $pattern->{re}) {
if (my $count = $body =~ s!$re!defined($1) ? $1 : ''!egs) {
Plagger->context->log(info => "Stripped $pattern->{site} Ad on " . $args->{entry}->link);
}
} elsif (my $cond = $pattern->{condition}) {
local $args->{body} = $body;
if (eval $cond && $pattern->{strip}) {
$args->{feed}->delete_entry($args->{entry});
Plagger->context->log(info => "Stripped Ad entry " . $args->{entry}->link);
} elsif ($@) {
Plagger->context->log(error => "Error evaluating $cond: $@");
}
}
}
$args->{entry}->body($body);
}
1;
__END__
=head1 NAME
Plagger::Plugin::Filter::StripRSSAd - Strip RSS Ads from feed content
=head1 SYNOPSIS
- module: Filter::StripRSSAd
=head1 DESCRIPTION
This plugin strips RSS context based ads from feed content, like
Google AdSense or rssad.jp. It uses quick regular expression to strip
the images and map tags.
=head1 AUTHOR
Tatsuhiko Miyagawa, Masahiro Nagano
=head1 SEE ALSO
L<Plagger>
=cut