The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plagger::Plugin::Filter::Base;
use strict;
use base qw( Plagger::Plugin );

sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&update,
    );
}

sub update {
    my($self, $context, $args) = @_;
    my $body = $args->{entry}->body;

    my $count;
    if ($self->conf->{text_only}) {
        ($count, $body) = $self->filter_textonly($body);
    } else {
        ($count, $body) = $self->filter($body);
    }

    if ($count) {
        $self->log(info => "Filtered $count occurence(s)");
    }

    $args->{entry}->body($body);
}

sub filter {
    my $self = shift;
    Plagger->context->error(ref($self) . " should override filter");
}

sub filter_textonly {
    my($self, $body) = @_;
    require HTML::Parser;

    my($count, $output);

    my $p = HTML::Parser->new(api_version => 3);
    $p->handler( default => sub { $output .= $_[0] }, "text" );
    $p->handler( text => sub {
        my($c, $body) = $self->filter($_[0]);
        $count  += $c;
        $output .= $body;
    }, "text");

    $p->parse($body);
    $p->eof;

    ($count, $output);
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::Base - Base filter class to handle HTML snippets

=head1 SYNOPSIS

  package Plagger::Plugin::Filter::Foo;
  use base qw( Plagger::Plugin::Filter::Base )

  sub filter {
      my($self, $body) = @_;

      # filter $body
      # store how many chunks are filtered into $count

      return ($count, $body);
  }

=head1 DESCRIPTION

Plagger::Plugin::Filter::Base is a base class for
Plagger::Plugin::Filter to handle entry body with as much care as
possible not to break HTML structure.

Your filter will support C<text_only> configuration by subclassing
this module:

  - module: Filter::Foo
    config:
      text_only: 1

=head1 AUTHOR

Tatsuhiko Miyagawa

=head1 SEE ALSO

L<Plagger>, L<HTML::Parser>

=cut