The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package App::Zapzi::Transformers::HTMLExtractMain;
# ABSTRACT: transform text using HTMLExtractMain


use utf8;
use strict;
use warnings;

our $VERSION = '0.008'; # VERSION

use HTML::ExtractMain 0.63;
use Moo;

extends "App::Zapzi::Transformers::HTML";


sub name
{
    return 'HTMLExtractMain';
}


sub handles
{
    my $self = shift;
    my $content_type = shift;

    return 1 if $content_type =~ m|text/html|;
}

# transform and _extract_title inherited from parent

sub _extract_html
{
    my $self = shift;
    my ($raw_html) = @_;

    my $tree = HTML::ExtractMain::extract_main_html($raw_html,
                                                    output_type => 'tree' );

    return $tree;
}

1;

__END__

=pod

=head1 NAME

App::Zapzi::Transformers::HTMLExtractMain - transform text using HTMLExtractMain

=head1 VERSION

version 0.008

=head1 DESCRIPTION

This class takes HTML and returns readable HTML using HTML::ExtractMain.

=head1 METHODS

=head2 name

Name of transformer visible to user.

=head2 handles($content_type)

Returns true if this module handles the given content-type

=head1 AUTHOR

Rupert Lane <rupert@rupert-lane.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by Rupert Lane.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut