The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id: /mirror/gungho/lib/Gungho/Component/RobotsMETA.pm 31095 2007-11-26T00:05:40.329716Z lestrrat  $
#
# Copyright (c) 2007 Daisuke Maki <daisuke@endeworks.jp>
# All rights reserved.

package Gungho::Component::RobotsMETA;
use strict;
use warnings;
use base qw(Gungho::Component);
use HTML::RobotsMETA;

__PACKAGE__->mk_classdata($_) for qw(robots_meta);

sub setup
{
    my $self = shift;
    $self->next::method(@_);
    $self->robots_meta( HTML::RobotsMETA->new );
}

sub handle_response
{
    my ($self, $req, $res) = @_;

    if ($res->is_success && $res->content_type =~ m{^text/html}i) {
        eval {
            my $rules = $self->robots_meta->parse_rules( $res->content );
            $res->notes( robots_meta => $rules );
        };
        if ($@) {
            $self->log->debug("Failed to parse " . $res->request->uri . " for robots META information: $@");
        }
    }
    $self->next::method($req, $res);
}

1;

__END__

=head1 NAME

Gungho::Component::RobotsMETA - Automatically Parse Robots META

=head1 SYNOPSIS

  components:
    - RobotsMETA

=head1 DESCRIPTION

This module automatically parses any text/html document for robots exclusion
directies embedded in the document.

=head1 METHODS

=head2 setup

Initializes the component.

=head2 handle_response

Overrides Gungho::Component::Core::handle_response()

=head1 SEE ALSO 

L<HTML::RobotsMETA|HTML::RobotsMETA>

=cut