The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# ABSTRACT: Scrappy HTTP Request Constraints System
# Dist::Zilla: +PodWeaver

package Scrappy::Scraper::Control;
BEGIN {
  $Scrappy::Scraper::Control::VERSION = '0.94112090';
}

# load OO System
use Moose;

# load other libraries
use URI;

has 'allowed' => (is => 'rw', isa => 'HashRef', default => sub { {} });
has 'options' => (
    is        => 'ro',
    isa       => 'HashRef',
    'default' => sub { {} }
);
has 'restricted' => (is => 'rw', isa => 'HashRef', default => sub { {} });


sub allow {
    my ($self, $target, %constraints) = @_;
    my $i = 0;

    $target = URI->new($target);

    next
        unless
            $target &&
                ("URI::http" eq ref $target || "URI::https" eq ref $target);
                
    $target = $target->host;

    delete $self->restricted->{$target} if defined $self->restricted->{$target};
    $self->allowed->{$target} = { %constraints };
    $i++ if defined $target;

    return $i;
}


sub restrict {
    my ($self, $target, %constraints) = @_;
    my $i = 0;

    $target = URI->new($target);

        next
            unless
                $target &&
                    ("URI::http" eq ref $target || "URI::https" eq ref $target);
    
    $target = $target->host;

        delete $self->allowed->{$target} if defined $self->allowed->{$target};
        $self->restricted->{$target} = { %constraints };
        $i++ if defined $target;

    return $i;
}


sub is_allowed {
    my $self = shift;
    my $url  = shift;
    my %options = @_;

    # empty domain not allowed
    return 0 unless $url;
    
    # for advanced constraints checking, an HTTP::Response object may be passed
    my $http ;
    
    if ("HTTP::Response" eq ref $url) {
        $http = $url;
        $url = $url->request->uri;
    }
    
    return 0 unless ("URI::http" eq ref $url || "URI::https" eq ref $url);
    
    $url = $url->host;

    # is anything explicitly allowed, if so everything is restricted unless
    # explicitly defined in allowed
    if (keys %{$self->allowed}) {
        if (keys %{$self->allowed}) {
            # return $self->allowed->{$url} ? 1 : 0;
            if ($self->allowed->{$url}) {
                if ($self->allowed->{$url}->{if}) {
                    return $self->_check_constraints(
                                $self->allowed->{$url}->{if},
                                $http
                            );
                }
                else {
                    return 1;
                }
            }
            else {
                return 0;
            }
        }
    }

    # is it explicitly restricted
    if (keys %{$self->restricted}) {
        if (keys %{$self->restricted}) {
            # return 0 if $self->restricted->{$url};
            if ($self->restricted->{$url}) {
                if ($self->restricted->{$url}->{if}) {
                    return $self->_check_constraints(
                                $self->allowed->{$url}->{if},
                                $http
                            );
                }
                else {
                    return 1;
                }
            }
            else {
                return 0;
            }
        }
    }

    # i guess its cool
    return 1;
}

sub _check_constraints {
    my ($self, $constraints, $http_response) = @_;
    # check for failure, if not pass it
    if ($constraints->{content_type}) {
        my $ctype = $http_response->header('content_type');
        my $types = "ARRAY" eq ref $constraints->{content_type} ?
            $constraints->{content_type} : [$constraints->{content_type}];
        
        return 1 if (grep { $ctype eq $_ } @{$types});
        
    }
    return 0;
}

1;

__END__
=pod

=head1 NAME

Scrappy::Scraper::Control - Scrappy HTTP Request Constraints System

=head1 VERSION

version 0.94112090

=head1 SYNOPSIS

    #!/usr/bin/perl
    use Scrappy::Scraper::Control;

    my  $control = Scrappy::Scraper::Control->new;
    
        $control->allow('http://search.cpan.org');
        $control->allow('http://search.cpan.org', if => {
                content_type => ['text/html', 'application/x-tar']
            }
        );
        
        $control->restrict('http://www.cpan.org');
        
        if ($control->is_allowed('http://search.cpan.org/')) {
            ...
        }
        
        # constraints will only be checked if the is_allowed method is
        # passed a HTTP::Response object.

=head1 DESCRIPTION

Scrappy::Scraper::Control provides HTTP request access control for the L<Scrappy> framework.

=head2 ATTRIBUTES

The following is a list of object attributes available with every Scrappy::Scraper::Control
instance.

=head3 allowed

The allowed attribute holds a hasherf of allowed domain/contraints.

    my  $control = Scrappy::Scraper::Control->new;
        $control->allowed;
        
        e.g.
        
        {
            'www.foobar.com' => {
                methods => [qw/GET POST PUSH PUT DELETE/],
                content_type => ['text/html']
            }
        }

=head3 restricted

The restricted attribute holds a hasherf of restricted domain/contraints.

    my  $control = Scrappy::Scraper::Control->new;
        $control->restricted;
        
        e.g.
        
        {
            'www.foobar.com' => {
                methods => [qw/GET POST PUSH PUT DELETE/]
            }
        }

=head1 METHODS

=head2 allow

    my  $control = Scrappy::Scraper::Control->new;
        $control->allow('http://www.perl.org');
        $control->allow('http://search.cpan.org', if => {
                content_type => ['text/html', 'application/x-tar']
            }
        );

=head2 restrict

    my  $control = Scrappy::Scraper::Control->new;
        $control->restrict('http://www.perl.org');
        $control->restrict('http://search.cpan.org', if => {
                content_type => ['text/html', 'application/x-tar']
            }
        );

=head2 is_allowed

    my  $control = Scrappy::Scraper::Control->new;
        $control->allow('http://search.cpan.org');
        $control->restrict('http://www.perl.org');
        
        if (! $control->is_allowed('http://perl.org')) {
            die 'Cant get to Perl.org';
        }

=head1 AUTHOR

Al Newkirk <awncorp@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2010 by awncorp.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut