The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use strict;
use warnings FATAL => 'all';

package WWW::Mechanize::Cached;
$WWW::Mechanize::Cached::VERSION = '1.48';
use 5.006;

use Class::Load 'try_load_class';
use Moo 1.004005;
use MooX::Types::MooseLike::Base qw(AnyOf Bool Enum Maybe);
use namespace::clean;
extends 'WWW::Mechanize';

use Carp qw( carp croak );
use Data::Dump qw( dump );
use Storable qw( nfreeze thaw );

has is_cached => ( is => 'rw', isa => Maybe [Bool], default => undef );
has positive_cache   => ( is => 'rw', isa => Bool, default => 1 );
has ref_in_cache_key => ( is => 'rw', isa => Bool, default => 0 );
has _verbose_dwarn   => ( is => 'rw', isa => Bool, default => 0 );

for (qw(cache_undef_content_length cache_zero_content_length)) {
    has $_ =>
        ( is => 'rw', isa => AnyOf [ Bool, Enum ['warn'] ], default => 0 );
}
has cache_mismatch_content_length => (
    is      => 'rw',
    isa     => AnyOf [ Bool, Enum ['warn'] ],
    default => 'warn',
);

has cache => ( is => 'lazy', isa => \&_isa_warn_cache );

sub _isa_warn_cache {
    return
            if 'HASH' ne ref $_[0]
        and $_[0]->can('get')
        and $_[0]->can('set');
    carp 'The cache param must be an initialized cache object';
    $_[0] = undef;
}

sub _build_cache {
    my $self = shift;

    return Cache::FileCache->new({
        default_expires_in => '1d',
        namespace          => 'www-mechanize-cached',
    }) if try_load_class 'Cache::FileCache';
    return CHI->new(
        driver     => 'File',
        expires_in => '1d',
        namespace  => 'www-mechanize-cached',
    ) if try_load_class 'CHI';

    croak(    'Could not create a default cache.'
            . 'Please make sure either CHI or Cache::FileCache are installed or configure manually as appropriate'
    );
}

around _make_request => sub {
    my ( $orig, $self, $request ) = splice @_, 0, 3;
    my $req = $request;

    $self->is_cached(0);

    # An odd line to need.
    # No idea what purpose this serves?  OALDERS
    $self->{proxy} = {} unless defined $self->{proxy};    ## no critic

    # RT #56757
    if ( !$self->ref_in_cache_key ) {
        my $clone = $request->clone;
        $clone->header( Referer => undef );
        $req = $clone->as_string;
    }

    my $response = $self->cache->get($req);

    if ($response) {
        $response = thaw($response);
    }
    if ( $self->_cache_ok($response) ) {
        $self->is_cached(1);
        return $response;
    }

    $response = $self->$orig( $request, @_ );

    # decode strips some important headers.
    my $headers = $response->headers->clone;

    my $should_cache = $self->_response_cache_ok( $response, $headers );

    # http://rt.cpan.org/Public/Bug/Display.html?id=42693
    $response->decode();
    delete $response->{handlers};    ## no critic

    $self->cache->set( $req, nfreeze($response) ) if $should_cache;

    return $response;
};

sub _dwarn_filter {
    my ( $ctx, $ref ) = @_;
    return {
        hide_keys => [
            qw( _content cookie content set-cookie handlers cookie_jar cache req res page_stack )
        ]
    };

}

sub _dwarn {
    my $self    = shift;
    my $message = shift;

    return unless my $handler = $self->{onwarn};    ## no critic

    return if $self->quiet;

    if ( $self->_verbose_dwarn ) {
        my $payload = {
            self    => $self,
            message => $message,
            debug   => \@_,
        };
        require Data::Dump;
        return $handler->( Data::Dump::dumpf( $payload, \&_dwarn_filter ) );
    }
    else {
        return $handler->($message);
    }
}

sub _response_cache_ok {
    my $self     = shift;
    my $response = shift;
    my $headers  = shift;

    return 0 if !$response;
    return 1 if !$self->positive_cache;

    return 0 if $response->code < 200;
    return 0 if $response->code > 301;

    my $size;
    {
        ## no critic (ValuesAndExpressions::ProhibitAccessOfPrivateData)
        if ( exists $headers->{'client-transfer-encoding'} ) {
            for my $cte ( @{ $headers->{'client-transfer-encoding'} } ) {

                # Transfer-Encoding = chunked means document consistency
                # is independent of Content-Length value,
                # and that Content-Length can be safely ignored.
                # Its not obvious how the lower levels represent a
                # failed chuncked-transfer yet.
                # But its safe to say relying on content-length proves pointless.
                return 1 if $cte eq 'chunked';
            }
        }

        $size = $headers->{'content-length'};
    }

    if ( not defined $size ) {
        if ( $self->cache_undef_content_length . q{} eq q{warn} ) {
            $self->_dwarn(
                q[Content-Length header was undefined, not caching]
                    . q[ (E=WWW_MECH_CACHED_CONTENTLENGTH_MISSING)],
                $headers
            );
            return 0;
        }
        if ( $self->cache_undef_content_length == 0 ) {
            return 0;
        }
    }

    if ( defined $size and $size == 0 ) {
        if ( $self->cache_zero_content_length . q{} eq q{warn} ) {
            $self->_dwarn(
                q{Content-Length header was 0, not caching}
                    . q{ (E=WWW_MECH_CACHED_CONTENTLENGTH_ZERO)},
                $headers
            );
            return 0;
        }
        if ( $self->cache_zero_content_length == 0 ) {
            return 0;
        }
    }

    if (    defined $size
        and $size != 0
        and $size != length( $response->content ) ) {
        if ( $self->cache_mismatch_content_length . "" eq "warn" ) {
            $self->_dwarn(
                q{Content-Length header did not match contents actual length, not caching}
                    . q{ (E=WWW_MECH_CACHED_CONTENTLENGTH_MISSMATCH)} );
            return 0;
        }
        if ( $self->cache_mismatch_content_length == 0 ) {
            return 0;
        }
    }

    return 1;
}

sub _cache_ok {

    my $self     = shift;
    my $response = shift;

    return 0 if !$response;
    return 1 if !$self->positive_cache;

    return 0 if $response->code < 200;
    return 0 if $response->code > 301;

    return 1;
}

"We miss you, Spoon";    ## no critic

# ABSTRACT: Cache response to be polite

__END__

=pod

=encoding UTF-8

=head1 NAME

WWW::Mechanize::Cached - Cache response to be polite

=head1 VERSION

version 1.48

=head1 SYNOPSIS

    use WWW::Mechanize::Cached;

    my $cacher = WWW::Mechanize::Cached->new;
    $cacher->get( $url );

    # or, with your own Cache object
    use CHI;
    use WWW::Mechanize::Cached;

    my $cache = CHI->new(
        driver   => 'File',
        root_dir => '/tmp/mech-example'
    );

    my $mech = WWW::Mechanize::Cached->new( cache => $cache );
    $mech->get("http://www.wikipedia.org");

=head1 DESCRIPTION

Uses the L<Cache::Cache> hierarchy by default to implement a caching Mech. This
lets one perform repeated requests without hammering a server impolitely.
Please note that L<Cache::Cache> has been superceded by L<CHI>, but the default
has not been changed here for reasons of backwards compatibility.  For this
reason, you are encouraged to provide your own L<CHI> caching object to
override the default.

=head1 CONSTRUCTOR

=head2 new

Behaves like, and calls, L<WWW::Mechanize>'s C<new> method. Any params, other
than those explicitly listed here are passed directly to WWW::Mechanize's
constructor.

You may pass in a C<< cache => $cache_object >> if you wish. The
I<$cache_object> must have C<get()> and C<set()> methods like the
C<Cache::Cache> family.

The default Cache object is set up with the following params:

    my $cache_params = {
        default_expires_in => "1d", namespace => 'www-mechanize-cached',
    };

    $cache = Cache::FileCache->new( $cache_params );

This should be fine if you only want to use a disk-based cache, you only want
to cache results for 1 day and you're not in a shared hosting environment.
If any of this presents a problem for you, you should pass in your own Cache
object.  These defaults will remain unchanged in order to maintain backwards
compatibility.

For example, you may want to try something like this:

    use WWW::Mechanize::Cached;
    use CHI;

    my $cache = CHI->new(
        driver   => 'File',
        root_dir => '/tmp/mech-example'
    );

    my $mech = WWW::Mechanize::Cached->new( cache => $cache );
    $mech->get("http://www.wikipedia.org");

=head1 METHODS

Most methods are provided by L<WWW::Mechanize>. See that module's
documentation for details.

=head2 cache( $cache_object )

Requires an caching object which has a get() and a set() method. Using the CHI
module to create your cache is the recommended way. See new() for examples.

=head2 is_cached()

Returns true if the current page is from the cache, or false if not. If it
returns C<undef>, then you don't have any current request.

=head2 positive_cache( 0|1 )

As of v1.36 positive caching is enabled by default. Up to this point,
this module had employed a negative cache, which means it cached 404
responses, temporary redirects etc. In most cases, this is not what you want,
so the default behaviour now better reflects this. You can revert to the
negative cache quite easily:

    # cache everything (404s, all 300s etc)
    $mech->positive_cache( 0 );

=head2 ref_in_cache_key( 0|1 )

Allow the referring URL to be used when creating the cache key.  This is off
by default.  In almost all cases, you will not want to enable this, but it is
available to you for reasons of backwards compatibility and giving you enough
rope to hang yourself.

Previous to v1.36 the following was in the "BUGS AND LIMITATIONS" section:

    It may sometimes seem as if it's not caching something. And this may well
    be true. It uses the HTTP request, in string form, as the key to the cache
    entries, so any minor changes will result in a different key. This is most
    noticable when following links as L<WWW::Mechanize> adds a C<Referer>
    header.

See RT #56757 for a detailed example of the bugs this functionality can
trigger.

=head2 cache_undef_content_length( 0 | 'warn' | 1 )

This is configuration option which adjusts how caching behaviour performs when
the Content-Length header is not specified by the server.

Default behaviour is 0, which is not to cache.

Setting this value to 1, will cache pages even if the Content-Length header is
missing, which was the default behaviour prior to the addition of this feature.

And thirdly, you can set the value to the string 'warn', to warn if this
scenario occurs, and then not cache it.

=head2 cache_zero_content_length( 0 | 'warn' | 1 )

This is configuration option which adjusts how caching behaviour performs when
the Content-Length header is equal to 0.

Default behaviour is 0, which is not to cache.

Setting this value to 1, will cache pages even if the Content-Length header is
0, which was the default behaviour prior to the addition of this feature.

And thirdly, you can set the value to the string 'warn', to warn if this
scenario occurs, and then not cache it.

=head2 cache_mismatch_content_length( 0 | 'warn' | 1 )

This is configuration option which adjusts how caching behaviour performs when
the Content-Length header differs from the length of the content itself. ( Which
usually indicates a transmission error )

Setting this value to 0, will silenly not cache pages with a Content-Length
mismatch.

Setting this value to 1, will cache pages even if the Content-Length header
conflicts with the content length, which was the default behaviour prior to the
addition of this feature.

And thirdly, you can set the value to the string 'warn', to warn if this
scenario occurs, and then not cache it. ( This is the default behaviour )

=head1 UPGRADING FROM 1.40 OR EARLIER

Caching behaviour has changed since 1.40, and this may result in pages that were
previously cached start failing to cache, and in some cases, emit warnings.

To return to the 1.40 behaviour:

    $mech->cache_undef_content_length(1);  # Default is 0
    $mech->cache_zero_content_length(1);   # Default is 0
    $mech->cache_mismatch_content_length(1); # Default is 'warn'

=head1 THANKS

Iain Truskett for writing this in the first place.
Andy Lester for graciously handing over maintainership.
Kent Fredric for adding content length handling.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc WWW::Mechanize::Cached

=over

=item * Search CPAN

L<https://metacpan.org/module/WWW::Mechanize::Cached>

=back

=head1 SEE ALSO

L<WWW::Mechanize>, L<WWW::Mechanize::Cached::GZip>.

=head1 AUTHORS

=over 4

=item *

Iain Truskett (original author)

=item *

Andy Lester <petdance@cpan.org> (2004 - July 2009)

=item *

Olaf Alders <olaf@wundercounter.com> (current maintainer)

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Iain Truskett and Andy Lester.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut