The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Wallflower;
$Wallflower::VERSION = '1.006';
use strict;
use warnings;

use Plack::Util ();
use Path::Class;
use URI;
use HTTP::Date qw( time2str );
use Carp;

# quick getters
for my $attr (qw( application destination env index url )) {
    no strict 'refs';
    *$attr = sub { $_[0]{$attr} };
}

# create a new instance
sub new {
    my ( $class, %args ) = @_;
    my $self = bless {
        destination => Path::Class::Dir->new(),    # File::Spec->curdir
        env         => {},
        index       => 'index.html',
        url         => 'http://localhost/',
        %args,
    }, $class;

    # some basic parameter checking
    croak "application is required" if !defined $self->application;
    croak "destination is invalid"
        if !-e $self->destination || !-d $self->destination;

    # turn the url attribute into a URI object
    $self->{url} = URI->new( $self->url );

    # if the application is mounted somewhere
    my $path;
    if ( $path = $self->url->path and $path ne '/' ) {
        require Plack::App::URLMap;
        my $urlmap = Plack::App::URLMap->new;
        $urlmap->mount( $path => $self->application );
        $self->{application} = $urlmap->to_app;
    }

    return $self;
}

# url -> file converter
sub target {
    my ( $self, $uri ) = @_;

    # the URI must have a path
    croak "$uri has an empty path" if !length $uri->path;

    # URI ending with / have the empty string as their last path_segment
    my @segments = $uri->path_segments;
    $segments[-1] = $self->index if $segments[-1] eq '';

    # generate target file name
    return Path::Class::File->new( $self->destination, @segments );
}

# save the URL to a file
sub get {
    my ( $self, $uri ) = @_;
    $uri = URI->new($uri) if !ref $uri;

    # absolute paths have the empty string as their first path_segment
    croak "$uri is not an absolute URI"
        if $uri->path && length +( $uri->path_segments )[0];

    # setup the environment
    my $env = {

        # current environment
        %ENV,

        # overridable defaults
        'psgi.errors' => \*STDERR,

        # current instance defaults
        %{ $self->env },
        ('psgi.url_scheme' => $self->url->scheme )x!! $self->url->scheme,

        # request-related environment variables
        REQUEST_METHOD => 'GET',

        # request attributes
        SCRIPT_NAME     => '',
        PATH_INFO       => $uri->path,
        REQUEST_URI     => $uri->path,
        QUERY_STRING    => '',
        SERVER_NAME     => $self->url->host,
        SERVER_PORT     => $self->url->port,
        SERVER_PROTOCOL => "HTTP/1.0",

        # wallflower defaults
        'psgi.streaming' => '',
    };

    # add If-Modified-Since headers if the target file exists
    my $target = $self->target($uri);
    $env->{HTTP_IF_MODIFIED_SINCE} = time2str( ( stat _ )[9] ) if -e $target;

    # fixup URI (needed to resolve relative URLs in retrieved documents)
    $uri->scheme( $env->{'psgi.url_scheme'} ) if !$uri->scheme;
    $uri->host( $env->{SERVER_NAME} ) if !$uri->host;

    # get the content
    my ( $status, $headers, $file, $content ) = ( 500, [], '', '' );
    my $res = Plack::Util::run_app( $self->application, $env );

    if ( ref $res eq 'ARRAY' ) {
        ( $status, $headers, $content ) = @$res;
    }
    elsif ( ref $res eq 'CODE' ) {
        croak "Delayed response and streaming not supported yet";
    }
    else { croak "Unknown response from application: $res"; }

    # save the content to a file
    if ( $status eq '200' ) {

        # get a file to save the content in
        my $dir = ( $file = $target )->dir;
        $dir->mkpath if !-e $dir;
        open my $fh, '> :raw', $file    # no stinky crlf on Win32
          or croak "Can't open $file for writing: $!";

        # copy content to the file
        if ( ref $content eq 'ARRAY' ) {
            print $fh @$content;
        }
        elsif ( ref $content eq 'GLOB' ) {
            local $/ = \8192;
            print {$fh} $_ while <$content>;
            close $content;
        }
        elsif ( eval { $content->can('getline') } ) {
            local $/ = \8192;
            while ( defined( my $line = $content->getline ) ) {
                print {$fh} $line;
            }
            $content->close;
        }
        else {
            croak "Don't know how to handle body: $content";
        }

        # finish
        close $fh;
    }

    return [ $status, $headers, $file ];
}

1;

# ABSTRACT: Stick Plack applications to the wallpaper

__END__

=pod

=head1 NAME

Wallflower - Stick Plack applications to the wallpaper

=head1 VERSION

version 1.006

=head1 SYNOPSIS

    use Wallflower;

    my $w = Wallflower->new(
        application => $app, # a PSGI app
        destination => $dir, # target directory
    );

    # dump all URL from $app to files in $dir
    $w->get( $_ ) for @urls;

=head1 DESCRIPTION

Given a URL and a L<Plack> application, a L<Wallflower> object will
save the corresponding response to a file.

=head1 METHODS

=head2 new( %args )

Create a new L<Wallflower> object.

The parameters are:

=over 4

=item C<application>

The PSGI/Plack application, as a CODE reference.

This parameter is I<required>.

=item C<destination>

The destination directory. Default is the current directory.

The destination directory must exist.

=item C<env>

Additional environment key/value pairs.

=item C<index>

The default filename for URLs ending in C</>.
The default value is F<index.html>.

=item C<url>

URL where the root of the application will be reachable in production.

If the URL has a path component, the application will be "mounted" at
that position.

=back

=head2 get( $url )

Perform a C<GET> request for C<$url> through the application, and
if successful, save the result to a filename derived from C<$url> by
the C<target()> method.

C<$url> can be either a string or a L<URI> object, representing an
absolute URL (the path must start with a C</>). The scheme, host, port,
and query string are ignored if present.

The return value is very similar to a L<Plack> application's:

   [ $status, $headers, $file ]

where C<$status> and C<$headers> are those returned by the application
itself for the given C<$url>, and C<$file> is the name of the file where
the content has been saved.

If a file exists at the location pointed to by the target, a
C<If-Modified-Since> header is added to the Plack environment,
with the modification timestamp for this file as the value.
If the application sends a C<304 Not modified> in response,
the target file will not be modified.

=head2 target( $uri )

Return the filename where the content of C<$uri> will be saved.

The C<path> component of C<$uri> is concatenated to the C<destination>
attribute. If the URL ends with a C</>, the C<index> attribute is appended
to create a file path.

Note that C<target()> assumes C<$uri> is a L<URI> object, and that it
must be absolute.

=head1 ACCESSORS

Accessors (getters only) exist for all parameters
to C<new()> and bear the same name.

=head1 AUTHOR

Philippe Bruhat (BooK) <book@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Philippe Bruhat (BooK).

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut