The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#
#  Copyright 2009-2015 MongoDB, Inc.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

package MongoDB::GridFSBucket;

# ABSTRACT: A file storage abstraction

use version;
our $VERSION = 'v1.4.5';

use Moo;
use MongoDB::GridFSBucket::DownloadStream;
use MongoDB::GridFSBucket::UploadStream;
use MongoDB::_Types qw(
  ReadPreference
  WriteConcern
  ReadConcern
  BSONCodec
  NonNegNum
);
use Scalar::Util qw/reftype/;
use Types::Standard qw(
  Bool
  Int
  Str
  InstanceOf
);
use namespace::clean -except => 'meta';

#pod =attr database
#pod
#pod The L<MongoDB::Database> containing the GridFS bucket collections.
#pod
#pod =cut

has database => (
    is       => 'ro',
    isa      => InstanceOf ['MongoDB::Database'],
    required => 1,
);

#pod =attr bucket_name
#pod
#pod The name of the GridFS bucket.  Defaults to 'fs'.  The underlying
#pod collections that are used to implement a GridFS bucket get this string as a
#pod prefix (e.g "fs.chunks").
#pod
#pod =cut

has bucket_name => (
    is      => 'ro',
    isa     => Str,
    default => sub { 'fs' },
);

#pod =attr chunk_size_bytes
#pod
#pod The number of bytes per chunk.  Defaults to 261120 (255kb).
#pod
#pod =cut

has chunk_size_bytes => (
    is      => 'ro',
    isa     => NonNegNum,
    default => sub { 255 * 1024 },
);

#pod =attr write_concern
#pod
#pod A L<MongoDB::WriteConcern> object.  It may be initialized with a hash
#pod reference that will be coerced into a new MongoDB::WriteConcern object.
#pod By default it will be inherited from a L<MongoDB::Database> object.
#pod
#pod =cut

has write_concern => (
    is       => 'ro',
    isa      => WriteConcern,
    required => 1,
    coerce   => WriteConcern->coercion,
);

#pod =attr read_concern
#pod
#pod A L<MongoDB::ReadConcern> object.  May be initialized with a hash
#pod reference or a string that will be coerced into the level of read
#pod concern.
#pod
#pod By default it will be inherited from a L<MongoDB::Database> object.
#pod
#pod =cut

has read_concern => (
    is       => 'ro',
    isa      => ReadConcern,
    required => 1,
    coerce   => ReadConcern->coercion,
);

#pod =attr read_preference
#pod
#pod A L<MongoDB::ReadPreference> object.  It may be initialized with a string
#pod corresponding to one of the valid read preference modes or a hash reference
#pod that will be coerced into a new MongoDB::ReadPreference object.
#pod By default it will be inherited from a L<MongoDB::Database> object.
#pod
#pod B<Note:> Because many GridFS operations require multiple independent reads from
#pod separate collections, use with secondaries is B<strongly discouraged> because
#pod reads could go to different secondaries, resulting in inconsistent data
#pod if all file and chunk documents have not replicated to all secondaries.
#pod
#pod =cut

has read_preference => (
    is       => 'ro',
    isa      => ReadPreference,
    required => 1,
    coerce   => ReadPreference->coercion,
);

#pod =attr bson_codec
#pod
#pod An object that provides the C<encode_one> and C<decode_one> methods, such
#pod as from L<MongoDB::BSON>.  It may be initialized with a hash reference that
#pod will be coerced into a new MongoDB::BSON object.  By default it will be
#pod inherited from a L<MongoDB::Database> object.
#pod
#pod =cut

has bson_codec => (
    is       => 'ro',
    isa      => BSONCodec,
    coerce   => BSONCodec->coercion,
    required => 1,
);

#pod =attr max_time_ms
#pod
#pod Specifies the maximum amount of time in milliseconds that the server should
#pod use for working on a query.  By default it will be inherited from a
#pod L<MongoDB::Database> object.
#pod
#pod B<Note>: this will only be used for server versions 2.6 or greater, as that
#pod was when the C<$maxTimeMS> meta-operator was introduced.
#pod
#pod =cut

has max_time_ms => (
    is       => 'ro',
    isa      => NonNegNum,
    required => 1,
);

# determines whether or not to attempt index creation
has _tried_indexing => (
    is => 'rwp',
    isa => Bool,
);

has _files => (
    is       => 'lazy',
    isa      => InstanceOf ['MongoDB::Collection'],
    init_arg => undef,
);

sub _build__files {
    my $self = shift;
    my $coll = $self->database->get_collection(
        $self->bucket_name . '.files',
        {
            read_preference => $self->read_preference,
            write_concern   => $self->write_concern,
            read_concern    => $self->read_concern,
            max_time_ms     => $self->max_time_ms,
            bson_codec      => $self->bson_codec,
        }
    );
    return $coll;
}

has _chunks => (
    is       => 'lazy',
    isa      => InstanceOf ['MongoDB::Collection'],
    init_arg => undef,
);

sub _build__chunks {
    my $self = shift;
    my $coll = $self->database->get_collection(
        $self->bucket_name . '.chunks',
        {
            read_preference => $self->read_preference,
            write_concern   => $self->write_concern,
            read_concern    => $self->read_concern,
            max_time_ms     => $self->max_time_ms,
            # XXX: Generate a new bson codec here to
            # prevent users from changing it?
            bson_codec => $self->bson_codec,
        }
    );
    return $coll;
}

# index operations need primary server, regardless of bucket read prefs
sub _create_indexes {
    my ($self) = @_;
    $self->_set__tried_indexing(1);

    my $pf = $self->_files->clone( read_preference => 'primary' );

    return if $pf->count > 0;

    my $pfi = $pf->indexes;
    my $pci = $self->_chunks->clone( read_preference => 'primary' )->indexes;

    if ( !grep { $_->{name} eq 'filename_1_uploadDate_1' } $pfi->list->all ) {
        $pfi->create_one( [ filename => 1, uploadDate => 1 ], { unique => 1 } );
    }

    if ( !grep { $_->{name} eq 'files_id_1_n_1' } $pci->list->all ) {
        $pci->create_one( [ files_id => 1, n => 1 ], { unique => 1 } );
    }

    return;
}

#pod =method find
#pod
#pod     $result = $bucket->find($filter);
#pod     $result = $bucket->find($filter, $options);
#pod
#pod     $file_doc = $result->next;
#pod
#pod Executes a query on the file documents collection with a
#pod L<filter expression|MongoDB::Collection/Filter expression> and
#pod returns a L<MongoDB::QueryResult> object.  It takes an optional hashref
#pod of options identical to L<MongoDB::Collection/find>.
#pod
#pod =cut

sub find {
    my ( $self, $filter, $options ) = @_;
    return $self->_files->find( $filter, $options )->result;
}

#pod =method find_one
#pod
#pod     $file_doc = $bucket->find_one($filter, $projection);
#pod     $file_doc = $bucket->find_one($filter, $projection, $options);
#pod
#pod Executes a query on the file documents collection with a
#pod L<filter expression|MongoDB::Collection/Filter expression> and
#pod returns the first document found, or C<undef> if no document is found.
#pod
#pod See L<MongoDB::Collection/find_one> for details about the
#pod C<$projection> and optional C<$options> fields.
#pod
#pod =cut

sub find_one {
    my ( $self, $filter, $projection, $options ) = @_;
    return $self->_files->find_one( $filter, $projection, $options );
}

#pod =method find_id
#pod
#pod     $file_doc = $bucket->find_id( $id );
#pod     $file_doc = $bucket->find_id( $id, $projection );
#pod     $file_doc = $bucket->find_id( $id, $projection, $options );
#pod
#pod Executes a query with a L<filter expression|/Filter expression> of
#pod C<< { _id => $id } >> and returns a single document or C<undef> if no document
#pod is found.
#pod
#pod See L<MongoDB::Collection/find_one> for details about the
#pod C<$projection> and optional C<$options> fields.
#pod
#pod =cut

sub find_id {
    my ( $self, $id, $projection, $options ) = @_;
    return $self->_files->find_id( $id, $projection, $options );
}

#pod =method open_download_stream
#pod
#pod     $stream = $bucket->open_download_stream($id);
#pod     $line = $stream->readline;
#pod
#pod Returns a new L<MongoDB::GridFSBucket::DownloadStream> that can be used to
#pod download the file with the file document C<_id> matching C<$id>.  This
#pod throws a L<MongoDB::GridFSError> if no such file exists.
#pod
#pod =cut

sub open_download_stream {
    my ( $self, $id ) = @_;
    MongoDB::UsageError->throw('No id provided to open_download_stream') unless $id;
    my $file_doc = $self->_files->find_id($id);
    MongoDB::GridFSError->throw("FileNotFound: no file found for id '$id'")
      unless $file_doc;
    my $result =
        $file_doc->{'length'} > 0
      ? $self->_chunks->find( { files_id => $id }, { sort => { n => 1 } } )->result
      : undef;
    return MongoDB::GridFSBucket::DownloadStream->new(
        {
            id       => $id,
            file_doc => $file_doc,
            _result  => $result,
        }
    );
}

#pod =method open_upload_stream
#pod
#pod     $stream = $bucket->open_upload_stream($filename);
#pod     $stream = $bucket->open_upload_stream($filename, $options);
#pod
#pod     $stream->print('data');
#pod     $stream->close;
#pod     $file_id = $stream->id
#pod
#pod Returns a new L<MongoDB::GridFSBucket::UploadStream> that can be used
#pod to upload a new file to a GridFS bucket.
#pod
#pod This method requires a filename to store in the C<filename> field of the
#pod file document.  B<Note>: the filename is an arbitrary string; the method
#pod does not read from this filename locally.
#pod
#pod You can provide an optional hash reference of options that are passed to the
#pod L<MongoDB::GridFSBucket::UploadStream> constructor:
#pod
#pod =for :list
#pod * C<chunk_size_bytes> – the number of bytes per chunk.  Defaults to the
#pod   C<chunk_size_bytes> of the bucket object.
#pod * C<metadata> – a hash reference for storing arbitrary metadata about the
#pod   file.
#pod
#pod =cut

sub open_upload_stream {
    my ( $self, $filename, $options ) = @_;
    MongoDB::UsageError->throw('No filename provided to open_upload_stream')
      unless defined $filename && length $filename;

    $self->_create_indexes unless $self->_tried_indexing;

    return MongoDB::GridFSBucket::UploadStream->new(
        {
            chunk_size_bytes => $self->chunk_size_bytes,
            ( $options ? %$options : () ),
            _bucket  => $self,
            filename => "$filename", # stringify path objects
        }
    );
}

#pod =method download_to_stream
#pod
#pod     $bucket->download_to_stream($id, $out_fh);
#pod
#pod Downloads the file matching C<$id> and writes it to the file handle C<$out_fh>.
#pod This throws a L<MongoDB::GridFSError> if no such file exists.
#pod
#pod =cut

sub download_to_stream {
    my ( $self, $id, $target_fh ) = @_;
    MongoDB::UsageError->throw('No id provided to download_to_stream')
      unless defined $id;
    MongoDB::UsageError->throw('No handle provided to download_to_stream')
      unless defined $target_fh;
    MongoDB::UsageError->throw(
        'Invalid handle $target_fh provided to download_to_stream')
      unless reftype $target_fh eq 'GLOB';

    my $download_stream = $self->open_download_stream($id);
    my $csb             = $download_stream->file_doc->{chunkSize};
    my $buffer;
    while ( $download_stream->read( $buffer, $csb ) ) {
        print {$target_fh} $buffer;
    }
    $download_stream->close;
    return;
}

#pod =method upload_from_stream
#pod
#pod     $file_id = $bucket->upload_from_stream($filename, $in_fh);
#pod     $file_id = $bucket->upload_from_stream($filename, $in_fh, $options);
#pod
#pod Reads from a filehandle and uploads its contents to GridFS.  It returns the
#pod C<_id> field stored in the file document.
#pod
#pod This method requires a filename to store in the C<filename> field of the
#pod file document.  B<Note>: the filename is an arbitrary string; the method
#pod does not read from this filename locally.
#pod
#pod You can provide an optional hash reference of options, just like
#pod L</open_upload_stream>.
#pod
#pod =cut

sub upload_from_stream {
    my ( $self, $filename, $source_fh, $options ) = @_;
    MongoDB::UsageError->throw('No filename provided to upload_from_stream')
      unless defined $filename && length $filename;
    MongoDB::UsageError->throw('No handle provided to upload_from_stream')
      unless defined $source_fh;
    MongoDB::UsageError->throw(
        'Invalid handle $source_fh provided to upload_from_stream')
      unless reftype $source_fh eq 'GLOB';

    my $upload_stream = $self->open_upload_stream( $filename, $options );
    my $csb = $upload_stream->chunk_size_bytes;
    my $buffer;
    while ( read $source_fh, $buffer, $csb ) {
        $upload_stream->print($buffer);
    }
    $upload_stream->close;
    return $upload_stream->id;
}

#pod =method delete
#pod
#pod     $bucket->delete($id);
#pod
#pod Deletes the file matching C<$id> from the bucket.
#pod This throws a L<MongoDB::GridFSError> if no such file exists.
#pod
#pod =cut

sub delete {
    my ( $self, $id ) = @_;

    $self->_create_indexes unless $self->_tried_indexing;

    my $delete_result = $self->_files->delete_one( { _id => $id } );
    # This should only ever be 0 or 1, checking for exactly 1 to be thorough
    unless ( $delete_result->deleted_count == 1 ) {
        MongoDB::GridFSError->throw("FileNotFound: no file found for id $id");
    }
    $self->_chunks->delete_many( { files_id => $id } );
    return;
}

#pod =method drop
#pod
#pod     $bucket->drop;
#pod
#pod Drops the underlying files documents and chunks collections for this bucket.
#pod
#pod =cut

sub drop {
    my ($self) = @_;
    $self->_files->drop;
    $self->_chunks->drop;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

MongoDB::GridFSBucket - A file storage abstraction

=head1 VERSION

version v1.4.5

=head1 SYNOPSIS

    $bucket = $database->gfs;

    # upload a file
    $stream  = $bucket->open_upload_stream("foo.txt");
    $stream->print( $data );
    $stream->close;

    # find and download a file
    $result  = $bucket-find({filename => "foo.txt"});
    $file_id = $result->next->{_id};
    $stream  = $bucket->open_download_stream($file_id)
    $data    = do { local $/; $stream->readline() };

=head1 DESCRIPTION

This class models a GridFS file store in a MongoDB database and provides an
API for interacting with it.

Generally, you never construct one of these directly with C<new>.  Instead,
you call C<gfs> (short for C<get_gridfsbucket>) on a L<MongoDB::Database>
object.

=head1 USAGE

=head2 Data model

A GridFS file is represented in MongoDB as a "file document" with
information like the file's name, length, MD5 hash, and any user-supplied
metadata.  The actual contents are stored as a number of "chunks" of binary
data.  (Think of the file document as a directory entry and the chunks like
blocks on disk.)

Valid file documents typically include the following fields:

=over 4

=item *

_id – a unique ID for this document, typically type BSON ObjectId. Legacy GridFS documents may store this value as a different type. New files must be stored using an ObjectId.

=item *

length – the length of this stored file, in bytes

=item *

chunkSize – the size, in bytes, of each full data chunk of this file. This value is configurable per file.

=item *

uploadDate – the date and time this file was added to GridFS, stored as a BSON datetime value.

=item *

md5 – a hash of the contents of the stored file

=item *

filename – the name of this stored file; this does not need to be unique

=item *

metadata – any additional application data the user wishes to store (optional)

=item *

contentType – DEPRECATED (store this in C<metadata> if you need it) (optional)

=item *

aliases – DEPRECATED (store this in C<metadata> if you need it) (optional)

=back

The C<find> method searches file documents using these fields.  Given the
C<_id> from a document, a file can be downloaded using the download
methods.

=head2 API Overview

In addition to general methods like C<find>, C<delete> and C<drop>, there
are two ways to go about uploading and downloading:

=over 4

=item *

filehandle-like: you get an object that you can read/write from similar to a filehandle.  You can even get a tied filehandle that you can hand off to other code that requires an actual Perl handle.

=item *

streaming: you provide a file handle to read from (upload) or print to (download) and data is streamed to (upload) or from (download) GridFS until EOF.

=back

=head2 Error handling

Unless otherwise explictly documented, all methods throw exceptions if
an error occurs.  The error types are documented in L<MongoDB::Error>.

=head1 ATTRIBUTES

=head2 database

The L<MongoDB::Database> containing the GridFS bucket collections.

=head2 bucket_name

The name of the GridFS bucket.  Defaults to 'fs'.  The underlying
collections that are used to implement a GridFS bucket get this string as a
prefix (e.g "fs.chunks").

=head2 chunk_size_bytes

The number of bytes per chunk.  Defaults to 261120 (255kb).

=head2 write_concern

A L<MongoDB::WriteConcern> object.  It may be initialized with a hash
reference that will be coerced into a new MongoDB::WriteConcern object.
By default it will be inherited from a L<MongoDB::Database> object.

=head2 read_concern

A L<MongoDB::ReadConcern> object.  May be initialized with a hash
reference or a string that will be coerced into the level of read
concern.

By default it will be inherited from a L<MongoDB::Database> object.

=head2 read_preference

A L<MongoDB::ReadPreference> object.  It may be initialized with a string
corresponding to one of the valid read preference modes or a hash reference
that will be coerced into a new MongoDB::ReadPreference object.
By default it will be inherited from a L<MongoDB::Database> object.

B<Note:> Because many GridFS operations require multiple independent reads from
separate collections, use with secondaries is B<strongly discouraged> because
reads could go to different secondaries, resulting in inconsistent data
if all file and chunk documents have not replicated to all secondaries.

=head2 bson_codec

An object that provides the C<encode_one> and C<decode_one> methods, such
as from L<MongoDB::BSON>.  It may be initialized with a hash reference that
will be coerced into a new MongoDB::BSON object.  By default it will be
inherited from a L<MongoDB::Database> object.

=head2 max_time_ms

Specifies the maximum amount of time in milliseconds that the server should
use for working on a query.  By default it will be inherited from a
L<MongoDB::Database> object.

B<Note>: this will only be used for server versions 2.6 or greater, as that
was when the C<$maxTimeMS> meta-operator was introduced.

=head1 METHODS

=head2 find

    $result = $bucket->find($filter);
    $result = $bucket->find($filter, $options);

    $file_doc = $result->next;

Executes a query on the file documents collection with a
L<filter expression|MongoDB::Collection/Filter expression> and
returns a L<MongoDB::QueryResult> object.  It takes an optional hashref
of options identical to L<MongoDB::Collection/find>.

=head2 find_one

    $file_doc = $bucket->find_one($filter, $projection);
    $file_doc = $bucket->find_one($filter, $projection, $options);

Executes a query on the file documents collection with a
L<filter expression|MongoDB::Collection/Filter expression> and
returns the first document found, or C<undef> if no document is found.

See L<MongoDB::Collection/find_one> for details about the
C<$projection> and optional C<$options> fields.

=head2 find_id

    $file_doc = $bucket->find_id( $id );
    $file_doc = $bucket->find_id( $id, $projection );
    $file_doc = $bucket->find_id( $id, $projection, $options );

Executes a query with a L<filter expression|/Filter expression> of
C<< { _id => $id } >> and returns a single document or C<undef> if no document
is found.

See L<MongoDB::Collection/find_one> for details about the
C<$projection> and optional C<$options> fields.

=head2 open_download_stream

    $stream = $bucket->open_download_stream($id);
    $line = $stream->readline;

Returns a new L<MongoDB::GridFSBucket::DownloadStream> that can be used to
download the file with the file document C<_id> matching C<$id>.  This
throws a L<MongoDB::GridFSError> if no such file exists.

=head2 open_upload_stream

    $stream = $bucket->open_upload_stream($filename);
    $stream = $bucket->open_upload_stream($filename, $options);

    $stream->print('data');
    $stream->close;
    $file_id = $stream->id

Returns a new L<MongoDB::GridFSBucket::UploadStream> that can be used
to upload a new file to a GridFS bucket.

This method requires a filename to store in the C<filename> field of the
file document.  B<Note>: the filename is an arbitrary string; the method
does not read from this filename locally.

You can provide an optional hash reference of options that are passed to the
L<MongoDB::GridFSBucket::UploadStream> constructor:

=over 4

=item *

C<chunk_size_bytes> – the number of bytes per chunk.  Defaults to the C<chunk_size_bytes> of the bucket object.

=item *

C<metadata> – a hash reference for storing arbitrary metadata about the file.

=back

=head2 download_to_stream

    $bucket->download_to_stream($id, $out_fh);

Downloads the file matching C<$id> and writes it to the file handle C<$out_fh>.
This throws a L<MongoDB::GridFSError> if no such file exists.

=head2 upload_from_stream

    $file_id = $bucket->upload_from_stream($filename, $in_fh);
    $file_id = $bucket->upload_from_stream($filename, $in_fh, $options);

Reads from a filehandle and uploads its contents to GridFS.  It returns the
C<_id> field stored in the file document.

This method requires a filename to store in the C<filename> field of the
file document.  B<Note>: the filename is an arbitrary string; the method
does not read from this filename locally.

You can provide an optional hash reference of options, just like
L</open_upload_stream>.

=head2 delete

    $bucket->delete($id);

Deletes the file matching C<$id> from the bucket.
This throws a L<MongoDB::GridFSError> if no such file exists.

=head2 drop

    $bucket->drop;

Drops the underlying files documents and chunks collections for this bucket.

=head1 SEE ALSO

Core documentation on GridFS: L<http://dochub.mongodb.org/core/gridfs>.

=head1 AUTHORS

=over 4

=item *

David Golden <david@mongodb.com>

=item *

Mike Friedman <friedo@friedo.com>

=item *

Kristina Chodorow <k.chodorow@gmail.com>

=item *

Florian Ragwitz <rafl@debian.org>

=back

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2016 by MongoDB, Inc.

This is free software, licensed under:

  The Apache License, Version 2.0, January 2004

=cut