The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#======================================================================
package DBD::SQLite::VirtualTable::FileContent;
#======================================================================
use strict;
use warnings;
use base 'DBD::SQLite::VirtualTable';

my %option_ok = map {($_ => 1)} qw/source content_col path_col
                                   expose root get_content/;

my %defaults = (
  content_col => "content",
  path_col    => "path",
  expose      => "*",
  get_content => "DBD::SQLite::VirtualTable::FileContent::get_content",
);


#----------------------------------------------------------------------
# object instanciation
#----------------------------------------------------------------------

sub NEW {
  my $class = shift;

  my $self  = $class->_PREPARE_SELF(@_);

  local $" = ", "; # for array interpolation in strings

  # initial parameter check
  !@{$self->{columns}}
    or die "${class}->NEW(): illegal options: @{$self->{columns}}";
  $self->{options}{source}
    or die "${class}->NEW(): missing (source=...)";
  my @bad_options = grep {!$option_ok{$_}} keys %{$self->{options}};
  !@bad_options
    or die "${class}->NEW(): bad options: @bad_options";

  # defaults ... tempted to use //= but we still want to support perl 5.8 :-(
  foreach my $k (keys %defaults) {
    defined $self->{options}{$k}
      or $self->{options}{$k} = $defaults{$k};
  }

  # get list of columns from the source table
  my $src_table  = $self->{options}{source};
  my $sql        = "PRAGMA table_info($src_table)";
  my $dbh        = ${$self->{dbh_ref}}; # can't use method ->dbh, not blessed yet
  my $src_info   = $dbh->selectall_arrayref($sql, {Slice => [1, 2]});
  @$src_info
    or die "${class}->NEW(source=$src_table): no such table in database";

  # associate each source colname with its type info or " " (should eval true)
  my %src_col = map  { ($_->[0] => $_->[1] || " ") } @$src_info;


  # check / complete the exposed columns
  my @exposed_cols;
  if ($self->{options}{expose} eq '*') {
    @exposed_cols = map {$_->[0]} @$src_info;
  }
  else {
    @exposed_cols = split /\s*,\s*/, $self->{options}{expose};
    my @bad_cols  = grep { !$src_col{$_} } @exposed_cols;
    die "table $src_table has no column named @bad_cols" if @bad_cols;
  }
  for (@exposed_cols) {
    die "$class: $self->{options}{content_col} cannot be both the "
      . "content_col and an exposed col" if $_ eq $self->{options}{content_col};
  }

  # build the list of columns for this table
  $self->{columns} = [ "$self->{options}{content_col} TEXT",
                       map {"$_ $src_col{$_}"} @exposed_cols ];

  # acquire a coderef to the get_content() implementation, which
  # was given as a symbolic reference in %options
  no strict 'refs';
  $self->{get_content} = \ &{$self->{options}{get_content}};

  bless $self, $class;
}

sub _build_headers {
  my $self = shift;

  my $cols = $self->sqlite_table_info;

  # headers : names of columns, without type information
  $self->{headers} = [ map {$_->{name}} @$cols ];
}


#----------------------------------------------------------------------
# method for initiating a search
#----------------------------------------------------------------------

sub BEST_INDEX {
  my ($self, $constraints, $order_by) = @_;

  $self->_build_headers if !$self->{headers};

  my @conditions;
  my $ix = 0;
  foreach my $constraint (grep {$_->{usable}} @$constraints) {
    my $col     = $constraint->{col};

    # if this is the content column, skip because we can't filter on it
    next if $col == 0;

    # for other columns, build a fragment for SQL WHERE on the underlying table
    my $colname = $col == -1 ? "rowid" : $self->{headers}[$col];
    push @conditions, "$colname $constraint->{op} ?";
    $constraint->{argvIndex} = $ix++;
    $constraint->{omit}      = 1;     # SQLite doesn't need to re-check the op
  }

  # TODO : exploit $order_by to add ordering clauses within idxStr

  my $outputs = {
    idxNum           => 1,
    idxStr           => join(" AND ", @conditions),
    orderByConsumed  => 0,
    estimatedCost    => 1.0,
    estimatedRows    => undef,
   };

  return $outputs;
}


#----------------------------------------------------------------------
# method for preventing updates
#----------------------------------------------------------------------

sub _SQLITE_UPDATE {
  my ($self, $old_rowid, $new_rowid, @values) = @_;

  die "attempt to update a readonly virtual table";
}


#----------------------------------------------------------------------
# file slurping function (not a method!)
#----------------------------------------------------------------------

sub get_content {
  my ($path, $root) = @_;

  $path = "$root/$path" if $root;

  my $content = "";
  if (open my $fh, "<", $path) {
    local $/;          # slurp the whole file into a scalar
    $content = <$fh>;
    close $fh;
  }
  else {
    warn "can't open $path";
  }

  return $content;
}



#======================================================================
package DBD::SQLite::VirtualTable::FileContent::Cursor;
#======================================================================
use strict;
use warnings;
use base "DBD::SQLite::VirtualTable::Cursor";


sub FILTER {
  my ($self, $idxNum, $idxStr, @values) = @_;

  my $vtable = $self->{vtable};

  # build SQL
  local $" = ", ";
  my @cols = @{$vtable->{headers}};
  $cols[0] = 'rowid';                 # replace the content column by the rowid
  push @cols, $vtable->{options}{path_col}; # path col in last position
  my $sql  = "SELECT @cols FROM $vtable->{options}{source}";
  $sql .= " WHERE $idxStr" if $idxStr;

  # request on the index table
  my $dbh = $vtable->dbh;
  $self->{sth} = $dbh->prepare($sql)
    or die DBI->errstr;
  $self->{sth}->execute(@values);
  $self->{row} = $self->{sth}->fetchrow_arrayref;

  return;
}


sub EOF {
  my ($self) = @_;

  return !$self->{row};
}

sub NEXT {
  my ($self) = @_;

  $self->{row} = $self->{sth}->fetchrow_arrayref;
}

sub COLUMN {
  my ($self, $idxCol) = @_;

  return $idxCol == 0 ? $self->file_content : $self->{row}[$idxCol];
}

sub ROWID {
  my ($self) = @_;

  return $self->{row}[0];
}

sub file_content {
  my ($self) = @_;

  my $root = $self->{vtable}{options}{root};
  my $path = $self->{row}[-1];
  my $get_content_func = $self->{vtable}{get_content};

  return $get_content_func->($path, $root);
}


1;

__END__


=head1 NAME

DBD::SQLite::VirtualTable::FileContent -- virtual table for viewing file contents


=head1 SYNOPSIS

Within Perl :

  $dbh->sqlite_create_module(fcontent => "DBD::SQLite::VirtualTable::FileContent");

Then, within SQL :

  CREATE VIRTUAL TABLE tbl USING fcontent(
     source      = src_table,
     content_col = content,
     path_col    = path,
     expose      = "path, col1, col2, col3", -- or "*"
     root        = "/foo/bar"
     get_content = Foo::Bar::read_from_file
    );

  SELECT col1, path, content FROM tbl WHERE ...;

=head1 DESCRIPTION

A "FileContent" virtual table is bound to some underlying I<source
table>, which has a column containing paths to files.  The virtual
table behaves like a database view on the source table, with an added
column which exposes the content from those files.

This is especially useful as an "external content" to some
fulltext table (see L<DBD::SQLite::Fulltext_search>) : the index
table stores some metadata about files, and then the fulltext engine
can index both the metadata and the file contents.

=head1 PARAMETERS

Parameters for creating a C<FileContent> virtual table are
specified within the C<CREATE VIRTUAL TABLE> statement, just
like regular column declarations, but with an '=' sign.
Authorized parameters are :

=over

=item C<source>

The name of the I<source table>.
This parameter is mandatory. All other parameters are optional.

=item C<content_col>

The name of the virtual column exposing file contents.
The default is C<content>.

=item C<path_col>

The name of the column in C<source> that contains paths to files.
The default is C<path>.

=item C<expose>

A comma-separated list (within double quotes) of source column names
to be exposed by the virtual table. The default is C<"*">, which means
all source columns.

=item C<root>

An optional root directory that will be prepended to the I<path> column
when opening files.

=item C<get_content>

Fully qualified name of a Perl function for reading file contents.
The default implementation just slurps the entire file into a string;
but this hook can point to more sophisticated implementations, like for
example a function that would remove html tags. The hooked function is
called like this :

  $file_content = $get_content->($path, $root);

=back

=head1 AUTHOR

Laurent Dami E<lt>dami@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright Laurent Dami, 2014.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut