# lib/LucyX/Index/ByteBufDocWriter.pm

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use strict;
use warnings;

package LucyX::Index::ByteBufDocWriter;
use base qw( Lucy::Index::DataWriter );
use Carp;
use Scalar::Util qw( blessed );
# Load bytes.pm so that bytes::length() can be called explicitly, then switch
# byte semantics straight back off for the rest of this file.
use bytes;
no bytes;

# Inside-out member vars.  Each hash is keyed by $$self (the dereferenced
# object); entries are created in new() / _lazy_init() and removed again in
# DESTROY().
our %field;        # name of the doc field whose value gets written
our %width;        # required length, in bytes, of every record
our %outstream;    # output stream for bytebufdocs.dat, opened on first use

# Constructor.  Takes the labeled params 'width' and 'field', both required,
# in addition to whatever the parent class constructor accepts; every other
# param is forwarded to SUPER::new() untouched.  Confesses if either param is
# missing or if 'width' is less than 1.
sub new {
    my ( $either, %args ) = @_;

    # Pull our own params out so the parent constructor never sees them.
    my ( $width, $field ) = delete @args{qw( width field )};
    my $self = $either->SUPER::new(%args);

    defined $width or confess("Missing required param 'width'");
    defined $field or confess("Missing required param 'field'");
    confess("'width' must be at least 1") if $width < 1;

    # Inside-out storage, keyed by the dereferenced object.
    $width{$$self} = $width;
    $field{$$self} = $field;
    return $self;
}

# Open the segment's "bytebufdocs.dat" output stream on first use, cache it
# in %outstream, and write one all-NUL record of 'width' bytes so that the
# slot belonging to non-doc #0 is occupied.
#
# Returns the newly opened outstream.  Confesses with Lucy's error if the
# file cannot be opened.
sub _lazy_init {
    my $self = shift;

    # Get outstream.
    my $folder    = $self->get_folder;
    my $filename  = $self->get_segment->get_name . "/bytebufdocs.dat";
    my $outstream = $outstream{$$self} = $folder->open_out($filename);

    # Parenthesize the argument: a bare "confess Lucy->error" leaves it to the
    # parser's indirect-object heuristics to choose between
    # confess( Lucy->error ) and a method call on the Lucy class.
    confess( Lucy->error ) unless $outstream;

    # Skip past non-doc #0.
    $outstream->print( "\0" x $width{$$self} );

    return $outstream;
}

# Append the value of the configured field for one document to the output
# stream, opening the stream first if this is the first write.
#
# Labeled params:
#   inverter - object whose get_doc->get_fields yields the doc's field hash.
#
# Confesses if the doc has no value for the field, or if the value's length
# in bytes differs from the configured width.
sub add_inverted_doc {
    my ( $self, %args ) = @_;
    my $outstream = $outstream{$$self} || _lazy_init($self);
    my $fields    = $args{inverter}->get_doc->get_fields;
    my $width     = $width{$$self};
    my $field     = $field{$$self};
    my $value     = $fields->{$field};

    # An absent value would otherwise surface as "uninitialized value"
    # warnings plus a misleading width complaint about an empty string.
    confess("Missing value for field '$field'") unless defined $value;

    # Measure in bytes, not characters, when checking against the width.
    if ( bytes::length($value) != $width ) {
        confess("Width of '$value' not $width");
    }
    $outstream->print($value);
}

# Copy records from an existing segment into this writer's output stream.
#
# Labeled params:
#   reader  - segment reader for the segment being absorbed.
#   doc_map - object whose get($doc_id) is false for docs that are skipped.
#
# Confesses unless the DocReader obtained from the segment reader is a
# LucyX::Index::ByteBufDocReader.
sub add_segment {
    my ( $self, %args ) = @_;
    my ( $seg_reader, $doc_map ) = @args{qw( reader doc_map )};

    # Nothing to do for an empty segment.
    my $doc_max = $seg_reader->doc_max;
    return unless $doc_max;

    my $out    = $outstream{$$self} || _lazy_init($self);
    my $source = $seg_reader->obtain("Lucy::Index::DocReader");
    my $usable = blessed($source)
        && $source->isa("LucyX::Index::ByteBufDocReader");
    confess("Not a ByteBufDocReader") unless $usable;

    # Start at 1: slot 0 belongs to non-doc #0.
    for my $doc_id ( 1 .. $doc_max ) {
        next unless $doc_map->get($doc_id);
        my $record;
        $source->read_record( $doc_id, \$record );
        $out->print($record);
    }
}

# Close the output stream, if one was ever opened, and store this
# component's metadata in the segment under the key 'bytebufdocs'.  Without
# a stream, nothing is closed and nothing is stored.
sub finish {
    my $self = shift;
    if ( my $out = $outstream{$$self} ) {
        $out->close;
        $self->get_segment->store_metadata(
            key      => 'bytebufdocs',
            metadata => $self->metadata,
        );
    }
}

# Always returns 1.  NOTE(review): presumably the file-format version that
# the parent class expects from this hook -- confirm against
# Lucy::Index::DataWriter.
sub format { return 1 }

# Remove this object's entries from the inside-out hashes, then hand off to
# the parent class destructor.
sub DESTROY {
    my $self = shift;
    my $id   = $$self;
    delete $_->{$id} for ( \%field, \%width, \%outstream );
    $self->SUPER::DESTROY;
}

1;

__END__

__POD__

=head1 NAME

LucyX::Index::ByteBufDocWriter - Write a Doc as a fixed-width byte array.

=head1 SYNOPSIS

Create an L<Architecture|Lucy::Plan::Architecture> subclass which
overrides register_doc_writer() and register_doc_reader():

    package MyArchitecture;
    use base qw( Lucy::Plan::Architecture );
    use LucyX::Index::ByteBufDocReader;
    use LucyX::Index::ByteBufDocWriter;

    sub register_doc_writer {
        my ( $self, $seg_writer ) = @_; 
        my $doc_writer = LucyX::Index::ByteBufDocWriter->new(
            width      => 16,
            field      => 'value',
            snapshot   => $seg_writer->get_snapshot,
            segment    => $seg_writer->get_segment,
            polyreader => $seg_writer->get_polyreader,
        );  
        $seg_writer->register(
            api       => "Lucy::Index::DocReader",
            component => $doc_writer,
        );  
        $seg_writer->add_writer($doc_writer);
    }

    sub register_doc_reader {
        my ( $self, $seg_reader ) = @_; 
        my $doc_reader = LucyX::Index::ByteBufDocReader->new(
            width    => 16,
            field    => 'value',
            schema   => $seg_reader->get_schema,
            folder   => $seg_reader->get_folder,
            segments => $seg_reader->get_segments,
            seg_tick => $seg_reader->get_seg_tick,
            snapshot => $seg_reader->get_snapshot,
        );  
        $seg_reader->register(
            api       => 'Lucy::Index::DocReader',
            component => $doc_reader,
        );  
    }

    package MySchema;
    use base qw( Lucy::Plan::Schema );

    sub architecture { MyArchitecture->new }

Proceed as normal in your indexer app, making sure that every supplied
document supplies a valid value for the field in question:

    $indexer->add_doc({
        title   => $title,
        content => $content,
        value   => $value,   # <---- Must meet spec.
    });

Then, in your search app:

    my $searcher = Lucy::Search::IndexSearcher->new( 
        index => '/path/to/index',
    );
    my $hits = $searcher->hits( query => $query );
    while ( my $doc = $hits->next ) {
        my $real_doc = $external_document_source->fetch( $doc->{value} );
        ...
    }

=head1 DESCRIPTION

This is a proof-of-concept class to demonstrate alternate implementations for
fetching documents.  It is unsupported.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)