The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Build inverted indexes.
 *
 * The Indexer class is Apache Lucy's primary tool for managing the content of
 * inverted indexes, which may later be searched using
 * [](cfish:IndexSearcher).
 *
 * In general, only one Indexer at a time may write to an index safely.  If a
 * write lock cannot be secured, new() will throw an exception.
 *
 * If an index is located on a shared volume, each writer application must
 * identify itself by supplying an
 * [](cfish:IndexManager) with a unique
 * `host` id to Indexer's constructor or index corruption will
 * occur.  See [](FileLocking) for a detailed
 * discussion.
 *
 * Note: at present, [](cfish:.Delete_By_Term) and [](cfish:.Delete_By_Query) only affect
 * documents which have previously been committed to the index -- not
 * documents added during the current indexing session but not yet committed.
 * This may change in a future update.
 */
public class Lucy::Index::Indexer inherits Clownfish::Obj {

    /* Instance member variables.
     *
     * `schema` and `seg_writer` are the values exposed by Get_Schema() and
     * Get_Seg_Writer() respectively, per those methods' documentation.
     *
     * NOTE(review): the roles of the remaining members are not visible from
     * this header.  Judging by type and name only, they presumably hold the
     * index Folder, the Segment being written, the IndexManager supplied to
     * the constructor, a reader and Snapshot of the existing index, the
     * deletions writer, the file purger, the write/merge locks, a reusable
     * Doc, and the snapshot file name -- confirm against the implementation.
     */
    Schema            *schema;
    Folder            *folder;
    Segment           *segment;
    IndexManager      *manager;
    PolyReader        *polyreader;
    Snapshot          *snapshot;
    SegWriter         *seg_writer;
    DeletionsWriter   *del_writer;
    FilePurger        *file_purger;
    Lock              *write_lock;
    Lock              *merge_lock;
    Doc               *stock_doc;
    String            *snapfile;
    /* Boolean session state.
     * NOTE(review): presumably `truncate` mirrors the TRUNCATE flag,
     * `optimize` records a call to Optimize(), `needs_commit` records that
     * there are pending changes, and `prepared` records that Prepare_Commit()
     * has already run -- confirm against the implementation.
     */
    bool               truncate;
    bool               optimize;
    bool               needs_commit;
    bool               prepared;

    /* Class-level (inert) integer constants.
     * NOTE(review): presumably bit flags meant to be combined into the
     * `flags` argument of new() and init(); their values and exact effect
     * are defined outside this header -- confirm.
     */
    public inert int32_t TRUNCATE;
    public inert int32_t CREATE;

    /** Open a new Indexer.  If the index already exists, update it.
     *
     * If a write lock cannot be secured, an exception is thrown.
     *
     * @param schema A Schema.  May be omitted (defaults to NULL).
     * @param index Either a string filepath or a Folder.
     * @param manager An IndexManager.  May be omitted (defaults to NULL).
     * @param flags Flags governing behavior.  Defaults to 0.
     */
    public inert incremented Indexer*
    new(Schema *schema = NULL, Obj *index, IndexManager *manager = NULL,
        int32_t flags = 0);

    /** Initialize an Indexer.
     *
     * @param schema A Schema.  May be omitted (defaults to NULL).
     * @param index Either a string filepath or a Folder.
     * @param manager An IndexManager.  May be omitted (defaults to NULL).
     * @param flags Flags governing behavior.  Defaults to 0.
     */
    public inert Indexer*
    init(Indexer *self, Schema *schema = NULL, Obj *index,
         IndexManager *manager = NULL, int32_t flags = 0);

    /** Add a document to the index.
     *
     * @param doc A Lucy::Document::Doc object.
     * @param boost A floating point weight which affects how this document
     * scores.  Defaults to 1.0.
     */
    public void
    Add_Doc(Indexer *self, Doc *doc, float boost = 1.0);

    /** Absorb an existing index into this one.  The two indexes must
     * have matching Schemas.
     *
     * @param index Either an index path name or a Folder.
     */
    public void
    Add_Index(Indexer *self, Obj *index);

    /** Mark documents which contain the supplied term as deleted, so that
     * they will be excluded from search results and eventually removed
     * altogether.  The change is not apparent to search apps until after
     * [](cfish:.Commit) succeeds.
     *
     * @param field The name of an indexed field. (If it is not spec'd as
     * `indexed`, an error will occur.)
     * @param term The term which identifies docs to be marked as deleted.  If
     * `field` is associated with an Analyzer, `term`
     * will be processed automatically (so don't pre-process it yourself).
     */
    public void
    Delete_By_Term(Indexer *self, String *field, Obj *term);

    /** Mark documents which match the supplied Query as deleted.
     *
     * @param query A [](cfish:Query).
     */
    public void
    Delete_By_Query(Indexer *self, Query *query);

    /** Mark the document identified by the supplied document ID as deleted.
     *
     * @param doc_id A [document id](DocIDs).
     */
    public void
    Delete_By_Doc_ID(Indexer *self, int32_t doc_id);

    /** Optimize the index for search-time performance.  This may take a
     * while, as it can involve rewriting large amounts of data.
     *
     * Every Indexer session which changes index content and ends in a
     * [](cfish:.Commit) creates a new segment.  Once written, segments are never
     * modified.  However, they are periodically recycled by feeding their
     * content into the segment currently being written.
     *
     * The [](cfish:.Optimize) method causes all existing index content to be fed back
     * into the Indexer.  When [](cfish:.Commit) completes after an [](cfish:.Optimize), the
     * index will consist of one segment.  So [](cfish:.Optimize) must be called
     * before [](cfish:.Commit).  Also, optimizing a fresh index created from scratch
     * has no effect.
     *
     * Historically, there was a significant search-time performance benefit
     * to collapsing down to a single segment versus even two segments.  Now
     * the effect of collapsing is much less significant, and calling
     * [](cfish:.Optimize) is rarely justified.
     */
    public void
    Optimize(Indexer *self);

    /** Commit any changes made to the index.  Until this is called, none of
     * the changes made during an indexing session are permanent.
     *
     * Calling [](cfish:.Commit) invalidates the Indexer, so if you want to make more
     * changes you'll need a new one.
     */
    public void
    Commit(Indexer *self);

    /** Perform the expensive setup for [](cfish:.Commit) in advance, so that [](cfish:.Commit)
     * completes quickly.  (If [](cfish:.Prepare_Commit) is not called explicitly by
     * the user, [](cfish:.Commit) will call it internally.)
     */
    public void
    Prepare_Commit(Indexer *self);

    /** Accessor for schema.
     */
    public Schema*
    Get_Schema(Indexer *self);

    /** Accessor for seg_writer member var.
     */
    SegWriter*
    Get_Seg_Writer(Indexer *self);

    /* Non-public method returning a Doc.
     * NOTE(review): presumably the accessor for the stock_doc member var, by
     * analogy with Get_Seg_Writer() -- confirm against the implementation.
     */
    Doc*
    Get_Stock_Doc(Indexer *self);

    /* NOTE(review): presumably the destructor override inherited from
     * Clownfish::Obj, responsible for releasing the member objects declared
     * at the top of this class -- confirm against the implementation.
     */
    public void
    Destroy(Indexer *self);
}