/* The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event. */
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Build inverted indexes.
 *
 * The Indexer class is Apache Lucy's primary tool for managing the content of
 * inverted indexes, which may later be searched using
 * L<IndexSearcher|Lucy::Search::IndexSearcher>.
 *
 * In general, only one Indexer at a time may write to an index safely.  If a
 * write lock cannot be secured, new() will throw an exception.
 *
 * If an index is located on a shared volume, each writer application must
 * identify itself by supplying an
 * L<IndexManager|Lucy::Index::IndexManager> with a unique
 * <code>host</code> id to Indexer's constructor or index corruption will
 * occur.  See L<Lucy::Docs::FileLocking> for a detailed discussion.
 *
 * Note: at present, Delete_By_Term() and Delete_By_Query() only affect
 * documents which had been previously committed to the index -- and not any
 * documents added this indexing session but not yet committed.  This may
 * change in a future update.
 */
class Lucy::Index::Indexer inherits Lucy::Object::Obj {

    /* Instance state.  This header only declares the members; how each one
     * is populated and used is defined by the implementation.  The grouping
     * notes below are inferred from names and types -- confirm against the
     * implementation before relying on them.
     *
     * schema .. file_purger: collaborating objects, presumably set up by
     *     init().
     * write_lock, merge_lock: Lock objects; presumably the write lock
     *     mentioned in the class description and a separate lock for merging.
     * stock_doc: the Doc returned by Get_Stock_Doc() (presumed).
     * snapfile: a CharBuf; presumably the name of a snapshot file.
     * truncate, optimize, needs_commit, prepared: boolean state flags;
     *     presumably tied to the TRUNCATE flag, Optimize(), pending changes,
     *     and Prepare_Commit() respectively.
     */
    Schema            *schema;
    Folder            *folder;
    Segment           *segment;
    IndexManager      *manager;
    PolyReader        *polyreader;
    Snapshot          *snapshot;
    SegWriter         *seg_writer;
    DeletionsWriter   *del_writer;
    FilePurger        *file_purger;
    Lock              *write_lock;
    Lock              *merge_lock;
    Doc               *stock_doc;
    CharBuf           *snapfile;
    bool_t             truncate;
    bool_t             optimize;
    bool_t             needs_commit;
    bool_t             prepared;

    /* Class-level (inert) integer constants.  Presumably the values accepted
     * by the <code>flags</code> argument of new() and init(); their numeric
     * values are defined in the implementation, not in this header.
     */
    public inert int32_t TRUNCATE;
    public inert int32_t CREATE;

    /** Constructor.  Takes the same arguments, with the same defaults, as
     * init() minus <code>self</code>; presumably allocates an Indexer and
     * hands it to init() -- confirm in the implementation.  The return value
     * is declared <code>incremented</code>.
     *
     * @param schema A Schema.  Optional; defaults to NULL.
     * @param index Either a string filepath or a Folder.  Required.
     * @param manager An IndexManager.  Optional; defaults to NULL.
     * @param flags Flags governing behavior.  Defaults to 0.
     */
    public inert incremented Indexer*
    new(Schema *schema = NULL, Obj *index, IndexManager *manager = NULL,
        int32_t flags = 0);

    /** Open a new Indexer.  If the index already exists, update it.
     *
     * @param schema A Schema.  Optional; defaults to NULL.
     * @param index Either a string filepath or a Folder.  Required (no
     * default).
     * @param manager An IndexManager.  Optional; defaults to NULL.
     * @param flags Flags governing behavior.  Defaults to 0.  (Presumably
     * built from the TRUNCATE and CREATE constants -- confirm.)
     */
    public inert Indexer*
    init(Indexer *self, Schema *schema = NULL, Obj *index,
         IndexManager *manager = NULL, int32_t flags = 0);

    /** Add a document to the index.
     *
     * @param doc A Lucy::Document::Doc object.
     * @param boost A floating point weight which affects how this document
     * scores.  Defaults to 1.0.
     */
    public void
    Add_Doc(Indexer *self, Doc *doc, float boost = 1.0);

    /** Absorb an existing index into this one.  The two indexes must
     * have matching Schemas.
     *
     * @param index Either an index path name or a Folder.
     */
    public void
    Add_Index(Indexer *self, Obj *index);

    /** Mark documents which contain the supplied term as deleted, so that
     * they will be excluded from search results and eventually removed
     * altogether.  The change is not apparent to search apps until after
     * Commit() succeeds.
     *
     * @param field The name of an indexed field. (If it is not spec'd as
     * <code>indexed</code>, an error will occur.)
     * @param term The term which identifies docs to be marked as deleted.  If
     * <code>field</code> is associated with an Analyzer, <code>term</code>
     * will be processed automatically (so don't pre-process it yourself).
     */
    public void
    Delete_By_Term(Indexer *self, CharBuf *field, Obj *term);

    /** Mark documents which match the supplied Query as deleted.
     *
     * @param query A L<Query|Lucy::Search::Query>.
     */
    public void
    Delete_By_Query(Indexer *self, Query *query);

    /** Optimize the index for search-time performance.  This may take a
     * while, as it can involve rewriting large amounts of data.
     */
    public void
    Optimize(Indexer *self);

    /** Commit any changes made to the index.  Until this is called, none of
     * the changes made during an indexing session are permanent.
     *
     * Calling Commit() invalidates the Indexer, so if you want to make more
     * changes you'll need a new one.
     */
    public void
    Commit(Indexer *self);

    /** Perform the expensive setup for Commit() in advance, so that Commit()
     * completes quickly.  (If Prepare_Commit() is not called explicitly by
     * the user, Commit() will call it internally.)
     */
    public void
    Prepare_Commit(Indexer *self);

    /** Accessor for seg_writer member var.
     */
    public SegWriter*
    Get_Seg_Writer(Indexer *self);

    /** Presumably an accessor for the stock_doc member var, by analogy with
     * Get_Seg_Writer() -- confirm in the implementation.  Not declared
     * <code>public</code>, unlike the other methods of this class.
     */
    Doc*
    Get_Stock_Doc(Indexer *self);

    /** Destructor.  Presumably releases the objects held in the member
     * variables -- confirm in the implementation.
     */
    public void
    Destroy(Indexer *self);
}