The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Query-to-Matcher compiler.
 *
 * The purpose of the Compiler class is to take a specification in the form of
 * a L<Query|Lucy::Search::Query> object and compile a
 * L<Matcher|Lucy::Search::Matcher> object that can do real work.
 *
 * The simplest Compiler subclasses -- such as those associated with
 * constant-scoring Query types -- might simply implement a Make_Matcher()
 * method which passes along information verbatim from the Query to the
 * Matcher's constructor.
 *
 * However it is common for the Compiler to perform some calculations which
 * affect its "weight" -- a floating point multiplier that the Matcher will
 * factor into each document's score.  If that is the case, then the Compiler
 * subclass may wish to override Get_Weight(), Sum_Of_Squared_Weights(), and
 * Apply_Norm_Factor().
 *
 * Compiling a Matcher is a two stage process.
 *
 * The first stage takes place during the Compiler's construction, which is
 * where the Query object meets a L<Searcher|Lucy::Search::Searcher>
 * object for the first time.  Searchers operate on a specific document
 * collection and they can tell you certain statistical information about the
 * collection -- such as how many total documents are in the collection, or
 * how many documents in the collection a particular term is present in.
 * Lucy's core Compiler classes plug this information into the classic
 * TF/IDF weighting algorithm to adjust the Compiler's weight; custom
 * subclasses might do something similar.
 *
 * The second stage of compilation is the Make_Matcher() method, which is where
 * the Compiler meets a L<SegReader|Lucy::Index::SegReader> object.
 * SegReaders are associated with a single segment within a single index on a
 * single machine, and are thus lower-level than Searchers, which may
 * represent a document collection spread out over a search cluster
 * (comprising several indexes and many segments).  The Compiler object can
 * use new information supplied by the SegReader -- such as whether a term is
 * missing from the local index even though it is present within the larger
 * collection represented by the Searcher -- when figuring out what to feed to
 * the Matcher's constructor, or whether Make_Matcher() should return a
 * Matcher at all.
 */
public class Lucy::Search::Compiler inherits Lucy::Search::Query {

    /* The Query this Compiler was built from; see Get_Parent(). */
    Query        *parent;
    /* The Similarity associated with this Compiler -- presumably the value
     * returned by Get_Similarity() (confirm against the implementation). */
    Similarity   *sim;

    /** Abstract constructor.
     *
     * @param parent The parent Query.
     * @param searcher A Lucy::Search::Searcher, such as an
     * IndexSearcher.
     * @param similarity A Similarity.  Optional: the declaration defaults it
     * to NULL.
     * @param boost An arbitrary scoring multiplier.  Defaults to the boost of
     * the parent Query.  NOTE(review): no default is declared for `boost` in
     * this signature, so that default is presumably applied elsewhere (e.g.
     * by a host-language binding) -- confirm.
     * @return the initialized Compiler.
     */
    public inert Compiler*
    init(Compiler *self, Query *parent, Searcher *searcher,
         Similarity *similarity = NULL, float boost);

    /** Factory method returning a Matcher.  Abstract: every concrete
     * subclass must supply an implementation.
     *
     * @param reader A SegReader.
     * @param need_score Indicate whether the Matcher must implement Score().
     * @return a Matcher, or NULL if the Matcher would have matched no
     * documents.
     */
    public abstract incremented nullable Matcher*
    Make_Matcher(Compiler *self, SegReader *reader, bool need_score);

    /** Return the Compiler's numerical weight, a scoring multiplier.  By
     * default, returns the object's boost.
     */
    public float
    Get_Weight(Compiler *self);

    /** Accessor for the Compiler's Similarity object.
     *
     * @return the Similarity; declared nullable, so callers should be
     * prepared for NULL.
     */
    public nullable Similarity*
    Get_Similarity(Compiler *self);

    /** Accessor for the Compiler's parent Query object.
     *
     * @return the parent Query.
     */
    public Query*
    Get_Parent(Compiler *self);

    /** Compute and return a raw weighting factor.  (This quantity is used by
     * Normalize()).  By default, simply returns 1.0.
     */
    public float
    Sum_Of_Squared_Weights(Compiler *self);

    /** Apply a floating point normalization multiplier.  For a TermCompiler,
     * this involves multiplying its own weight by the supplied factor;
     * combining classes such as ORCompiler would apply the factor recursively
     * to their children.
     *
     * The default implementation is a no-op; subclasses may wish to multiply
     * their internal weight by the supplied factor.
     *
     * @param factor The multiplier.
     */
    public void
    Apply_Norm_Factor(Compiler *self, float factor);

    /** Take a newly minted Compiler object and apply query-specific
     * normalization factors.  Should be invoked by Query subclasses during
     * Make_Compiler() for top-level nodes.
     *
     * For a TermQuery, the scoring formula is approximately:
     *
     *     (tf_d * idf_t / norm_d) * (tf_q * idf_t / norm_q)
     *
     * Normalize() is theoretically concerned with applying the second half of
     * that formula to the Compiler's weight. What actually happens depends
     * on how the Compiler and Similarity methods called internally are
     * implemented.
     */
    public void
    Normalize(Compiler *self);

    /** Return an array of Span objects, indicating where in the given field
     * the text that matches the parent Query occurs and how well each snippet
     * matches.  The Span's offset and length are measured in Unicode code
     * points.
     *
     * The default implementation returns an empty array.
     *
     * @param searcher A Searcher.
     * @param doc_vec A DocVector.
     * @param field The name of the field.
     * @return an array of Span objects (empty by default).
     */
    public incremented VArray*
    Highlight_Spans(Compiler *self, Searcher *searcher,
                    DocVector *doc_vec, String *field);

    /** Serialize the Compiler to an OutStream.
     *
     * NOTE(review): the exact set of fields written is defined by the
     * implementation, which is not visible in this header.
     *
     * @param outstream The OutStream to write to.
     */
    public void
    Serialize(Compiler *self, OutStream *outstream);

    /** Counterpart to Serialize(): populate a Compiler from an InStream.
     *
     * `self` is declared `decremented` and the return value `incremented`,
     * so the caller gives up its reference to the object passed in and owns
     * the object returned.
     *
     * @param instream The InStream to read from.
     * @return the deserialized Compiler.
     */
    public incremented Compiler*
    Deserialize(decremented Compiler *self, InStream *instream);

    /** Equality test against another object.
     *
     * NOTE(review): which attributes take part in the comparison is decided
     * by the implementation -- confirm there.
     *
     * @param other The object to compare against.
     * @return true if the objects are considered equal, false otherwise.
     */
    public bool
    Equals(Compiler *self, Obj *other);

    /** Return a String representation of the Compiler.
     *
     * @return a new String; the return value is declared `incremented`, so
     * the caller owns it.
     */
    public incremented String*
    To_String(Compiler *self);

    /** Destructor.  Presumably releases the references held in `parent` and
     * `sim` before invoking the superclass destructor (confirm against the
     * implementation).
     */
    public void
    Destroy(Compiler *self);
}