The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Create and highlight excerpts.
 *
 * The Highlighter can be used to select relevant snippets from a document,
 * and to surround search terms with highlighting tags.  It handles both stems
 * and phrases correctly and efficiently, using special-purpose data generated
 * at index-time.
*/
class Lucy::Highlight::Highlighter inherits Lucy::Object::Obj {

    /* Instance variables.  searcher, query, field, excerpt_length, pre_tag,
     * post_tag and compiler each have a matching Get_* accessor declared
     * below.
     * NOTE(review): window_width and slop have no accessor and are not
     * described anywhere in this header -- consult the implementation for
     * how they are derived and used.
     */
    Searcher   *searcher;
    Query      *query;
    CharBuf    *field;
    uint32_t    excerpt_length;
    uint32_t    window_width;
    uint32_t    slop;
    CharBuf    *pre_tag;
    CharBuf    *post_tag;
    Compiler   *compiler;

    /** Constructor.  Takes the same arguments, with the same defaults, as
     * init(); see init() for a description of each parameter.
     */
    inert incremented Highlighter*
    new(Searcher *searcher, Obj *query, const CharBuf *field,
        uint32_t excerpt_length = 200);

    /** Initialize a Highlighter.
     *
     * @param searcher An object which inherits from
     * L<Searcher|Lucy::Search::Searcher>, such as an
     * L<IndexSearcher|Lucy::Search::IndexSearcher>.
     * @param query Query object or a query string.
     * @param field The name of the field from which to draw the excerpt.  The
     * field must be marked as C<highlightable> (see
     * L<FieldType|Lucy::Plan::FieldType>).
     * @param excerpt_length Maximum length of the excerpt, in characters.
     * Defaults to 200.
     */
    public inert Highlighter*
    init(Highlighter *self, Searcher *searcher, Obj *query,
         const CharBuf *field, uint32_t excerpt_length = 200);

    /** Take a HitDoc object and return a highlighted excerpt as a string if
     * the HitDoc has a value for the specified <code>field</code>.
     */
    public incremented CharBuf*
    Create_Excerpt(Highlighter *self, HitDoc *hit_doc);

    /** Encode text with HTML entities. This method is called internally by
     * Create_Excerpt() for each text fragment when assembling an excerpt.  A
     * subclass can override this if the text should be encoded differently or
     * not at all.
     */
    public incremented CharBuf*
    Encode(Highlighter *self, CharBuf *text);

    /** Find sentence boundaries within the specified range, returning them as
     * an array of Spans.  The "offset" of each Span indicates the start of
     * the sentence, and is measured from 0, not from <code>offset</code>.
     * The Span's "length" member indicates the sentence length in code
     * points.
     *
     * @param text The string to scan.
     * @param offset The place to start looking for sentence boundaries,
     * measured in Unicode code points from the top of <code>text</code>.
     * @param length The number of code points from <code>offset</code> to
     * scan. The default value of 0 is a sentinel which indicates to scan
     * until the end of the string.
     */
    incremented VArray*
    Find_Sentences(Highlighter *self, CharBuf *text, int32_t offset = 0,
                   int32_t length = 0);

    /** Highlight a small section of text.  By default, prepends pre-tag and
     * appends post-tag.  This method is called internally by Create_Excerpt()
     * when assembling an excerpt.
     */
    public incremented CharBuf*
    Highlight(Highlighter *self, const CharBuf *text);

    /** Setter.  The default value is "<strong>".
     */
    public void
    Set_Pre_Tag(Highlighter *self, const CharBuf *pre_tag);

    /** Setter.  The default value is "</strong>".
     */
    public void
    Set_Post_Tag(Highlighter *self, const CharBuf *post_tag);

    /** Accessor.
     */
    public CharBuf*
    Get_Pre_Tag(Highlighter *self);

    /** Accessor.
     */
    public CharBuf*
    Get_Post_Tag(Highlighter *self);

    /** Accessor.
     */
    public CharBuf*
    Get_Field(Highlighter *self);

    /** Accessor.
     */
    public uint32_t
    Get_Excerpt_Length(Highlighter *self);

    /** Accessor.
     */
    public Searcher*
    Get_Searcher(Highlighter *self);

    /** Accessor.
     */
    public Query*
    Get_Query(Highlighter *self);

    /** Accessor for the Lucy::Search::Compiler object derived from
     * <code>query</code> and <code>searcher</code>.
     */
    public Compiler*
    Get_Compiler(Highlighter *self);

    /** Decide based on heat map the best fragment of field to concentrate on.
     * Place the result into <code>fragment</code> and return its offset in
     * code points from the top of the field.
     *
     * (Helper function for Create_Excerpt only exposed for testing purposes.)
     */
    int32_t
    Find_Best_Fragment(Highlighter *self, const CharBuf *field_val,
                       ViewCharBuf *fragment, HeatMap *heat_map);

    /** Take the fragment and determine the best edges for it based on
     * sentence boundaries when possible.  Add ellipses when boundaries cannot
     * be found.
     *
     * (Helper function for Create_Excerpt only exposed for testing purposes.)
     */
    /* NOTE(review): the meaning of the int32_t return value and of the
     * <code>top</code> argument is not stated in this header; presumably
     * <code>raw_excerpt</code> is the output buffer -- confirm against the
     * implementation.
     */
    int32_t
    Raw_Excerpt(Highlighter *self, const CharBuf *field_val,
                const CharBuf *fragment, CharBuf *raw_excerpt, int32_t top,
                HeatMap *heat_map, VArray *sentences);

    /** Take the text in raw_excerpt, add highlight tags, encode, and place
     * the result into <code>highlighted</code>.
     *
     * (Helper function for Create_Excerpt only exposed for testing purposes.)
     */
    /* NOTE(review): <code>spans</code> and <code>top</code> are not
     * described in this header -- confirm their contract against the
     * implementation.
     */
    void
    Highlight_Excerpt(Highlighter *self, VArray *spans, CharBuf *raw_excerpt,
                      CharBuf *highlighted, int32_t top);

    /* Destructor.  NOTE(review): presumably releases the object references
     * held in the member variables above; the implementation is not visible
     * from this header.
     */
    public void
    Destroy(Highlighter *self);
}