The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
parcel KinoSearch cnick Kino;

/** Create and highlight excerpts.
 *
 * The Highlighter can be used to select relevant snippets from a document,
 * and to surround search terms with highlighting tags.  It handles both stems
 * and phrases correctly and efficiently, using special-purpose data generated
 * at index-time.
 *
 * Create_Excerpt() is the main entry point.  Encode() and Highlight() are
 * documented below as the hooks it calls while assembling an excerpt, so
 * subclasses may override them to customize output.
*/
class KinoSearch::Highlight::Highlighter inherits KinoSearch::Object::Obj {

    /* Instance state.  searcher, field and excerpt_length correspond to the
     * like-named arguments of init(); query is typed Query* here although
     * init() accepts an Obj* (a Query object or a query string).  pre_tag and
     * post_tag are set through Set_Pre_Tag()/Set_Post_Tag(), and compiler is
     * exposed through Get_Compiler().
     *
     * NOTE(review): window_width and slop have no accessors or documentation
     * in this header -- confirm their meaning against the implementation.
     */
    Searcher   *searcher;
    Query      *query;
    CharBuf    *field;
    uint32_t    excerpt_length;
    uint32_t    window_width;
    uint32_t    slop;
    CharBuf    *pre_tag;
    CharBuf    *post_tag;
    Compiler   *compiler;

    /** Constructor.  Takes the same arguments as init(), minus
     * <code>self</code>; see init() for a description of each parameter.
     */
    inert incremented Highlighter*
    new(Searcher *searcher, Obj *query, const CharBuf *field, 
        uint32_t excerpt_length = 200);

    /** Initialize a Highlighter.
     *
     * @param searcher An object which inherits from
     * L<Searcher|KinoSearch::Search::Searcher>, such as an
     * L<IndexSearcher|KinoSearch::Search::IndexSearcher>.
     * @param query Query object or a query string.
     * @param field The name of the field from which to draw the excerpt.  The
     * field must be marked as C<highlightable> (see
     * L<FieldType|KinoSearch::Plan::FieldType>).
     * @param excerpt_length Maximum length of the excerpt, in characters.
     * Defaults to 200.
     */
    public inert Highlighter*
    init(Highlighter *self, Searcher *searcher, Obj *query, 
         const CharBuf *field, uint32_t excerpt_length = 200);

    /** Take a HitDoc object and return a highlighted excerpt as a string if
     * the HitDoc has a value for the specified <code>field</code>.
     */
    public incremented CharBuf*
    Create_Excerpt(Highlighter *self, HitDoc *hit_doc);

    /** Encode text with HTML entities. This method is called internally by
     * Create_Excerpt() for each text fragment when assembling an excerpt.  A
     * subclass can override this if the text should be encoded differently or
     * not at all.
     */
    public incremented CharBuf*
    Encode(Highlighter *self, CharBuf *text);

    /** Find sentence boundaries within the specified range, returning them as
     * an array of Spans.  The "offset" of each Span indicates the start of
     * the sentence, and is measured from 0, not from <code>offset</code>.
     * The Span's "length" member indicates the sentence length in code
     * points.
     *
     * @param text The string to scan.
     * @param offset The place to start looking for offsets, measured in
     * Unicode code points from the top of <code>text</code>.
     * @param length The number of code points from <code>offset</code> to
     * scan. The default value of 0 is a sentinel which indicates to scan
     * until the end of the string.
     */
    incremented VArray*
    Find_Sentences(Highlighter *self, CharBuf *text, int32_t offset = 0, 
                   int32_t length = 0);

    /** Highlight a small section of text.  By default, prepends pre-tag and
     * appends post-tag.  This method is called internally by Create_Excerpt()
     * when assembling an excerpt.
     */
    public incremented CharBuf*
    Highlight(Highlighter *self, const CharBuf *text);

    /** Setter for the pre-tag, which Highlight() prepends by default.  The
     * default value is "<strong>".
     */
    public void
    Set_Pre_Tag(Highlighter *self, const CharBuf *pre_tag);

    /** Setter for the post-tag, which Highlight() appends by default.  The
     * default value is "</strong>".
     */
    public void
    Set_Post_Tag(Highlighter *self, const CharBuf *post_tag);

    /** Accessor for the pre-tag.
     */
    public CharBuf*
    Get_Pre_Tag(Highlighter *self);

    /** Accessor for the post-tag.
     */
    public CharBuf*
    Get_Post_Tag(Highlighter *self);

    /** Accessor for the name of the field from which excerpts are drawn.
     */
    public CharBuf*
    Get_Field(Highlighter *self);

    /** Accessor for the maximum excerpt length, in characters.
     */
    public uint32_t 
    Get_Excerpt_Length(Highlighter *self);

    /** Accessor for the Searcher.
     */
    public Searcher*
    Get_Searcher(Highlighter *self);

    /** Accessor for the Query.
     */
    public Query*
    Get_Query(Highlighter *self);

    /** Accessor for the KinoSearch::Search::Compiler object derived from
     * <code>query</code> and <code>searcher</code>.
     */
    public Compiler*
    Get_Compiler(Highlighter *self);

    /** Decide based on heat map the best fragment of field to concentrate on.
     * Place the result into <code>fragment</code> and return its offset in
     * code points from the top of the field.
     *
     * (Helper function for Create_Excerpt only exposed for testing purposes.)
     */
    int32_t
    Find_Best_Fragment(Highlighter *self, const CharBuf *field_val, 
                       ViewCharBuf *fragment, HeatMap *heat_map);

    /** Take the fragment and determine the best edges for it based on
     * sentence boundaries when possible.  Add ellipses when boundaries cannot
     * be found.
     *
     * NOTE(review): the meaning of the int32_t return value is not stated in
     * this header -- confirm against the implementation.
     *
     * (Helper function for Create_Excerpt only exposed for testing purposes.)
     */
    int32_t
    Raw_Excerpt(Highlighter *self, const CharBuf *field_val, 
                const CharBuf *fragment, CharBuf *raw_excerpt, int32_t top, 
                HeatMap *heat_map, VArray *sentences);

    /** Take the text in raw_excerpt, add highlight tags, encode, and place
     * the result into <code>highlighted</code>.
     *
     * (Helper function for Create_Excerpt only exposed for testing purposes.)
     */
    void
    Highlight_Excerpt(Highlighter *self, VArray *spans, CharBuf *raw_excerpt, 
                      CharBuf *highlighted, int32_t top);

    /** Destructor.
     */
    public void
    Destroy(Highlighter *self);
}

/* Copyright 2005-2011 Marvin Humphrey
 *
 * This program is free software; you can redistribute it and/or modify
 * under the same terms as Perl itself.
 */