The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
#define C_KINO_MATCHPOSTING
#define C_KINO_MATCHPOSTINGSCORER
#define C_KINO_MATCHPOSTINGWRITER
#define C_KINO_MATCHTERMINFOSTEPPER
#define C_KINO_RAWPOSTING
#define C_KINO_TERMINFO
#define C_KINO_TOKEN
#include "KinoSearch/Util/ToolSet.h"

#include "KinoSearch/Index/Posting/MatchPosting.h"
#include "KinoSearch/Analysis/Inversion.h"
#include "KinoSearch/Analysis/Token.h"
#include "KinoSearch/Index/Posting/RawPosting.h"
#include "KinoSearch/Index/PostingList.h"
#include "KinoSearch/Index/PostingPool.h"
#include "KinoSearch/Index/PolyReader.h"
#include "KinoSearch/Index/Segment.h"
#include "KinoSearch/Index/Similarity.h"
#include "KinoSearch/Index/Snapshot.h"
#include "KinoSearch/Index/TermInfo.h"
#include "KinoSearch/Plan/Architecture.h"
#include "KinoSearch/Plan/FieldType.h"
#include "KinoSearch/Plan/Schema.h"
#include "KinoSearch/Search/Compiler.h"
#include "KinoSearch/Store/Folder.h"
#include "KinoSearch/Store/InStream.h"
#include "KinoSearch/Store/OutStream.h"
#include "KinoSearch/Util/MemoryPool.h"

#define MAX_RAW_POSTING_LEN(_text_len) \
        (              sizeof(RawPosting) \
                     + _text_len + 1            /* term text content */ \
        )

MatchPosting*
MatchPost_new(Similarity *sim)
{
    MatchPosting *self = (MatchPosting*)VTable_Make_Obj(MATCHPOSTING);
    return MatchPost_init(self, sim);
}

MatchPosting*
MatchPost_init(MatchPosting *self, Similarity *sim)
{
    self->sim = (Similarity*)INCREF(sim);
    return (MatchPosting*)Post_init((Posting*)self);
}

void
MatchPost_destroy(MatchPosting *self)
{
    DECREF(self->sim);
    SUPER_DESTROY(self, MATCHPOSTING);
}

int32_t
MatchPost_get_freq(MatchPosting *self) { return self->freq; }

void
MatchPost_reset(MatchPosting *self)
{
    self->doc_id = 0;
}

void
MatchPost_read_record(MatchPosting *self, InStream *instream)
{
    const uint32_t doc_code = InStream_Read_C32(instream);
    const uint32_t doc_delta = doc_code >> 1;

    // Apply delta doc and retrieve freq. 
    self->doc_id   += doc_delta;
    if (doc_code & 1) 
        self->freq = 1;
    else
        self->freq = InStream_Read_C32(instream);
}

RawPosting*
MatchPost_read_raw(MatchPosting *self, InStream *instream, int32_t last_doc_id,
                   CharBuf *term_text, MemoryPool *mem_pool)
{
    char *const     text_buf      = (char*)CB_Get_Ptr8(term_text);
    const size_t    text_size     = CB_Get_Size(term_text);
    const uint32_t  doc_code      = InStream_Read_C32(instream);
    const uint32_t  delta_doc     = doc_code >> 1;
    const int32_t   doc_id        = last_doc_id + delta_doc;
    const uint32_t  freq          = (doc_code & 1) 
                                  ? 1 
                                  : InStream_Read_C32(instream);
    size_t raw_post_bytes         = MAX_RAW_POSTING_LEN(text_size);
    void *const allocation        = MemPool_Grab(mem_pool, raw_post_bytes);
    UNUSED_VAR(self);

    return RawPost_new(allocation, doc_id, freq, text_buf, text_size);
}

void
MatchPost_add_inversion_to_pool(MatchPosting *self, PostingPool *post_pool, 
                                Inversion *inversion, FieldType *type, 
                                int32_t doc_id, float doc_boost, 
                                float length_norm)
{
    MemoryPool  *mem_pool = PostPool_Get_Mem_Pool(post_pool);
    Token      **tokens;
    uint32_t     freq;

    UNUSED_VAR(self);
    UNUSED_VAR(type);
    UNUSED_VAR(doc_boost);
    UNUSED_VAR(length_norm);

    Inversion_Reset(inversion);
    while ( (tokens = Inversion_Next_Cluster(inversion, &freq)) != NULL ) {
        Token   *token          = *tokens;
        uint32_t raw_post_bytes = MAX_RAW_POSTING_LEN(token->len);
        RawPosting *raw_posting = RawPost_new(
            MemPool_Grab(mem_pool, raw_post_bytes), doc_id, freq,
            token->text, token->len
        );
        PostPool_Feed(post_pool, &raw_posting);
    }
}

MatchPostingScorer*
MatchPost_make_matcher(MatchPosting *self, Similarity *sim, 
                       PostingList *plist, Compiler *compiler,
                       bool_t need_score)
{
    MatchPostingScorer *matcher 
        = (MatchPostingScorer*)VTable_Make_Obj(MATCHPOSTINGSCORER);
    UNUSED_VAR(self);
    UNUSED_VAR(need_score);
    return MatchPostScorer_init(matcher, sim, plist, compiler);
}

/***************************************************************************/

MatchPostingScorer*
MatchPostScorer_init(MatchPostingScorer *self, Similarity *sim,
                     PostingList *plist, Compiler *compiler)
{
    TermScorer_init((TermScorer*)self, sim, plist, compiler);
    return self;
}

float
MatchPostScorer_score(MatchPostingScorer* self) 
{
    return self->weight;
}

/***************************************************************************/

MatchPostingWriter*
MatchPostWriter_new(Schema *schema, Snapshot *snapshot, Segment *segment, 
                    PolyReader *polyreader, int32_t field_num)
{
    MatchPostingWriter *self 
        = (MatchPostingWriter*)VTable_Make_Obj(MATCHPOSTINGWRITER);
    return MatchPostWriter_init(self, schema, snapshot, segment, polyreader,
        field_num);
}

MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema, 
                     Snapshot *snapshot, Segment *segment, 
                     PolyReader *polyreader, int32_t field_num)
{
    Folder  *folder = PolyReader_Get_Folder(polyreader);
    CharBuf *filename 
        = CB_newf("%o/postings-%i32.dat", Seg_Get_Name(segment), field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
        polyreader, field_num);
    self->outstream = Folder_Open_Out(folder, filename);
    if (!self->outstream) { RETHROW(INCREF(Err_get_error())); }
    DECREF(filename);
    return self;
}

void
MatchPostWriter_destroy(MatchPostingWriter *self)
{
    DECREF(self->outstream);
    SUPER_DESTROY(self, MATCHPOSTINGWRITER);
}

void
MatchPostWriter_write_posting(MatchPostingWriter *self, RawPosting *posting)
{
    OutStream *const outstream   = self->outstream;
    const int32_t    doc_id      = posting->doc_id;
    const uint32_t   delta_doc   = doc_id - self->last_doc_id;
    char  *const     aux_content = posting->blob + posting->content_len;
    if (posting->freq == 1) {
        const uint32_t doc_code = (delta_doc << 1) | 1;
        OutStream_Write_C32(outstream, doc_code);
    }
    else {
        const uint32_t doc_code = delta_doc << 1;
        OutStream_Write_C32(outstream, doc_code);
        OutStream_Write_C32(outstream, posting->freq);
    }
    OutStream_Write_Bytes(outstream, aux_content, posting->aux_len);
    self->last_doc_id = doc_id;
}

void
MatchPostWriter_start_term(MatchPostingWriter *self, TermInfo *tinfo)
{
    self->last_doc_id   = 0;
    tinfo->post_filepos = OutStream_Tell(self->outstream);
}

void
MatchPostWriter_update_skip_info(MatchPostingWriter *self, TermInfo *tinfo)
{
    tinfo->post_filepos = OutStream_Tell(self->outstream);
}

/***************************************************************************/

MatchTermInfoStepper*
MatchTInfoStepper_new(Schema *schema)
{
    MatchTermInfoStepper *self 
        = (MatchTermInfoStepper*)VTable_Make_Obj(MATCHTERMINFOSTEPPER);
    return MatchTInfoStepper_init(self, schema);
}

MatchTermInfoStepper*
MatchTInfoStepper_init(MatchTermInfoStepper *self, Schema *schema)
{
    Architecture *arch = Schema_Get_Architecture(schema);
    TermStepper_init((TermStepper*)self);
    self->skip_interval = Arch_Skip_Interval(arch);
    self->value = (Obj*)TInfo_new(0);
    return self;
}

void
MatchTInfoStepper_reset(MatchTermInfoStepper *self)
{
    TInfo_Reset((TermInfo*)self->value);
}

void
MatchTInfoStepper_write_key_frame(MatchTermInfoStepper *self, 
                                  OutStream *outstream, Obj *value)
{
    TermInfo *tinfo    = (TermInfo*)CERTIFY(value, TERMINFO);
    int32_t   doc_freq = TInfo_Get_Doc_Freq(tinfo);

    // Write doc_freq. 
    OutStream_Write_C32(outstream, doc_freq);

    // Write postings file pointer. 
    OutStream_Write_C64(outstream, tinfo->post_filepos);

    // Write skip file pointer (maybe). 
    if (doc_freq >= self->skip_interval) {
        OutStream_Write_C64(outstream, tinfo->skip_filepos);
    }

    TInfo_Mimic((TermInfo*)self->value, (Obj*)tinfo);
}

void
MatchTInfoStepper_write_delta(MatchTermInfoStepper *self, 
                              OutStream *outstream, Obj *value)
{
    TermInfo *tinfo      = (TermInfo*)CERTIFY(value, TERMINFO);
    TermInfo *last_tinfo = (TermInfo*)self->value;
    int32_t   doc_freq   = TInfo_Get_Doc_Freq(tinfo);
    int64_t   post_delta = tinfo->post_filepos - last_tinfo->post_filepos;

    // Write doc_freq. 
    OutStream_Write_C32(outstream, doc_freq);

    // Write postings file pointer delta. 
    OutStream_Write_C64(outstream, post_delta);

    // Write skip file pointer (maybe). 
    if (doc_freq >= self->skip_interval) {
        OutStream_Write_C64(outstream, tinfo->skip_filepos);
    }

    TInfo_Mimic((TermInfo*)self->value, (Obj*)tinfo);
}

void
MatchTInfoStepper_read_key_frame(MatchTermInfoStepper *self, 
                                 InStream *instream)
{ 
    TermInfo *const tinfo = (TermInfo*)self->value;

    // Read doc freq. 
    tinfo->doc_freq = InStream_Read_C32(instream);

    // Read postings file pointer. 
    tinfo->post_filepos = InStream_Read_C64(instream);

    // Maybe read skip pointer. 
    if (tinfo->doc_freq >= self->skip_interval) {
        tinfo->skip_filepos = InStream_Read_C64(instream);
    }
    else {
        tinfo->skip_filepos = 0;
    }
}

void
MatchTInfoStepper_read_delta(MatchTermInfoStepper *self, InStream *instream)
{ 
    TermInfo *const tinfo = (TermInfo*)self->value;

    // Read doc freq. 
    tinfo->doc_freq = InStream_Read_C32(instream);

    // Adjust postings file pointer. 
    tinfo->post_filepos += InStream_Read_C64(instream);

    // Maybe read skip pointer. 
    if (tinfo->doc_freq >= self->skip_interval) {
        tinfo->skip_filepos = InStream_Read_C64(instream);
    }
    else {
        tinfo->skip_filepos = 0;
    }
}

/* Copyright 2007-2011 Marvin Humphrey
 *
 * This program is free software; you can redistribute it and/or modify
 * under the same terms as Perl itself.
 */