The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define C_LUCY_MATCHPOSTING
#define C_LUCY_MATCHPOSTINGMATCHER
#define C_LUCY_MATCHPOSTINGWRITER
#define C_LUCY_MATCHTERMINFOSTEPPER
#define C_LUCY_RAWPOSTING
#define C_LUCY_TERMINFO
#define C_LUCY_TOKEN
#include "Lucy/Util/ToolSet.h"

#include "Lucy/Index/Posting/MatchPosting.h"
#include "Lucy/Analysis/Inversion.h"
#include "Lucy/Analysis/Token.h"
#include "Lucy/Index/Posting/RawPosting.h"
#include "Lucy/Index/PostingList.h"
#include "Lucy/Index/PostingPool.h"
#include "Lucy/Index/PolyReader.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Index/Similarity.h"
#include "Lucy/Index/Snapshot.h"
#include "Lucy/Index/TermInfo.h"
#include "Lucy/Plan/Architecture.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Search/Compiler.h"
#include "Lucy/Store/Folder.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/OutStream.h"
#include "Lucy/Util/MemoryPool.h"

/* Number of bytes to reserve for a RawPosting built by this file: the base
 * object size plus the term text length plus one additional byte.
 *
 * Both arguments are parenthesized in the expansion so that callers may pass
 * arbitrary expressions (conditionals, shifts, ...) without the surrounding
 * `+` operators re-associating them.
 */
#define MAX_RAW_POSTING_LEN(raw_post_size, text_len) \
    (              (raw_post_size) \
                   + (text_len) + 1           /* term text content */ \
    )

/* Allocate a MatchPosting with Class_Make_Obj() and pass it, together with
 * `sim`, to MatchPost_init().  Returns whatever MatchPost_init() returns.
 */
MatchPosting*
MatchPost_new(Similarity *sim) {
    return MatchPost_init((MatchPosting*)Class_Make_Obj(MATCHPOSTING), sim);
}

/* Initialize `self`: store a new reference to `sim` in the instance
 * variables, then run the parent initializer Post_init() and return its
 * result cast back to MatchPosting*.
 */
MatchPosting*
MatchPost_init(MatchPosting *self, Similarity *sim) {
    // Take our own reference to the Similarity before chaining up.
    MatchPost_IVARS(self)->sim = (Similarity*)INCREF(sim);

    return (MatchPosting*)Post_init((Posting*)self);
}

/* Destructor: release the reference to `sim` held in the instance variables
 * and then invoke the parent class's destructor via SUPER_DESTROY.
 */
void
MatchPost_Destroy_IMP(MatchPosting *self) {
    DECREF(MatchPost_IVARS(self)->sim);
    SUPER_DESTROY(self, MATCHPOSTING);
}

/* Return the `freq` instance variable, converted to int32_t. */
int32_t
MatchPost_Get_Freq_IMP(MatchPosting *self) {
    MatchPostingIVARS *const ivars = MatchPost_IVARS(self);
    return (int32_t)ivars->freq;
}

/* Reset the posting by setting its `doc_id` back to 0.  Read_Record
 * accumulates doc id deltas onto this value.
 */
void
MatchPost_Reset_IMP(MatchPosting *self) {
    MatchPostingIVARS *const ivars = MatchPost_IVARS(self);
    ivars->doc_id = 0;
}

/* Decode one posting record from `instream` into `self`.
 *
 * The first value read (via InStream_Read_CU32) packs two things: its upper
 * bits are a doc id delta, which is added to the current `doc_id`, and its
 * lowest bit is a flag.  When the flag is set the frequency is 1 and nothing
 * more is read; otherwise the frequency follows as a second value.
 */
void
MatchPost_Read_Record_IMP(MatchPosting *self, InStream *instream) {
    MatchPostingIVARS *const ivars = MatchPost_IVARS(self);
    const uint32_t packed = InStream_Read_CU32(instream);

    // Upper bits: delta to apply to the running doc id.
    ivars->doc_id += packed >> 1;

    // Low bit set means freq == 1 was elided from the stream.
    ivars->freq = (packed & 1) ? 1 : InStream_Read_CU32(instream);
}

/* Read one posting record from `instream` and build a RawPosting from it.
 *
 * The record is decoded the same way as in Read_Record: a packed value whose
 * upper bits are a doc id delta (added to `last_doc_id`) and whose low bit
 * indicates that the frequency is 1; otherwise the frequency is read next.
 *
 * Memory for the RawPosting is taken from `mem_pool` and sized with
 * MAX_RAW_POSTING_LEN from the RawPosting base allocation size and the byte
 * size of `term_text`.  `self` is not used.
 */
RawPosting*
MatchPost_Read_Raw_IMP(MatchPosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    UNUSED_VAR(self);

    const char *const term_ptr = Str_Get_Ptr8(term_text);
    const size_t      term_len = Str_Get_Size(term_text);

    // Decode the doc id delta and the (possibly elided) frequency.
    const uint32_t packed = InStream_Read_CU32(instream);
    const int32_t  doc_id = last_doc_id + (int32_t)(packed >> 1);
    uint32_t       freq   = 1;
    if (!(packed & 1)) {
        freq = InStream_Read_CU32(instream);
    }

    // Carve out room for the object plus its term text.
    const size_t obj_size   = Class_Get_Obj_Alloc_Size(RAWPOSTING);
    const size_t alloc_size = MAX_RAW_POSTING_LEN(obj_size, term_len);
    void *const  memory     = MemPool_Grab(mem_pool, alloc_size);

    return RawPost_new(memory, doc_id, freq, term_ptr, term_len);
}

/* Feed one RawPosting per token cluster of `inversion` into `post_pool`.
 *
 * The inversion is reset and then walked cluster by cluster with
 * Inversion_Next_Cluster(), which also reports the cluster's frequency.  For
 * each cluster a RawPosting is built from the first token's text, `doc_id`
 * and that frequency, using memory grabbed from the pool's MemoryPool, and
 * is handed to PostPool_Feed().
 *
 * `self`, `type`, `doc_boost` and `length_norm` are not used.
 */
void
MatchPost_Add_Inversion_To_Pool_IMP(MatchPosting *self,
                                    PostingPool *post_pool,
                                    Inversion *inversion, FieldType *type,
                                    int32_t doc_id, float doc_boost,
                                    float length_norm) {
    UNUSED_VAR(self);
    UNUSED_VAR(type);
    UNUSED_VAR(doc_boost);
    UNUSED_VAR(length_norm);

    MemoryPool  *pool     = PostPool_Get_Mem_Pool(post_pool);
    const size_t obj_size = Class_Get_Obj_Alloc_Size(RAWPOSTING);

    Inversion_Reset(inversion);
    for (;;) {
        uint32_t freq;
        Token  **cluster = Inversion_Next_Cluster(inversion, &freq);
        if (cluster == NULL) {
            break;
        }

        // The first token of the cluster supplies the term text.
        TokenIVARS *const first = Token_IVARS(*cluster);
        size_t alloc_size = MAX_RAW_POSTING_LEN(obj_size, first->len);
        void  *memory     = MemPool_Grab(pool, alloc_size);
        RawPosting *raw   = RawPost_new(memory, doc_id, freq, first->text,
                                        first->len);
        PostPool_Feed(post_pool, (Obj*)raw);
    }
}

/* Create a MatchPostingMatcher and initialize it with `sim`, `plist` and
 * `compiler` via MatchPostMatcher_init().  `self` and `need_score` are not
 * used.
 */
MatchPostingMatcher*
MatchPost_Make_Matcher_IMP(MatchPosting *self, Similarity *sim,
                           PostingList *plist, Compiler *compiler,
                           bool need_score) {
    UNUSED_VAR(self);
    UNUSED_VAR(need_score);

    MatchPostingMatcher *new_matcher
        = (MatchPostingMatcher*)Class_Make_Obj(MATCHPOSTINGMATCHER);
    return MatchPostMatcher_init(new_matcher, sim, plist, compiler);
}

/***************************************************************************/

/* Initialize `self` by delegating to the parent initializer
 * TermMatcher_init() with the same arguments; returns `self`.
 */
MatchPostingMatcher*
MatchPostMatcher_init(MatchPostingMatcher *self, Similarity *sim,
                      PostingList *plist, Compiler *compiler) {
    TermMatcher *const as_parent = (TermMatcher*)self;
    TermMatcher_init(as_parent, sim, plist, compiler);
    return self;
}

/* Return the matcher's `weight` instance variable as the score. */
float
MatchPostMatcher_Score_IMP(MatchPostingMatcher* self) {
    MatchPostingMatcherIVARS *const ivars = MatchPostMatcher_IVARS(self);
    return ivars->weight;
}

/***************************************************************************/

/* Allocate a MatchPostingWriter with Class_Make_Obj() and initialize it via
 * MatchPostWriter_init(), forwarding all arguments unchanged.
 */
MatchPostingWriter*
MatchPostWriter_new(Schema *schema, Snapshot *snapshot, Segment *segment,
                    PolyReader *polyreader, int32_t field_num) {
    MatchPostingWriter *writer
        = (MatchPostingWriter*)Class_Make_Obj(MATCHPOSTINGWRITER);
    writer = MatchPostWriter_init(writer, schema, snapshot, segment,
                                  polyreader, field_num);
    return writer;
}

/* Initialize a MatchPostingWriter.
 *
 * Runs the parent initializer PostWriter_init() and then opens the output
 * stream "<segment name>/postings-<field_num>.dat" in the PolyReader's
 * Folder, storing it in the `outstream` instance variable.
 *
 * If the stream cannot be opened, the error reported by Err_get_error() is
 * rethrown.  Returns `self` on success.
 */
MatchPostingWriter*
MatchPostWriter_init(MatchPostingWriter *self, Schema *schema,
                     Snapshot *snapshot, Segment *segment,
                     PolyReader *polyreader, int32_t field_num) {
    Folder  *folder = PolyReader_Get_Folder(polyreader);
    String *filename
        = Str_newf("%o/postings-%i32.dat", Seg_Get_Name(segment), field_num);
    PostWriter_init((PostingWriter*)self, schema, snapshot, segment,
                    polyreader, field_num);
    MatchPostingWriterIVARS *const ivars = MatchPostWriter_IVARS(self);
    ivars->outstream = Folder_Open_Out(folder, filename);

    // Release the temporary filename before the failure check: RETHROW does
    // not return, so a DECREF placed after it would be skipped on error and
    // the string would leak.
    DECREF(filename);

    if (!ivars->outstream) { RETHROW(INCREF(Err_get_error())); }
    return self;
}

/* Destructor: release the reference to `outstream` and then invoke the
 * parent class's destructor via SUPER_DESTROY.
 */
void
MatchPostWriter_Destroy_IMP(MatchPostingWriter *self) {
    DECREF(MatchPostWriter_IVARS(self)->outstream);
    SUPER_DESTROY(self, MATCHPOSTINGWRITER);
}

/* Serialize one RawPosting to the writer's outstream.
 *
 * The doc id is written as a delta against the previously written doc id,
 * shifted left by one bit.  When the posting's freq is 1 the low bit is set
 * and freq is omitted; otherwise the low bit is clear and freq follows as a
 * second value.  Afterwards `aux_len` bytes, starting `content_len` bytes
 * into the posting's blob, are copied to the stream, and `last_doc_id` is
 * updated to this posting's doc id.
 */
void
MatchPostWriter_Write_Posting_IMP(MatchPostingWriter *self, RawPosting *posting) {
    MatchPostingWriterIVARS *const ivars = MatchPostWriter_IVARS(self);
    RawPostingIVARS *const raw = RawPost_IVARS(posting);
    OutStream *const out       = ivars->outstream;
    const int32_t    doc_id    = raw->doc_id;
    char *const      aux       = raw->blob + raw->content_len;

    // Delta-encoded doc id with room for the freq flag in bit 0.
    const uint32_t shifted_delta
        = ((uint32_t)(doc_id - ivars->last_doc_id)) << 1;

    if (raw->freq != 1) {
        // Flag bit clear: an explicit freq follows.
        OutStream_Write_CU32(out, shifted_delta);
        OutStream_Write_CU32(out, raw->freq);
    }
    else {
        // Flag bit set: freq of 1 is implied.
        OutStream_Write_CU32(out, shifted_delta | 1);
    }

    OutStream_Write_Bytes(out, aux, raw->aux_len);
    ivars->last_doc_id = doc_id;
}

/* Begin a new term: reset `last_doc_id` to 0 so the next posting's doc id
 * delta is computed from zero, and record the outstream's current position
 * in `tinfo`'s `post_filepos`.
 */
void
MatchPostWriter_Start_Term_IMP(MatchPostingWriter *self, TermInfo *tinfo) {
    MatchPostingWriterIVARS *const ivars = MatchPostWriter_IVARS(self);

    ivars->last_doc_id = 0;
    TInfo_IVARS(tinfo)->post_filepos = OutStream_Tell(ivars->outstream);
}

/* Store the outstream's current position in `tinfo`'s `post_filepos`. */
void
MatchPostWriter_Update_Skip_Info_IMP(MatchPostingWriter *self, TermInfo *tinfo) {
    OutStream *const out = MatchPostWriter_IVARS(self)->outstream;
    TInfo_IVARS(tinfo)->post_filepos = OutStream_Tell(out);
}

/***************************************************************************/

/* Allocate a MatchTermInfoStepper with Class_Make_Obj() and initialize it
 * with `schema` via MatchTInfoStepper_init().
 */
MatchTermInfoStepper*
MatchTInfoStepper_new(Schema *schema) {
    MatchTermInfoStepper *stepper
        = (MatchTermInfoStepper*)Class_Make_Obj(MATCHTERMINFOSTEPPER);
    stepper = MatchTInfoStepper_init(stepper, schema);
    return stepper;
}

/* Initialize `self`: run the parent initializer TermStepper_init(), copy the
 * skip interval from the schema's Architecture, and install a fresh TermInfo
 * (created with TInfo_new(0)) as the stepper's `value`.  Returns `self`.
 */
MatchTermInfoStepper*
MatchTInfoStepper_init(MatchTermInfoStepper *self, Schema *schema) {
    Architecture *architecture = Schema_Get_Architecture(schema);

    TermStepper_init((TermStepper*)self);

    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    ivars->skip_interval = Arch_Skip_Interval(architecture);
    ivars->value         = (Obj*)TInfo_new(0);

    return self;
}

/* Reset the stepper by calling TInfo_Reset() on the TermInfo held in
 * `value`.
 */
void
MatchTInfoStepper_Reset_IMP(MatchTermInfoStepper *self) {
    TermInfo *current = (TermInfo*)MatchTInfoStepper_IVARS(self)->value;
    TInfo_Reset(current);
}

/* Write a full (non-delta) TermInfo record to `outstream`.
 *
 * `value` must be a TermInfo (enforced with CERTIFY).  The record consists
 * of the doc freq, the absolute postings file position, and -- only when the
 * doc freq is at least the skip interval -- the skip file position.  The
 * stepper's own TermInfo is then updated to mirror `value` via TInfo_Mimic().
 */
void
MatchTInfoStepper_Write_Key_Frame_IMP(MatchTermInfoStepper *self,
                                      OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfo *tinfo = (TermInfo*)CERTIFY(value, TERMINFO);
    const int32_t doc_freq = TInfo_Get_Doc_Freq(tinfo);
    TermInfoIVARS *const src = TInfo_IVARS(tinfo);

    // Doc freq, then the absolute postings file pointer.
    OutStream_Write_CI32(outstream, doc_freq);
    OutStream_Write_CI64(outstream, src->post_filepos);

    // The skip file pointer is only present for sufficiently common terms.
    if (doc_freq >= ivars->skip_interval) {
        OutStream_Write_CI64(outstream, src->skip_filepos);
    }

    // Remember this TermInfo as the most recently written one.
    TInfo_Mimic((TermInfo*)ivars->value, (Obj*)tinfo);
}

/* Write a delta-encoded TermInfo record to `outstream`.
 *
 * `value` must be a TermInfo (enforced with CERTIFY).  The record consists
 * of the doc freq, the postings file position expressed as a difference from
 * the stepper's current TermInfo, and -- only when the doc freq is at least
 * the skip interval -- the skip file position (written as-is, not as a
 * delta).  The stepper's own TermInfo is then updated to mirror `value` via
 * TInfo_Mimic().
 */
void
MatchTInfoStepper_Write_Delta_IMP(MatchTermInfoStepper *self,
                                  OutStream *outstream, Obj *value) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfo *tinfo    = (TermInfo*)CERTIFY(value, TERMINFO);
    TermInfo *previous = (TermInfo*)ivars->value;
    const int32_t doc_freq = TInfo_Get_Doc_Freq(tinfo);
    TermInfoIVARS *const cur_ivars  = TInfo_IVARS(tinfo);
    TermInfoIVARS *const prev_ivars = TInfo_IVARS(previous);
    const int64_t filepos_delta
        = cur_ivars->post_filepos - prev_ivars->post_filepos;

    // Doc freq, then the postings file pointer relative to the last record.
    OutStream_Write_CI32(outstream, doc_freq);
    OutStream_Write_CI64(outstream, filepos_delta);

    // The skip file pointer is only present for sufficiently common terms.
    if (doc_freq >= ivars->skip_interval) {
        OutStream_Write_CI64(outstream, cur_ivars->skip_filepos);
    }

    // Remember this TermInfo as the most recently written one.
    TInfo_Mimic((TermInfo*)ivars->value, (Obj*)tinfo);
}

/* Read a full (non-delta) TermInfo record from `instream` into the stepper's
 * TermInfo: the doc freq, then the absolute postings file position, then --
 * only when the doc freq is at least the skip interval -- the skip file
 * position.  When no skip position is stored, `skip_filepos` is set to 0.
 */
void
MatchTInfoStepper_Read_Key_Frame_IMP(MatchTermInfoStepper *self,
                                     InStream *instream) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfoIVARS *const dest = TInfo_IVARS((TermInfo*)ivars->value);

    // Doc freq, then the absolute postings file pointer.
    dest->doc_freq     = InStream_Read_CI32(instream);
    dest->post_filepos = InStream_Read_CI64(instream);

    // Rare terms carry no skip pointer in the stream.
    if (dest->doc_freq < ivars->skip_interval) {
        dest->skip_filepos = 0;
    }
    else {
        dest->skip_filepos = InStream_Read_CI64(instream);
    }
}

/* Read a delta-encoded TermInfo record from `instream` into the stepper's
 * TermInfo: the doc freq, then a postings file position delta that is added
 * to the current `post_filepos`, then -- only when the doc freq is at least
 * the skip interval -- the skip file position.  When no skip position is
 * stored, `skip_filepos` is set to 0.
 */
void
MatchTInfoStepper_Read_Delta_IMP(MatchTermInfoStepper *self, InStream *instream) {
    MatchTermInfoStepperIVARS *const ivars = MatchTInfoStepper_IVARS(self);
    TermInfoIVARS *const dest = TInfo_IVARS((TermInfo*)ivars->value);

    // Doc freq, then advance the postings file pointer by the stored delta.
    dest->doc_freq      = InStream_Read_CI32(instream);
    dest->post_filepos += InStream_Read_CI64(instream);

    // Rare terms carry no skip pointer in the stream.
    if (dest->doc_freq < ivars->skip_interval) {
        dest->skip_filepos = 0;
    }
    else {
        dest->skip_filepos = InStream_Read_CI64(instream);
    }
}