The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdlib.h>

#define TESTLUCY_USE_SHORT_NAMES
#include "Lucy/Util/ToolSet.h"

#include "Lucy/Test/Index/TestSortWriter.h"

#include "Clownfish/TestHarness/TestBatchRunner.h"
#include "Lucy/Analysis/StandardTokenizer.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Document/HitDoc.h"
#include "Lucy/Index/DocReader.h"
#include "Lucy/Index/Indexer.h"
#include "Lucy/Index/IndexManager.h"
#include "Lucy/Index/PolyReader.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Index/SegReader.h"
#include "Lucy/Index/SortCache.h"
#include "Lucy/Index/SortReader.h"
#include "Lucy/Index/SortWriter.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Plan/StringType.h"
#include "Lucy/Store/RAMFolder.h"

/* Field-name strings shared by the helpers in this file.  They are created
 * by S_init_strings() and released by S_destroy_strings(), so they are only
 * valid between those two calls. */
static String *name_str;    // "name":   full-text field, sortable
static String *speed_str;   // "speed":  sortable string field
static String *weight_str;  // "weight": sortable string field
static String *home_str;    // "home":   sortable string field
static String *cat_str;     // "cat":    sortable string field, always "vehicle"
static String *wheels_str;  // "wheels": sortable string field, sometimes absent
static String *unused_str;  // "unused": sortable in the schema, never stored
static String *nope_str;    // "nope":   string field left unsortable

/* Constructor: obtain a new object from the TESTSORTWRITER class and return
 * it typed as a TestSortWriter. */
TestSortWriter*
TestSortWriter_new() {
    TestSortWriter *self = (TestSortWriter*)Class_Make_Obj(TESTSORTWRITER);
    return self;
}

/* Create the field-name strings used throughout this file.
 *
 * Every file-scope `*_str` global is assigned a new String built with
 * Str_newf().  S_destroy_strings() is the matching teardown.
 *
 * Declared with `(void)` so the definition is a real prototype and a call
 * with stray arguments is rejected at compile time.
 */
static void
S_init_strings(void) {
    name_str   = Str_newf("name");
    speed_str  = Str_newf("speed");
    weight_str = Str_newf("weight");
    home_str   = Str_newf("home");
    cat_str    = Str_newf("cat");
    wheels_str = Str_newf("wheels");
    unused_str = Str_newf("unused");
    nope_str   = Str_newf("nope");
}

/* Release the field-name strings created by S_init_strings().
 *
 * Each global is reset to NULL after its reference is dropped: the pointers
 * have file scope and outlive this call, so leaving them untouched would
 * keep dangling pointers around for any later code in this file.
 *
 * Declared with `(void)` so the definition is a real prototype.
 */
static void
S_destroy_strings(void) {
    DECREF(name_str);
    name_str = NULL;
    DECREF(speed_str);
    speed_str = NULL;
    DECREF(weight_str);
    weight_str = NULL;
    DECREF(home_str);
    home_str = NULL;
    DECREF(cat_str);
    cat_str = NULL;
    DECREF(wheels_str);
    wheels_str = NULL;
    DECREF(unused_str);
    unused_str = NULL;
    DECREF(nope_str);
    nope_str = NULL;
}

/* Build the Schema used by every Indexer in this test.
 *
 * Field layout:
 *   name                                   - FullTextType over a
 *                                            StandardTokenizer, sortable
 *   speed, weight, home, cat, wheels, unused
 *                                          - one shared sortable StringType
 *   nope                                   - a StringType on which
 *                                            Set_Sortable is never called
 *
 * Requires S_init_strings() to have run, since the field names come from the
 * file-scope `*_str` globals.
 *
 * Returns a new Schema which the caller must DECREF.
 *
 * Declared with `(void)` so the definition is a real prototype.
 */
static Schema*
S_create_schema(void) {
    Schema *schema = Schema_new();

    StandardTokenizer *tokenizer = StandardTokenizer_new();
    FullTextType *full_text_type = FullTextType_new((Analyzer*)tokenizer);
    FullTextType_Set_Sortable(full_text_type, true);

    StringType *string_type = StringType_new();
    StringType_Set_Sortable(string_type, true);

    // Deliberately left at its default sortability.
    StringType *unsortable = StringType_new();

    Schema_Spec_Field(schema, name_str,   (FieldType*)full_text_type);
    Schema_Spec_Field(schema, speed_str,  (FieldType*)string_type);
    Schema_Spec_Field(schema, weight_str, (FieldType*)string_type);
    Schema_Spec_Field(schema, home_str,   (FieldType*)string_type);
    Schema_Spec_Field(schema, cat_str,    (FieldType*)string_type);
    Schema_Spec_Field(schema, wheels_str, (FieldType*)string_type);
    Schema_Spec_Field(schema, unused_str, (FieldType*)string_type);
    Schema_Spec_Field(schema, nope_str,   (FieldType*)unsortable);

    // Drop the local references; the schema is presumably holding its own
    // (NOTE(review): confirm Schema_Spec_Field's ownership contract).
    DECREF(unsortable);
    DECREF(string_type);
    DECREF(full_text_type);
    DECREF(tokenizer);

    return schema;
}

/* Store `value` under `field` in `doc`.  A NULL `value` means "leave this
 * field out": nothing is stored and the document is not touched. */
static void
S_store_field(Doc *doc, String *field, const char *value) {
    if (!value) {
        return;
    }

    // Wrap the C string in a String object without copying it to the heap
    // here; Doc_Store receives it as a generic Obj.
    String *wrapped = SSTR_WRAP_C(value);
    Doc_Store(doc, field, (Obj*)wrapped);
}

/* Assemble one "vehicle" document and add it to `indexer`.
 *
 * name, speed, weight, home, wheels and nope supply the values for the
 * fields of the same names; passing NULL for any of them omits that field
 * from the document.  The "cat" field is always set to "vehicle".
 *
 * The document is added with 1.0f as the third argument to Indexer_Add_Doc
 * and the local reference is released before returning.
 */
static void
S_add_doc(Indexer *indexer, const char *name, const char *speed,
              const char *weight, const char *home, const char *wheels,
              const char *nope) {
    // Field/value pairs, listed in the order in which they are stored.
    struct {
        String     *field;
        const char *value;
    } entries[] = {
        { name_str,   name      },
        { speed_str,  speed     },
        { weight_str, weight    },
        { home_str,   home      },
        { cat_str,    "vehicle" },
        { wheels_str, wheels    },
        { nope_str,   nope      },
    };

    Doc *vehicle = Doc_new(NULL, 0);
    for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); ++i) {
        S_store_field(vehicle, entries[i].field, entries[i].value);
    }

    Indexer_Add_Doc(indexer, vehicle, 1.0f);
    DECREF(vehicle);
}

/* Check the sort data recorded for `field` in the segment behind
 * `seg_reader`.
 *
 * runner     - receives every test result.
 * folder     - index folder probed for the field's ".ord" file.
 * seg_reader - reader for the segment under test.
 * gen        - segment generation as text; used to build the path
 *              "seg_<gen>/sort-<field_num>.ord".
 * is_used    - true if a sort file is expected to exist for the field.
 * field      - name of the field to check.
 *
 * One result is always reported for the presence (or absence) of the file.
 * When `is_used` is true, one further result is reported for every doc id
 * from 1 through SegReader_Doc_Max(), comparing the value reached through
 * the SortCache with the value extracted from the fetched document.
 */
static void
S_test_sort_cache(TestBatchRunner *runner, RAMFolder *folder,
                  SegReader *seg_reader, const char *gen, bool is_used,
                  String *field) {
    Segment *segment   = SegReader_Get_Segment(seg_reader);
    int32_t  field_num = Seg_Field_Num(segment, field);
    char    *field_str = Str_To_Utf8(field);
    String  *filename  = Str_newf("seg_%s/sort-%i32.ord", gen, field_num);

    // Probe the folder once; the filename is not needed afterwards.
    bool file_exists = RAMFolder_Exists(folder, filename);
    DECREF(filename);

    if (!is_used) {
        TEST_TRUE(runner, !file_exists,
                  "no sort files written for %s", field_str);
        free(field_str);
        return;
    }
    TEST_TRUE(runner, file_exists,
              "sort files written for %s", field_str);

    SortReader *sort_reader
        = (SortReader*)SegReader_Obtain(seg_reader,
                                        Class_Get_Name(SORTREADER));
    DocReader *doc_reader
        = (DocReader*)SegReader_Obtain(seg_reader, Class_Get_Name(DOCREADER));
    SortCache *sort_cache
        = SortReader_Fetch_Sort_Cache(sort_reader, field);

    int32_t doc_max = SegReader_Doc_Max(seg_reader);
    for (int32_t doc_id = 1; doc_id <= doc_max; ++doc_id) {
        int32_t  ord         = SortCache_Ordinal(sort_cache, doc_id);
        Obj     *cache_value = SortCache_Value(sort_cache, ord);
        HitDoc  *doc         = DocReader_Fetch_Doc(doc_reader, doc_id);
        Obj     *doc_value   = HitDoc_Extract(doc, field);

        // A missing value on one side only matches a missing value on the
        // other; Obj_Equals is reserved for two non-NULL values.
        bool is_equal;
        if (cache_value == NULL || doc_value == NULL) {
            is_equal = (cache_value == doc_value);
        }
        else {
            is_equal = Obj_Equals(cache_value, doc_value);
        }

        // doc_id is an int32_t but the pattern uses %d, which takes an int.
        // Cast explicitly so the variadic argument matches the conversion
        // on every platform (assumes TEST_TRUE formats printf-style).
        TEST_TRUE(runner, is_equal, "correct cached value field %s doc %d",
                  field_str, (int)doc_id);

        DECREF(doc_value);
        DECREF(doc);
        DECREF(cache_value);
    }

    free(field_str);
}

/* End-to-end exercise of sort-file writing across several commits.
 *
 * Sequence:
 *   1. Index three vehicles into a fresh RAMFolder and commit.
 *   2. Check the sort files of segment "1" for every field.
 *   3. Commit one more doc through a NonMergingIndexManager.
 *   4. Delete "bike", add one more doc, optimize and commit.
 *   5. Check that no file path containing "seg_1" or "seg_2" remains.
 *   6. Check the sort files of segment "3" for the sortable, stored fields.
 */
static void
test_sort_writer(TestBatchRunner *runner) {
    Schema    *schema = S_create_schema();
    RAMFolder *folder = RAMFolder_new(NULL);

    // Fields that receive values and are sortable, in checking order.
    String *const sorted_fields[] = {
        name_str, speed_str, weight_str, home_str, cat_str, wheels_str
    };
    const size_t num_sorted
        = sizeof(sorted_fields) / sizeof(sorted_fields[0]);

    // Fields expected to have no sort file: "unused" is never stored and
    // "nope" is not sortable.
    String *const unsorted_fields[] = { unused_str, nope_str };
    const size_t num_unsorted
        = sizeof(unsorted_fields) / sizeof(unsorted_fields[0]);

    // Step 1: add vehicles.
    {
        Indexer *writer = Indexer_new(schema, (Obj*)folder, NULL, 0);

        S_add_doc(writer, "airplane", "0200", "8000", "air", "3", "nyet");
        S_add_doc(writer, "bike", "0015", "0025", "land", "2", NULL);
        S_add_doc(writer, "car", "0070", "3000", "land",  "4", NULL);

        Indexer_Commit(writer);
        DECREF(writer);
    }

    // Step 2: inspect the first segment.
    {
        PolyReader *reader   = PolyReader_open((Obj*)folder, NULL, NULL);
        Vector     *segments = PolyReader_Get_Seg_Readers(reader);
        SegReader  *first    = (SegReader*)Vec_Fetch(segments, 0);

        for (size_t i = 0; i < num_sorted; ++i) {
            S_test_sort_cache(runner, folder, first, "1", true,
                              sorted_fields[i]);
        }
        for (size_t i = 0; i < num_unsorted; ++i) {
            S_test_sort_cache(runner, folder, first, "1", false,
                              unsorted_fields[i]);
        }

        DECREF(reader);
    }

    // Step 3: add a second segment.
    {
        NonMergingIndexManager *no_merge = NMIxManager_new();
        Indexer *writer
            = Indexer_new(schema, (Obj*)folder, (IndexManager*)no_merge, 0);

        // no "wheels" field -- test NULL/undef
        S_add_doc(writer, "dirigible", "0040", "0000", "air", NULL, NULL);

        Indexer_Commit(writer);
        DECREF(writer);
        DECREF(no_merge);
    }

    // Step 4: consolidate everything, to test merging.
    {
        Indexer *writer = Indexer_new(schema, (Obj*)folder, NULL, 0);
        String  *bike   = SSTR_WRAP_C("bike");

        Indexer_Delete_By_Term(writer, name_str, (Obj*)bike);

        // no "wheels" field -- test NULL/undef
        S_add_doc(writer, "elephant", "0020", "6000", "land", NULL, NULL);

        Indexer_Optimize(writer);
        Indexer_Commit(writer);
        DECREF(writer);
    }

    // Step 5: nothing belonging to the earlier segments may be left behind.
    {
        Vector *paths = RAMFolder_List_R(folder, NULL);
        size_t  total = Vec_Get_Size(paths);
        int     stale = 0;

        for (size_t i = 0; i < total; ++i) {
            String *path = (String*)Vec_Fetch(paths, i);
            bool from_seg_1 = Str_Contains_Utf8(path, "seg_1", 5);
            bool from_seg_2 = Str_Contains_Utf8(path, "seg_2", 5);
            if (from_seg_1 || from_seg_2) {
                stale += 1;
            }
        }

        TEST_INT_EQ(runner, stale, 0,
                    "all files from earlier segments zapped");
        DECREF(paths);
    }

    // Step 6: inspect the consolidated segment.
    {
        PolyReader *reader   = PolyReader_open((Obj*)folder, NULL, NULL);
        Vector     *segments = PolyReader_Get_Seg_Readers(reader);
        SegReader  *merged   = (SegReader*)Vec_Fetch(segments, 0);

        for (size_t i = 0; i < num_sorted; ++i) {
            S_test_sort_cache(runner, folder, merged, "3", true,
                              sorted_fields[i]);
        }

        DECREF(reader);
    }

    DECREF(folder);
    DECREF(schema);
}

/* Test-batch entry point: announce the plan, set up the shared strings, run
 * the sort-writer test, then tear the strings down again. */
void
TestSortWriter_Run_IMP(TestSortWriter *self, TestBatchRunner *runner) {
    enum {
        // Must match the number of results reported by test_sort_writer().
        NUM_PLANNED_TESTS = 57,
        // Small default memory threshold handed to SortWriter.
        SORT_MEM_THRESH   = 100
    };

    TestBatchRunner_Plan(runner, (TestBatch*)self, NUM_PLANNED_TESTS);

    S_init_strings();

    // Force frequent flushes.
    SortWriter_set_default_mem_thresh(SORT_MEM_THRESH);

    test_sort_writer(runner);

    S_destroy_strings();
}

/* Constructor: obtain a new object from the NONMERGINGINDEXMANAGER class and
 * return the result of running NMIxManager_init() on it. */
NonMergingIndexManager*
NMIxManager_new() {
    return NMIxManager_init(
               (NonMergingIndexManager*)Class_Make_Obj(NONMERGINGINDEXMANAGER));
}

/* Initializer: run the IndexManager initializer on `self` with NULL for both
 * of its remaining arguments, then hand `self` back. */
NonMergingIndexManager*
NMIxManager_init(NonMergingIndexManager *self) {
    IndexManager *base = (IndexManager*)self;
    IxManager_init(base, NULL, NULL);
    return self;
}

/* Recycle implementation that ignores every argument and always answers
 * with a Vector freshly created by Vec_new(0) -- presumably so that no
 * existing segment is ever picked for merging (NOTE(review): confirm against
 * the IndexManager Recycle contract). */
Vector*
NMIxManager_Recycle_IMP(NonMergingIndexManager *self, PolyReader *reader,
                        lucy_DeletionsWriter *del_writer, int64_t cutoff,
                        bool optimize) {
    // None of the inputs influence the result.
    UNUSED_VAR(self);
    UNUSED_VAR(reader);
    UNUSED_VAR(del_writer);
    UNUSED_VAR(cutoff);
    UNUSED_VAR(optimize);

    Vector *nothing_to_recycle = Vec_new(0);
    return nothing_to_recycle;
}