The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <ctype.h>

#define C_LUCY_SEGMENT
#include "Lucy/Util/ToolSet.h"

#include "Lucy/Index/Segment.h"
#include "Lucy/Store/Folder.h"
#include "Lucy/Util/Json.h"
#include "Lucy/Util/StringHelper.h"
#include "Lucy/Util/IndexFileNames.h"

/* Allocate a Segment via VTable_Make_Obj() and hand it to Seg_init().
 *
 * @param number Segment number, forwarded unchanged to Seg_init().
 * @return whatever Seg_init() returns.
 */
Segment*
Seg_new(int64_t number) {
    return Seg_init((Segment*)VTable_Make_Obj(SEGMENT), number);
}

/* Initialize a Segment.
 *
 * Throws if `number` is negative.  Otherwise records the number, derives
 * the segment name from it, zeroes the count, and creates empty metadata
 * and field-mapping containers.
 *
 * @param self   Segment to initialize.
 * @param number Segment number; must be >= 0.
 * @return self.
 */
Segment*
Seg_init(Segment *self, int64_t number) {
    // Reject negative segment numbers up front.
    if (number < 0) { THROW(ERR, "Segment number %i64 less than 0", number); }

    // Identity: the name is derived from the number.
    self->number = number;
    self->name   = Seg_num_to_name(number);

    // Bookkeeping starts out empty.
    self->count    = 0;
    self->metadata = Hash_new(0);
    self->by_name  = Hash_new(0);
    self->by_num   = VA_new(2);

    // Occupy slot 0 with a placeholder so that real field numbers start
    // at 1, not 0.
    VA_Push(self->by_num, INCREF(&EMPTY));

    return self;
}

/* Build the name for a segment number: the literal prefix "seg_" followed
 * by the text that StrHelp_to_base36() writes for `number`.
 *
 * @param number Segment number to encode.
 * @return the CharBuf produced by CB_newf().
 */
CharBuf*
Seg_num_to_name(int64_t number) {
    char base36[StrHelp_MAX_BASE36_BYTES];

    // Pass the array itself so it decays to `char*`.  `&base36` has type
    // `char (*)[N]`, which is not the type a "%s" variadic argument is
    // read back as.
    StrHelp_to_base36(number, base36);
    return CB_newf("seg_%s", base36);
}

/* Check whether `name` looks like a segment name: the prefix "seg_"
 * followed only by ASCII alphanumeric characters.
 *
 * A zero code point stops the scan; the name is accepted only if nothing
 * is left over at that point, so an embedded NUL followed by more content
 * is rejected.
 *
 * @param name Candidate segment name.
 * @return true if the name is valid, false otherwise.
 */
bool_t
Seg_valid_seg_name(const CharBuf *name) {
    if (!CB_Starts_With_Str(name, "seg_", 4)) { return false; }

    // Work on a wrapper so that nipping does not disturb `name`'s own
    // position.
    ZombieCharBuf *scratch = ZCB_WRAP(name);
    ZCB_Nip(scratch, 4);

    uint32_t code_point;
    while (0 != (code_point = ZCB_Nip_One(scratch))) {
        // isalnum() is only defined for values representable as unsigned
        // char (or EOF).  Reject non-ASCII code points before calling it
        // rather than handing it an arbitrary Unicode code point.
        if (code_point > 0x7F || !isalnum((int)code_point)) {
            return false;
        }
    }

    // Success only if the whole string was consumed.
    if (ZCB_Get_Size(scratch) == 0) { return true; }
    return false;
}

/* Destructor: drop this Segment's references to its name, metadata, and
 * both field-mapping containers, then invoke the parent destructor.
 */
void
Seg_destroy(Segment *self) {
    // Field mappings.
    DECREF(self->by_num);
    DECREF(self->by_name);

    // Metadata and identity.
    DECREF(self->metadata);
    DECREF(self->name);

    SUPER_DESTROY(self, SEGMENT);
}

/* Load "<segment name>/segmeta.json" from `folder` and repopulate this
 * Segment from it.
 *
 * The parsed hash replaces self->metadata.  Its "segmeta" entry supplies
 * the count (key "count", falling back to "doc_count") and the
 * "field_names" list, from which both field mappings are rebuilt.
 *
 * Throws if the parsed data or its "segmeta" entry is not a Hash, or if
 * the count or "field_names" entries are missing.
 *
 * @param self   Segment to populate.
 * @param folder Folder to read from.
 * @return false if Json_slurp_json() yields nothing, true on success.
 */
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *path   = CB_newf("%o/segmeta.json", self->name);
    Hash    *parsed = (Hash*)Json_slurp_json(folder, path);
    DECREF(path);

    // Nothing to do unless the segmeta file could be read.
    if (parsed == NULL) { return false; }
    CERTIFY(parsed, HASH);

    // Adopt the parsed data as this Segment's metadata, then locate the
    // portion that describes the Segment object itself.
    DECREF(self->metadata);
    self->metadata = parsed;
    Hash *seg_meta
        = (Hash*)CERTIFY(Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Count: try "count" first, then fall back to "doc_count".
    Obj *count = Hash_Fetch_Str(seg_meta, "count", 5);
    if (count == NULL) { count = Hash_Fetch_Str(seg_meta, "doc_count", 9); }
    if (count != NULL) { self->count = Obj_To_I64(count); }
    else { THROW(ERR, "Missing 'count'"); }

    // The list of field names is mandatory.
    VArray *field_names
        = (VArray*)Hash_Fetch_Str(seg_meta, "field_names", 11);
    uint32_t num_fields = 0;
    if (field_names != NULL) { num_fields = VA_Get_Size(field_names); }
    else { THROW(ERR, "Failed to extract 'field_names' from metadata"); }

    // Discard the existing field mappings and start over.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);

    // Re-register every field, in the order it was stored.
    for (uint32_t tick = 0; tick < num_fields; tick++) {
        CharBuf *field = (CharBuf*)VA_Fetch(field_names, tick);
        Seg_Add_Field(self, field);
    }

    return true;
}

/* Serialize this Segment's metadata to "<segment name>/segmeta.json" in
 * `folder`.
 *
 * A fresh "segmeta" hash holding the count, name, field names, and format
 * is stored into self->metadata under the key "segmeta", and the whole
 * metadata hash is then written as JSON.  If writing fails, the current
 * error is rethrown.
 *
 * @param self   Segment to serialize.
 * @param folder Folder to write into.
 */
void
Seg_write_file(Segment *self, Folder *folder) {
    // Assemble the entries that describe the Segment object itself.
    Hash *seg_meta = Hash_new(16);
    Obj  *count    = (Obj*)CB_newf("%i64", self->count);
    Hash_Store_Str(seg_meta, "count", 5, count);
    Hash_Store_Str(seg_meta, "name", 4, (Obj*)CB_Clone(self->name));
    Hash_Store_Str(seg_meta, "field_names", 11, INCREF(self->by_num));
    Hash_Store_Str(seg_meta, "format", 6, (Obj*)CB_newf("%i32", 1));

    // File them under "segmeta" alongside all other stored metadata.
    Hash_Store_Str(self->metadata, "segmeta", 7, (Obj*)seg_meta);

    // Write everything out.
    CharBuf *path = CB_newf("%o/segmeta.json", self->name);
    bool_t   ok   = Json_spew_json((Obj*)self->metadata, folder, path);
    DECREF(path);
    if (ok) { return; }

    RETHROW(INCREF(Err_get_error()));
}

/* Register `field` with this Segment and return its field number.
 *
 * A field that is already known keeps its existing number.  A new field
 * receives the current size of the by_num array as its number, and a
 * clone of its name is appended to that array.
 *
 * @param self  Segment to update.
 * @param field Field name.
 * @return the field's number.
 */
int32_t
Seg_add_field(Segment *self, const CharBuf *field) {
    Integer32 *existing = (Integer32*)Hash_Fetch(self->by_name, (Obj*)field);
    if (existing != NULL) {
        return Int32_Get_Value(existing);
    }

    // Unknown field: its number is the next free slot in by_num.
    int32_t next_num = VA_Get_Size(self->by_num);
    Hash_Store(self->by_name, (Obj*)field, (Obj*)Int32_new(next_num));
    VA_Push(self->by_num, (Obj*)CB_Clone(field));
    return next_num;
}

/* Accessor for the segment's name.
 *
 * @return self->name as stored; no new reference is taken here.
 */
CharBuf*
Seg_get_name(Segment *self) {
    return self->name;
}

/* Accessor for the segment's number.
 *
 * @return self->number.
 */
int64_t
Seg_get_number(Segment *self) {
    return self->number;
}

/* Overwrite the segment's count.
 *
 * @param count New value for self->count; stored without validation.
 */
void
Seg_set_count(Segment *self, int64_t count) {
    self->count = count;
}

/* Accessor for the segment's count.
 *
 * @return self->count.
 */
int64_t
Seg_get_count(Segment *self) {
    return self->count;
}

/* Add `increment` to the segment's count.
 *
 * @param increment Amount to add to self->count.
 * @return the count after the addition.
 */
int64_t
Seg_increment_count(Segment *self, int64_t increment) {
    int64_t updated = self->count + increment;
    self->count = updated;
    return updated;
}

/* Store `value` in the segment's metadata hash under `key`.
 *
 * Throws if something is already stored under `key`.
 *
 * @param key   Metadata key.
 * @param value Object to store; passed straight through to Hash_Store().
 */
void
Seg_store_metadata(Segment *self, const CharBuf *key, Obj *value) {
    Obj *existing = Hash_Fetch(self->metadata, (Obj*)key);
    if (existing != NULL) {
        THROW(ERR, "Metadata key '%o' already registered", key);
    }
    Hash_Store(self->metadata, (Obj*)key, value);
}

/* Variant of Seg_Store_Metadata() that takes the key as a raw character
 * buffer plus length.  The key is wrapped with ZCB_WRAP_STR() and
 * forwarded.
 *
 * @param key     Key characters.
 * @param key_len Length of `key`.
 * @param value   Object to store.
 */
void
Seg_store_metadata_str(Segment *self, const char *key, size_t key_len,
                       Obj *value) {
    ZombieCharBuf *wrapped_key = ZCB_WRAP_STR((char*)key, key_len);
    Seg_Store_Metadata(self, (CharBuf*)wrapped_key, value);
}

/* Look up `key` in the segment's metadata hash.
 *
 * @param key Metadata key.
 * @return the result of Hash_Fetch() on self->metadata, returned as-is.
 */
Obj*
Seg_fetch_metadata(Segment *self, const CharBuf *key) {
    return Hash_Fetch(self->metadata, (Obj*)key);
}

/* Look up a metadata entry using a raw character buffer plus length as
 * the key.
 *
 * @param key Key characters.
 * @param len Length of `key`.
 * @return the result of Hash_Fetch_Str() on self->metadata, returned as-is.
 */
Obj*
Seg_fetch_metadata_str(Segment *self, const char *key, size_t len) {
    return Hash_Fetch_Str(self->metadata, key, len);
}

/* Accessor for the segment's whole metadata hash.
 *
 * @return self->metadata as stored; no new reference is taken here.
 */
Hash*
Seg_get_metadata(Segment *self) {
    return self->metadata;
}

/* Three-way comparison of two Segments by segment number.
 *
 * @param other Object to compare against; CERTIFY requires it to be a
 *              Segment.
 * @return -1, 0, or 1 when self's number is respectively less than, equal
 *         to, or greater than other's.
 */
int32_t
Seg_compare_to(Segment *self, Obj *other) {
    Segment *rhs = (Segment*)CERTIFY(other, SEGMENT);
    // Each comparison yields 0 or 1, so the difference is -1, 0, or 1.
    return (self->number > rhs->number) - (self->number < rhs->number);
}

/* Map a field number to its field name.
 *
 * @param field_num Field number.
 * @return NULL when field_num is 0; otherwise whatever VA_Fetch() finds
 *         in by_num at that index.
 */
CharBuf*
Seg_field_name(Segment *self, int32_t field_num) {
    // Field number 0 never maps to a name.
    if (field_num == 0) {
        return NULL;
    }
    return (CharBuf*)VA_Fetch(self->by_num, field_num);
}

/* Map a field name to its field number.
 *
 * @param field Field name; may be NULL.
 * @return the number stored for `field` in by_name, or 0 when `field` is
 *         NULL or has no entry.
 */
int32_t
Seg_field_num(Segment *self, const CharBuf *field) {
    if (field == NULL) {
        return 0;
    }

    Integer32 *num = (Integer32*)Hash_Fetch(self->by_name, (Obj*)field);
    if (num == NULL) {
        return 0;
    }
    return Int32_Get_Value(num);
}