The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define C_LUCY_INVERTER
#define C_LUCY_INVERTERENTRY
#include "XSBind.h"
#include "Lucy/Index/Inverter.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Index/Segment.h"
#include "Clownfish/ByteBuf.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/BlobType.h"
#include "Lucy/Plan/NumericType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Plan/TextType.h"
#include "Clownfish/Util/StringHelper.h"

static lucy_InverterEntry*
S_fetch_entry(lucy_Inverter *self, HE *hash_entry) {
    lucy_InverterIVARS *const ivars = lucy_Inverter_IVARS(self);
    lucy_Schema *const schema = ivars->schema;
    char *key;
    STRLEN key_len;
    STRLEN he_key_len = HeKLEN(hash_entry);

    // Force field name to UTF-8 if necessary.
    if (he_key_len == (STRLEN)HEf_SVKEY) {
        SV *key_sv = HeKEY_sv(hash_entry);
        key = SvPVutf8(key_sv, key_len);
    }
    else {
        key = HeKEY(hash_entry);
        key_len = he_key_len;
        if (!cfish_StrHelp_utf8_valid(key, key_len)) {
            SV *key_sv = HeSVKEY_force(hash_entry);
            key = SvPVutf8(key_sv, key_len);
        }
    }

    cfish_StackString *field = CFISH_SSTR_WRAP_UTF8(key, key_len);
    int32_t field_num
        = LUCY_Seg_Field_Num(ivars->segment, (cfish_String*)field);
    if (!field_num) {
        // This field seems not to be in the segment yet.  Try to find it in
        // the Schema.
        if (LUCY_Schema_Fetch_Type(schema, (cfish_String*)field)) {
            // The field is in the Schema.  Get a field num from the Segment.
            field_num = LUCY_Seg_Add_Field(ivars->segment,
                                           (cfish_String*)field);
        }
        else {
            // We've truly failed to find the field.  The user must
            // not have spec'd it.
            THROW(CFISH_ERR, "Unknown field name: '%s'", key);
        }
    }

    lucy_InverterEntry *entry
        = (lucy_InverterEntry*)CFISH_VA_Fetch(ivars->entry_pool, field_num);
    if (!entry) {
        entry = lucy_InvEntry_new(schema, (cfish_String*)field, field_num);
        CFISH_VA_Store(ivars->entry_pool, field_num, (cfish_Obj*)entry);
    }
    return entry;
}

void
LUCY_Inverter_Invert_Doc_IMP(lucy_Inverter *self, lucy_Doc *doc) {
    HV  *const fields = (HV*)LUCY_Doc_Get_Fields(doc);
    I32  num_keys     = hv_iterinit(fields);

    // Prepare for the new doc.
    LUCY_Inverter_Set_Doc(self, doc);

    // Extract and invert the doc's fields.
    while (num_keys--) {
        HE *hash_entry = hv_iternext(fields);
        lucy_InverterEntry *inv_entry = S_fetch_entry(self, hash_entry);
        SV *value_sv = HeVAL(hash_entry);
        lucy_InverterEntryIVARS *const entry_ivars
            = lucy_InvEntry_IVARS(inv_entry);
        lucy_FieldType *type = entry_ivars->type;

        // Get the field value, forcing text fields to UTF-8.
        switch (LUCY_FType_Primitive_ID(type) & lucy_FType_PRIMITIVE_ID_MASK) {
            case lucy_FType_TEXT: {
                    STRLEN val_len;
                    char *val_ptr = SvPVutf8(value_sv, val_len);
                    CFISH_DECREF(entry_ivars->value);
                    entry_ivars->value
                        = (cfish_Obj*)cfish_Str_new_wrap_trusted_utf8(
                                val_ptr, val_len);
                    break;
                }
            case lucy_FType_BLOB: {
                    STRLEN val_len;
                    char *val_ptr = SvPV(value_sv, val_len);
                    cfish_ViewByteBuf *value
                        = (cfish_ViewByteBuf*)entry_ivars->value;
                    CFISH_ViewBB_Assign_Bytes(value, val_ptr, val_len);
                    break;
                }
            case lucy_FType_INT32: {
                    cfish_Integer32* value = (cfish_Integer32*)entry_ivars->value;
                    CFISH_Int32_Set_Value(value, SvIV(value_sv));
                    break;
                }
            case lucy_FType_INT64: {
                    cfish_Integer64* value = (cfish_Integer64*)entry_ivars->value;
                    int64_t val = sizeof(IV) == 8
                                  ? SvIV(value_sv)
                                  : (int64_t)SvNV(value_sv); // lossy
                    CFISH_Int64_Set_Value(value, val);
                    break;
                }
            case lucy_FType_FLOAT32: {
                    cfish_Float32* value = (cfish_Float32*)entry_ivars->value;
                    CFISH_Float32_Set_Value(value, (float)SvNV(value_sv));
                    break;
                }
            case lucy_FType_FLOAT64: {
                    cfish_Float64* value = (cfish_Float64*)entry_ivars->value;
                    CFISH_Float64_Set_Value(value, SvNV(value_sv));
                    break;
                }
            default:
                THROW(CFISH_ERR, "Unrecognized type: %o", type);
        }

        LUCY_Inverter_Add_Field(self, inv_entry);
    }
}