The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define C_LUCY_INVERTER
#define C_LUCY_ZOMBIECHARBUF
#define C_LUCY_INVERTERENTRY
#include "XSBind.h"
#include "Lucy/Index/Inverter.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Object/ByteBuf.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/BlobType.h"
#include "Lucy/Plan/NumericType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Plan/TextType.h"
#include "Lucy/Util/StringHelper.h"

/* Find the InverterEntry that corresponds to the field named by the key of
 * `hash_entry`, creating and caching one in self->entry_pool on first use.
 *
 * The key is obtained as UTF-8 before lookup.  The field number is taken
 * from the Segment; when the Segment reports 0, the field is added to the
 * Segment provided the Schema has a type for it, otherwise an error is
 * thrown.
 */
static lucy_InverterEntry*
S_fetch_entry(lucy_Inverter *self, HE *hash_entry) {
    lucy_Schema *const schema = self->schema;
    STRLEN raw_len = HeKLEN(hash_entry);
    char *name;
    STRLEN name_len;

    // Obtain the field name as UTF-8.
    if (raw_len != (STRLEN)HEf_SVKEY) {
        // Plain string key: use it as-is when it already validates as
        // UTF-8, otherwise go through an SV to get a UTF-8 rendition.
        name     = HeKEY(hash_entry);
        name_len = raw_len;
        if (!lucy_StrHelp_utf8_valid(name, name_len)) {
            SV *forced_sv = HeSVKEY_force(hash_entry);
            name = SvPVutf8(forced_sv, name_len);
        }
    }
    else {
        // The key is stored as an SV.
        SV *name_sv = HeKEY_sv(hash_entry);
        name = SvPVutf8(name_sv, name_len);
    }

    lucy_ZombieCharBuf *field = CFISH_ZCB_WRAP_STR(name, name_len);
    lucy_CharBuf *field_cb = (lucy_CharBuf*)field;
    int32_t field_num = Lucy_Seg_Field_Num(self->segment, field_cb);
    if (field_num == 0) {
        // The Segment does not know this field yet; consult the Schema.
        if (!Lucy_Schema_Fetch_Type(schema, field_cb)) {
            // Not in the Schema either, so the user never spec'd it.
            THROW(LUCY_ERR, "Unknown field name: '%s'", name);
        }
        else {
            // Known to the Schema: have the Segment assign a field num.
            field_num = Lucy_Seg_Add_Field(self->segment, field_cb);
        }
    }

    // Reuse the pooled entry for this field number if there is one.
    lucy_InverterEntry *pooled
        = (lucy_InverterEntry*)Lucy_VA_Fetch(self->entry_pool, field_num);
    if (pooled) {
        return pooled;
    }

    pooled = lucy_InvEntry_new(schema, field_cb, field_num);
    Lucy_VA_Store(self->entry_pool, field_num, (lucy_Obj*)pooled);
    return pooled;
}

/* Walk the fields hash of `doc`, copy each value into the matching
 * InverterEntry's value object according to the field type's primitive ID,
 * and register the entry with the Inverter via Lucy_Inverter_Add_Field.
 * Throws if a field's primitive ID is not one of the handled cases.
 */
void
lucy_Inverter_invert_doc(lucy_Inverter *self, lucy_Doc *doc) {
    HV *const field_hash = (HV*)Lucy_Doc_Get_Fields(doc);
    I32 remaining = hv_iterinit(field_hash);

    // Point the Inverter at the new doc before processing its fields.
    Lucy_Inverter_Set_Doc(self, doc);

    // One pass per key reported by hv_iterinit().
    for (; remaining != 0; remaining--) {
        HE *he = hv_iternext(field_hash);
        lucy_InverterEntry *entry = S_fetch_entry(self, he);
        SV *sv = HeVAL(he);
        lucy_FieldType *type = entry->type;

        // Load the Perl value into the entry; text is requested as UTF-8.
        switch (Lucy_FType_Primitive_ID(type) & lucy_FType_PRIMITIVE_ID_MASK) {
            case lucy_FType_TEXT: {
                    STRLEN text_len;
                    char *text = SvPVutf8(sv, text_len);
                    Lucy_ViewCB_Assign_Str((lucy_ViewCharBuf*)entry->value,
                                           text, text_len);
                    break;
                }
            case lucy_FType_BLOB: {
                    STRLEN blob_len;
                    char *blob = SvPV(sv, blob_len);
                    Lucy_ViewBB_Assign_Bytes((lucy_ViewByteBuf*)entry->value,
                                             blob, blob_len);
                    break;
                }
            case lucy_FType_INT32: {
                    Lucy_Int32_Set_Value((lucy_Integer32*)entry->value,
                                         SvIV(sv));
                    break;
                }
            case lucy_FType_INT64: {
                    int64_t wide;
                    if (sizeof(IV) == 8) {
                        wide = SvIV(sv);
                    }
                    else {
                        // IV is not 8 bytes here: go through NV (lossy).
                        wide = (int64_t)SvNV(sv);
                    }
                    Lucy_Int64_Set_Value((lucy_Integer64*)entry->value, wide);
                    break;
                }
            case lucy_FType_FLOAT32: {
                    Lucy_Float32_Set_Value((lucy_Float32*)entry->value,
                                           (float)SvNV(sv));
                    break;
                }
            case lucy_FType_FLOAT64: {
                    Lucy_Float64_Set_Value((lucy_Float64*)entry->value,
                                           SvNV(sv));
                    break;
                }
            default:
                THROW(LUCY_ERR, "Unrecognized type: %o", type);
        }

        Lucy_Inverter_Add_Field(self, entry);
    }
}