The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define C_LUCY_POLYANALYZER
#include "Lucy/Util/ToolSet.h"

#include "Lucy/Analysis/PolyAnalyzer.h"
#include "Lucy/Analysis/CaseFolder.h"
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
#include "Lucy/Analysis/SnowballStemmer.h"
#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Util/Freezer.h"

PolyAnalyzer*
PolyAnalyzer_new(String *language, VArray *analyzers) {
    PolyAnalyzer *self = (PolyAnalyzer*)Class_Make_Obj(POLYANALYZER);
    return PolyAnalyzer_init(self, language, analyzers);
}

PolyAnalyzer*
PolyAnalyzer_init(PolyAnalyzer *self, String *language,
                  VArray *analyzers) {
    Analyzer_init((Analyzer*)self);
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);

    if (analyzers) {
        for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
            CERTIFY(VA_Fetch(analyzers, i), ANALYZER);
        }
        ivars->analyzers = (VArray*)INCREF(analyzers);
    }
    else if (language) {
        ivars->analyzers = VA_new(3);
        VA_Push(ivars->analyzers, (Obj*)CaseFolder_new());
        VA_Push(ivars->analyzers, (Obj*)RegexTokenizer_new(NULL));
        VA_Push(ivars->analyzers, (Obj*)SnowStemmer_new(language));
    }
    else {
        THROW(ERR, "Must specify either 'language' or 'analyzers'");
    }

    return self;
}

void
PolyAnalyzer_Destroy_IMP(PolyAnalyzer *self) {
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
    DECREF(ivars->analyzers);
    SUPER_DESTROY(self, POLYANALYZER);
}

VArray*
PolyAnalyzer_Get_Analyzers_IMP(PolyAnalyzer *self) {
    return PolyAnalyzer_IVARS(self)->analyzers;
}

Inversion*
PolyAnalyzer_Transform_IMP(PolyAnalyzer *self, Inversion *inversion) {
    VArray *const analyzers = PolyAnalyzer_IVARS(self)->analyzers;
    (void)INCREF(inversion);

    // Iterate through each of the analyzers in order.
    for (uint32_t i = 0, max = VA_Get_Size(analyzers); i < max; i++) {
        Analyzer *analyzer = (Analyzer*)VA_Fetch(analyzers, i);
        Inversion *new_inversion = Analyzer_Transform(analyzer, inversion);
        DECREF(inversion);
        inversion = new_inversion;
    }

    return inversion;
}

Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    VArray *const   analyzers     = PolyAnalyzer_IVARS(self)->analyzers;
    const uint32_t  num_analyzers = VA_Get_Size(analyzers);
    Inversion      *retval;

    if (num_analyzers == 0) {
        size_t      token_len = Str_Get_Size(text);
        const char *buf       = Str_Get_Ptr8(text);
        Token *seed = Token_new(buf, token_len, 0, token_len, 1.0f, 1);
        retval = Inversion_new(seed);
        DECREF(seed);
    }
    else {
        Analyzer *first_analyzer = (Analyzer*)VA_Fetch(analyzers, 0);
        retval = Analyzer_Transform_Text(first_analyzer, text);
        for (uint32_t i = 1; i < num_analyzers; i++) {
            Analyzer *analyzer = (Analyzer*)VA_Fetch(analyzers, i);
            Inversion *new_inversion = Analyzer_Transform(analyzer, retval);
            DECREF(retval);
            retval = new_inversion;
        }
    }

    return retval;
}

bool
PolyAnalyzer_Equals_IMP(PolyAnalyzer *self, Obj *other) {
    if ((PolyAnalyzer*)other == self)                         { return true; }
    if (!Obj_Is_A(other, POLYANALYZER))                       { return false; }
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
    PolyAnalyzerIVARS *const ovars = PolyAnalyzer_IVARS((PolyAnalyzer*)other);
    if (!VA_Equals(ovars->analyzers, (Obj*)ivars->analyzers)) { return false; }
    return true;
}

Obj*
PolyAnalyzer_Dump_IMP(PolyAnalyzer *self) {
    PolyAnalyzerIVARS *const ivars = PolyAnalyzer_IVARS(self);
    PolyAnalyzer_Dump_t super_dump
        = SUPER_METHOD_PTR(POLYANALYZER, LUCY_PolyAnalyzer_Dump);
    Hash *dump = (Hash*)CERTIFY(super_dump(self), HASH);
    if (ivars->analyzers) {
        Hash_Store_Utf8(dump, "analyzers", 9,
                        Freezer_dump((Obj*)ivars->analyzers));
    }
    return (Obj*)dump;
}

PolyAnalyzer*
PolyAnalyzer_Load_IMP(PolyAnalyzer *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    PolyAnalyzer_Load_t super_load 
        = SUPER_METHOD_PTR(POLYANALYZER, LUCY_PolyAnalyzer_Load);
    PolyAnalyzer *loaded = super_load(self, dump);
    VArray *analyzer_dumps
        = (VArray*)CERTIFY(Hash_Fetch_Utf8(source, "analyzers", 9), VARRAY);
    VArray *analyzers
        = (VArray*)CERTIFY(Freezer_load((Obj*)analyzer_dumps), VARRAY);
    PolyAnalyzer_init(loaded, NULL, analyzers);
    DECREF(analyzers);
    return loaded;
}