The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Multiple Analyzers in series.
 *
 * A PolyAnalyzer is a series of L<Analyzers|Lucy::Analysis::Analyzer>,
 * each of which will be called upon to "analyze" text in turn.  You can
 * either provide the Analyzers yourself, or you can specify a supported
 * language, in which case a PolyAnalyzer consisting of a
 * L<CaseFolder|Lucy::Analysis::CaseFolder>, a
 * L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>, and a
 * L<SnowballStemmer|Lucy::Analysis::SnowballStemmer> will be generated for you.
 *
 * Supported languages:
 *
 *     en => English,
 *     da => Danish,
 *     de => German,
 *     es => Spanish,
 *     fi => Finnish,
 *     fr => French,
 *     hu => Hungarian,
 *     it => Italian,
 *     nl => Dutch,
 *     no => Norwegian,
 *     pt => Portuguese,
 *     ro => Romanian,
 *     ru => Russian,
 *     sv => Swedish,
 *     tr => Turkish,
 */
class Lucy::Analysis::PolyAnalyzer
    inherits Lucy::Analysis::Analyzer : dumpable {

    /* The series of Analyzers that make up this PolyAnalyzer.  Order is
     * significant: per the class description, each one is called upon in
     * turn.
     */
    VArray  *analyzers;

    /* Constructor function.  `inert` means it is a plain function rather
     * than a method, and it is not declared `public`.  It accepts the same
     * `language` / `analyzers` arguments as init() below; the `incremented`
     * return type means the caller receives its own reference to the
     * returned object.
     * NOTE(review): presumably allocates a new object and delegates to
     * init() -- confirm against the implementation file.
     */
    inert incremented PolyAnalyzer*
    new(const CharBuf *language = NULL, VArray *analyzers = NULL);

    /* NOTE(review): both parameters of init() default to NULL.  What happens
     * when neither (or both) are supplied, or when `language` is not one of
     * the supported codes, is not visible in this header -- confirm against
     * the implementation before relying on it.
     */

    /** Initialize a PolyAnalyzer, either from an explicit array of
     * Analyzers or from a supported language code.
     *
     * @param language An ISO code from the list of supported languages.
     * @param analyzers An array of Analyzers.  The order of the analyzers
     * matters.  Don't put a SnowballStemmer before a RegexTokenizer (can't stem whole
     * documents or paragraphs -- just individual words), or a SnowballStopFilter
     * after a SnowballStemmer (stemmed words, e.g. "themselv", will not appear in a
     * stoplist).  In general, the sequence should be: normalize, tokenize,
     * stopalize, stem.
     */
    public inert PolyAnalyzer*
    init(PolyAnalyzer *self, const CharBuf *language = NULL,
         VArray *analyzers = NULL);

    /** Getter for "analyzers" member.
     */
    public VArray*
    Get_Analyzers(PolyAnalyzer *self);

    /** Analyze an existing Inversion.  The caller receives its own
     * reference to the returned Inversion.
     *
     * @param inversion The Inversion to be analyzed.
     */
    public incremented Inversion*
    Transform(PolyAnalyzer *self, Inversion *inversion);

    /** Analyze raw text, producing an Inversion.  The caller receives its
     * own reference to the returned Inversion.
     *
     * @param text The text to be analyzed.
     */
    public incremented Inversion*
    Transform_Text(PolyAnalyzer *self, CharBuf *text);

    /* NOTE(review): the equality criteria (e.g. whether the `analyzers`
     * arrays are compared element by element) are defined in the
     * implementation file, not here -- confirm before documenting further.
     */

    /** Equality test against another object.
     *
     * @param other The object to compare with.
     */
    public bool_t
    Equals(PolyAnalyzer *self, Obj *other);

    /* NOTE(review): the expected structure of `dump` is not visible in this
     * header; presumably it is whatever the Dump() generated for this
     * `dumpable` class produces -- verify against the implementation.
     */

    /** Create a PolyAnalyzer from a dump.  The caller receives its own
     * reference to the returned object.
     *
     * @param dump The dumped representation to load from.
     */
    public incremented PolyAnalyzer*
    Load(PolyAnalyzer *self, Obj *dump);

    /* NOTE(review): presumably releases the `analyzers` member before
     * invoking the parent destructor -- confirm against the implementation.
     */

    /** Destructor.
     */
    public void
    Destroy(PolyAnalyzer *self);
}