The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Tokenize/modify/filter text.
 *
 * An Analyzer is a filter which processes text, transforming it from one form
 * into another.  For instance, an analyzer might break up a long text into
 * smaller pieces ([](cfish:RegexTokenizer)), or it
 * might perform case folding to facilitate case-insensitive search
 * ([](cfish:Normalizer)).
 */
public abstract class Lucy::Analysis::Analyzer inherits Clownfish::Obj {

    /** Abstract initializer.
     *
     * Declared `inert`, so it is a plain function rather than a dynamically
     * dispatched method.
     *
     * @return The analyzer that was passed in (NOTE(review): presumably
     * `self` after initialization -- confirm against the implementation).
     */
    public inert Analyzer*
    init(Analyzer *self);

    /** Take a single [](cfish:Inversion) as input
     * and return an Inversion, either the same one (presumably transformed
     * in some way), or a new one.
     *
     * This method is abstract: every concrete subclass must implement it.
     *
     * @param inversion An inversion.
     * @return An Inversion, either `inversion` itself or a new one.  The
     * return type is declared `incremented`, so the caller is expected to
     * own a reference to the result.
     */
    public abstract incremented Inversion*
    Transform(Analyzer *self, Inversion *inversion);

    /** Kick off an analysis chain, creating an Inversion from string input.
     * The default implementation simply creates an initial Inversion with a
     * single Token, then calls [](cfish:.Transform), but occasionally
     * subclasses will provide an optimized implementation which minimizes
     * string copies.
     *
     * @param text A string.
     * @return An Inversion created from `text`.  The return type is
     * declared `incremented`, so the caller is expected to own a reference
     * to the result.
     */
    public incremented Inversion*
    Transform_Text(Analyzer *self, String *text);

    /** Analyze text and return an array of token texts.
     *
     * @param text A string.
     * @return A Vector holding the text of each token (NOTE(review):
     * elements are presumably String objects -- confirm against the
     * implementation).
     */
    public incremented Vector*
    Split(Analyzer *self, String *text);

    /** Dump the analyzer as a hash.
     *
     * Subclasses should call [](cfish:.Dump) on the superclass. The returned
     * object is a hash which should be populated with parameters of
     * the analyzer.
     *
     * @return A hash containing a description of the analyzer.
     */
    public incremented Obj*
    Dump(Analyzer *self);

    /** Reconstruct an analyzer from a dump.
     *
     * Subclasses should first call [](cfish:.Load) on the superclass. The
     * returned object is an analyzer which should be reconstructed by
     * setting the dumped parameters from the hash contained in `dump`.
     *
     * Note that the invocant analyzer is unused.
     *
     * @param dump A hash (passed as a generic `Obj*`).
     * @return An analyzer.
     */
    public incremented Obj*
    Load(Analyzer *self, Obj *dump);
}