The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Tokenize/modify/filter text.
 *
 * An Analyzer is a filter which processes text, transforming it from one form
 * into another.  For instance, an analyzer might break up a long text into
 * smaller pieces (L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>), or it might
 * perform case folding to facilitate case-insensitive search
 * (L<CaseFolder|Lucy::Analysis::CaseFolder>).
 *
 * This class is abstract: Transform() is declared abstract below and has to
 * be supplied by each concrete subclass.
 */
abstract class Lucy::Analysis::Analyzer
    inherits Lucy::Object::Obj : dumpable {

    /** Initialize an Analyzer.
     *
     * @param self The Analyzer to initialize.
     * @return An Analyzer pointer (presumably `self`; confirm against the
     * implementation).
     */
    public inert Analyzer*
    init(Analyzer *self);

    /** Take a single L<Inversion|Lucy::Analysis::Inversion> as input
     * and return an Inversion, either the same one (presumably transformed
     * in some way), or a new one.
     *
     * @param inversion The Inversion to process.
     * @return The resulting Inversion.
     */
    public abstract incremented Inversion*
    Transform(Analyzer *self, Inversion *inversion);

    /** Kick off an analysis chain, creating an Inversion from string input.
     * The default implementation simply creates an initial Inversion with a
     * single Token, then calls Transform(), but occasionally subclasses will
     * provide an optimized implementation which minimizes string copies.
     *
     * @param text The string input to analyze.
     * @return The Inversion produced from `text`.
     */
    public incremented Inversion*
    Transform_Text(Analyzer *self, CharBuf *text);

    /** Analyze text and return an array of token texts.
     *
     * @param text The string input to analyze.
     * @return A VArray holding the token texts.
     */
    public incremented VArray*
    Split(Analyzer *self, CharBuf *text);
}