/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parcel Lucy;
/** Tokenize/modify/filter text.
*
* An Analyzer is a filter which processes text, transforming it from one form
* into another. For instance, an analyzer might break up a long text into
* smaller pieces (L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>), or it might
* perform case folding to facilitate case-insensitive search
* (L<CaseFolder|Lucy::Analysis::CaseFolder>).
*
* Analyzer itself is abstract: Transform() is declared abstract and must be
* supplied by each concrete subclass, while Transform_Text() and Split() are
* declared as ordinary (non-abstract) methods.
*/
abstract class Lucy::Analysis::Analyzer
inherits Lucy::Object::Obj : dumpable {
/** Initialize an Analyzer.  Declared `inert`, i.e. as a plain function
* rather than a dispatched method.
*
* @param self The Analyzer to initialize.
* @return an Analyzer (presumably `self` -- the implementation is not
* visible in this header; confirm before relying on it).
*/
public inert Analyzer*
init(Analyzer *self);
/** Take a single L<Inversion|Lucy::Analysis::Inversion> as input
* and return an Inversion, either the same one (presumably transformed
* in some way), or a new one.
*
* This method is abstract; every concrete subclass must implement it.
*
* @param inversion The Inversion to process.
* @return an Inversion.  The return value is declared `incremented`, so
* the caller receives its own reference to it.
*/
public abstract incremented Inversion*
Transform(Analyzer *self, Inversion *inversion);
/** Kick off an analysis chain, creating an Inversion from string input.
* The default implementation simply creates an initial Inversion with a
* single Token, then calls Transform(), but occasionally subclasses will
* provide an optimized implementation which minimizes string copies.
*
* @param text The string to analyze.
* @return an Inversion built from `text`.  The return value is declared
* `incremented`, so the caller receives its own reference to it.
*/
public incremented Inversion*
Transform_Text(Analyzer *self, CharBuf *text);
/** Analyze text and return an array of token texts.
*
* @param text The string to analyze.
* @return a VArray of token texts.  The return value is declared
* `incremented`, so the caller receives its own reference to it.
*/
public incremented VArray*
Split(Analyzer *self, CharBuf *text);
}