parcel KinoSearch cnick Kino;
/** Multiple Analyzers in series.
*
* A PolyAnalyzer is a series of L<Analyzers|KinoSearch::Analysis::Analyzer>,
* each of which will be called upon to "analyze" text in turn. You can
* either provide the Analyzers yourself, or you can specify a supported
* language, in which case a PolyAnalyzer consisting of a
* L<CaseFolder|KinoSearch::Analysis::CaseFolder>, a
* L<Tokenizer|KinoSearch::Analysis::Tokenizer>, and a
* L<Stemmer|KinoSearch::Analysis::Stemmer> will be generated for you.
*
* Supported languages:
*
* en => English,
* da => Danish,
* de => German,
* es => Spanish,
* fi => Finnish,
* fr => French,
* hu => Hungarian,
* it => Italian,
* nl => Dutch,
* no => Norwegian,
* pt => Portuguese,
* ro => Romanian,
* ru => Russian,
* sv => Swedish,
* tr => Turkish,
*/
class KinoSearch::Analysis::PolyAnalyzer
inherits KinoSearch::Analysis::Analyzer : dumpable {
/* The series of Analyzers that make up this PolyAnalyzer; exposed to
* callers through Get_Analyzers().
*/
VArray *analyzers;
/* Class-level ("inert") constructor function.  Accepts the same two
* optional arguments as init() and returns a PolyAnalyzer declared as
* "incremented".
* NOTE(review): presumably allocates a new object and delegates to init();
* confirm against the C implementation.
*/
inert incremented PolyAnalyzer*
new(const CharBuf *language = NULL, VArray *analyzers = NULL);
/** Initialize a PolyAnalyzer from either a supported language code or an
* explicit array of Analyzers.  Both arguments default to NULL.
*
* @param language An ISO code from the list of supported languages.
* @param analyzers An array of Analyzers. The order of the analyzers
* matters. Don't put a Stemmer before a Tokenizer (can't stem whole
* documents or paragraphs -- just individual words), or a Stopalizer
* after a Stemmer (stemmed words, e.g. "themselv", will not appear in a
* stoplist). In general, the sequence should be: normalize, tokenize,
* stopalize, stem.
*/
public inert PolyAnalyzer*
init(PolyAnalyzer *self, const CharBuf *language = NULL,
VArray *analyzers = NULL);
/** Getter for "analyzers" member.
*/
public VArray*
Get_Analyzers(PolyAnalyzer *self);
/* Takes an Inversion and returns an Inversion declared as "incremented".
* NOTE(review): presumably feeds the Inversion through each Analyzer in
* "analyzers" in order, as the class description suggests; confirm against
* the C implementation.
*/
public incremented Inversion*
Transform(PolyAnalyzer *self, Inversion *inversion);
/* Takes raw text as a CharBuf and returns an Inversion declared as
* "incremented".
* NOTE(review): presumably the text-input counterpart of Transform();
* confirm against the C implementation.
*/
public incremented Inversion*
Transform_Text(PolyAnalyzer *self, CharBuf *text);
/* Compares this PolyAnalyzer against an arbitrary Obj and reports the
* result as a bool_t.
* NOTE(review): the exact equality criteria (e.g. element-wise comparison
* of "analyzers") are not visible from this declaration; confirm.
*/
public bool_t
Equals(PolyAnalyzer *self, Obj *other);
/* Takes a dump (as an Obj) and returns a PolyAnalyzer declared as
* "incremented".  The class is declared "dumpable".
* NOTE(review): presumably rebuilds a PolyAnalyzer from the output of the
* corresponding dump operation; confirm the expected dump structure.
*/
public incremented PolyAnalyzer*
Load(PolyAnalyzer *self, Obj *dump);
/* Destructor.
* NOTE(review): presumably releases the "analyzers" member before the
* object itself is freed; confirm against the C implementation.
*/
public void
Destroy(PolyAnalyzer *self);
}
/* Copyright 2005-2011 Marvin Humphrey
*
* This program is free software; you can redistribute it and/or modify it
* under the same terms as Perl itself.
*/