The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Suppress a "stoplist" of common words.
 *
 * A "stoplist" is a collection of "stopwords": words which are common enough to
 * be of little value when determining search results.  For example, so many
 * documents in English contain "the", "if", and "maybe" that it may improve
 * both performance and relevance to block them.
 *
 * Before filtering stopwords:
 *
 *     ("i", "am", "the", "walrus")
 *
 * After filtering stopwords:
 *
 *     ("walrus")
 *
 * SnowballStopFilter provides default stoplists for several languages, courtesy of
 * the Snowball project (<http://snowball.tartarus.org>), or you may supply
 * your own.
 *
 *     |-----------------------|
 *     | ISO CODE | LANGUAGE   |
 *     |-----------------------|
 *     | da       | Danish     |
 *     | de       | German     |
 *     | en       | English    |
 *     | es       | Spanish    |
 *     | fi       | Finnish    |
 *     | fr       | French     |
 *     | hu       | Hungarian  |
 *     | it       | Italian    |
 *     | nl       | Dutch      |
 *     | no       | Norwegian  |
 *     | pt       | Portuguese |
 *     | sv       | Swedish    |
 *     | ru       | Russian    |
 *     |-----------------------|
 */

public class Lucy::Analysis::SnowballStopFilter nickname SnowStop
    inherits Lucy::Analysis::Analyzer {

    /* Per-instance stoplist.  Per the init() documentation below, this is a
     * hash with stopwords as the keys.
     */
    Hash *stoplist;

    /* Class-level ("inert") word-list pointers, one per supported ISO
     * language code (da, de, en, es, fi, fr, hu, it, nl, no, pt, ru, sv).
     * NOTE(review): presumably these are the built-in Snowball stopword
     * lists consumed by gen_stoplist(); their exact layout and termination
     * are defined in the implementation, not here -- confirm there.
     */
    inert const uint8_t** snow_da;
    inert const uint8_t** snow_de;
    inert const uint8_t** snow_en;
    inert const uint8_t** snow_es;
    inert const uint8_t** snow_fi;
    inert const uint8_t** snow_fr;
    inert const uint8_t** snow_hu;
    inert const uint8_t** snow_it;
    inert const uint8_t** snow_nl;
    inert const uint8_t** snow_no;
    inert const uint8_t** snow_pt;
    inert const uint8_t** snow_ru;
    inert const uint8_t** snow_sv;

    /* Constructor.  Accepts the same `language` and `stoplist` arguments as
     * init(); both default to NULL.  Declared `incremented`, so the caller
     * receives a reference it is responsible for releasing.
     */
    inert incremented SnowballStopFilter*
    new(String *language = NULL, Hash *stoplist = NULL);

    /**
     * @param language The ISO code for a supported language.
     * @param stoplist A hash with stopwords as the keys.
     */
    public inert SnowballStopFilter*
    init(SnowballStopFilter *self, String *language = NULL,
         Hash *stoplist = NULL);

    /** Return a Hash with the Snowball stoplist for the supplied language.
     */
    inert incremented Hash*
    gen_stoplist(String *language);

    /* Analyzer entry point: takes an Inversion and returns an Inversion.
     * Per the class description, tokens matching the stoplist are
     * suppressed.
     * NOTE(review): whether the returned Inversion is a new object or the
     * input with an added reference is decided by the implementation --
     * confirm there.
     */
    public incremented Inversion*
    Transform(SnowballStopFilter *self, Inversion *inversion);

    /* Equality test against an arbitrary object.
     * NOTE(review): presumably compares stoplist contents -- confirm in the
     * implementation.
     */
    public bool
    Equals(SnowballStopFilter *self, Obj *other);

    /* Serialization pair: Dump() produces an Obj representation of this
     * filter and Load() takes such a dump and produces an Obj.
     * NOTE(review): the structure of the dump is not visible in this
     * header -- confirm in the implementation.
     */
    public incremented Obj*
    Dump(SnowballStopFilter *self);

    public incremented Obj*
    Load(SnowballStopFilter *self, Obj *dump);

    /* Destructor.
     * NOTE(review): presumably releases `stoplist` -- confirm in the
     * implementation.
     */
    public void
    Destroy(SnowballStopFilter *self);
}