The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define C_LUCY_QUERYPARSER
#include <stdlib.h>
#include <ctype.h>
#include "Lucy/Util/ToolSet.h"

#include "Lucy/Search/QueryParser.h"

#include "Clownfish/CharBuf.h"
#include "Lucy/Search/QueryParser/ParserElem.h"
#include "Lucy/Search/QueryParser/QueryLexer.h"
#include "Lucy/Analysis/Analyzer.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Search/LeafQuery.h"
#include "Lucy/Search/ANDQuery.h"
#include "Lucy/Search/MatchAllQuery.h"
#include "Lucy/Search/NoMatchQuery.h"
#include "Lucy/Search/NOTQuery.h"
#include "Lucy/Search/ORQuery.h"
#include "Lucy/Search/PhraseQuery.h"
#include "Lucy/Search/RequiredOptionalQuery.h"
#include "Lucy/Search/TermQuery.h"
#include "Lucy/Search/Query.h"

#define SHOULD            LUCY_QPARSER_SHOULD
#define MUST              LUCY_QPARSER_MUST
#define MUST_NOT          LUCY_QPARSER_MUST_NOT
#define TOKEN_OPEN_PAREN  LUCY_QPARSER_TOKEN_OPEN_PAREN
#define TOKEN_CLOSE_PAREN LUCY_QPARSER_TOKEN_CLOSE_PAREN
#define TOKEN_MINUS       LUCY_QPARSER_TOKEN_MINUS
#define TOKEN_PLUS        LUCY_QPARSER_TOKEN_PLUS
#define TOKEN_NOT         LUCY_QPARSER_TOKEN_NOT
#define TOKEN_OR          LUCY_QPARSER_TOKEN_OR
#define TOKEN_AND         LUCY_QPARSER_TOKEN_AND
#define TOKEN_FIELD       LUCY_QPARSER_TOKEN_FIELD
#define TOKEN_STRING      LUCY_QPARSER_TOKEN_STRING
#define TOKEN_QUERY       LUCY_QPARSER_TOKEN_QUERY

// Helper function for Tree().
static Query*
S_parse_subquery(QueryParser *self, VArray *elems, String *default_field,
                 bool enclosed);

// Drop unmatched right parens and add matching right parens at end to
// close paren groups implicitly.
static void
S_balance_parens(QueryParser *self, VArray *elems);

// Work from the inside out, reducing the leftmost, innermost paren groups
// first, until the array of elems contains no parens.
static void
S_parse_subqueries(QueryParser *self, VArray *elems);

static void
S_compose_inner_queries(QueryParser *self, VArray *elems,
                        String *default_field);

// Apply +, -, NOT.
static void
S_apply_plusses_and_negations(QueryParser *self, VArray *elems);

// Wrap negated queries with NOTQuery objects.
static void
S_compose_not_queries(QueryParser *self, VArray *elems);

// Silently discard non-sensical combos of AND and OR, e.g.
// 'OR a AND AND OR b AND'.
static void
S_winnow_boolops(QueryParser *self, VArray *elems);

// Join ANDQueries.
static void
S_compose_and_queries(QueryParser *self, VArray *elems);

// Join ORQueries.
static void
S_compose_or_queries(QueryParser *self, VArray *elems);

// Derive a single subquery from all Query objects in the clause.
static Query*
S_compose_subquery(QueryParser *self, VArray *elems, bool enclosed);

QueryParser*
QParser_new(Schema *schema, Analyzer *analyzer, String *default_boolop,
            VArray *fields) {
    QueryParser *self = (QueryParser*)Class_Make_Obj(QUERYPARSER);
    return QParser_init(self, schema, analyzer, default_boolop, fields);
}

QueryParser*
QParser_init(QueryParser *self, Schema *schema, Analyzer *analyzer,
             String *default_boolop, VArray *fields) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    // Init.
    ivars->heed_colons = false;
    ivars->lexer       = QueryLexer_new();

    // Assign.
    ivars->schema         = (Schema*)INCREF(schema);
    ivars->analyzer       = (Analyzer*)INCREF(analyzer);
    ivars->default_boolop = default_boolop
                           ? Str_Clone(default_boolop)
                           : Str_new_from_trusted_utf8("OR", 2);

    if (fields) {
        ivars->fields = VA_Shallow_Copy(fields);
        for (uint32_t i = 0, max = VA_Get_Size(fields); i < max; i++) {
            CERTIFY(VA_Fetch(fields, i), STRING);
        }
        VA_Sort(ivars->fields, NULL, NULL);
    }
    else {
        VArray *all_fields = Schema_All_Fields(schema);
        uint32_t num_fields = VA_Get_Size(all_fields);
        ivars->fields = VA_new(num_fields);
        for (uint32_t i = 0; i < num_fields; i++) {
            String *field = (String*)VA_Fetch(all_fields, i);
            FieldType *type = Schema_Fetch_Type(schema, field);
            if (type && FType_Indexed(type)) {
                VA_Push(ivars->fields, INCREF(field));
            }
        }
        DECREF(all_fields);
    }
    VA_Sort(ivars->fields, NULL, NULL);

    // Derive default "occur" from default boolean operator.
    if (Str_Equals_Utf8(ivars->default_boolop, "OR", 2)) {
        ivars->default_occur = SHOULD;
    }
    else if (Str_Equals_Utf8(ivars->default_boolop, "AND", 3)) {
        ivars->default_occur = MUST;
    }
    else {
        THROW(ERR, "Invalid value for default_boolop: %o", ivars->default_boolop);
    }

    return self;
}

void
QParser_Destroy_IMP(QueryParser *self) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    DECREF(ivars->schema);
    DECREF(ivars->analyzer);
    DECREF(ivars->default_boolop);
    DECREF(ivars->fields);
    DECREF(ivars->lexer);
    SUPER_DESTROY(self, QUERYPARSER);
}

Analyzer*
QParser_Get_Analyzer_IMP(QueryParser *self) {
    return QParser_IVARS(self)->analyzer;
}

Schema*
QParser_Get_Schema_IMP(QueryParser *self) {
    return QParser_IVARS(self)->schema;
}

String*
QParser_Get_Default_BoolOp_IMP(QueryParser *self) {
    return QParser_IVARS(self)->default_boolop;
}

VArray*
QParser_Get_Fields_IMP(QueryParser *self) {
    return QParser_IVARS(self)->fields;
}

bool
QParser_Heed_Colons_IMP(QueryParser *self) {
    return QParser_IVARS(self)->heed_colons;
}

void
QParser_Set_Heed_Colons_IMP(QueryParser *self, bool heed_colons) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    ivars->heed_colons = heed_colons;
    QueryLexer_Set_Heed_Colons(ivars->lexer, heed_colons);
}


Query*
QParser_Parse_IMP(QueryParser *self, String *query_string) {
    String *qstring = query_string
                      ? Str_Clone(query_string)
                      : Str_new_from_trusted_utf8("", 0);
    Query *tree     = QParser_Tree(self, qstring);
    Query *expanded = QParser_Expand(self, tree);
    Query *pruned   = QParser_Prune(self, expanded);
    DECREF(expanded);
    DECREF(tree);
    DECREF(qstring);
    return pruned;
}

Query*
QParser_Tree_IMP(QueryParser *self, String *query_string) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    VArray *elems = QueryLexer_Tokenize(ivars->lexer, query_string);
    S_balance_parens(self, elems);
    S_parse_subqueries(self, elems);
    Query *query = S_parse_subquery(self, elems, NULL, false);
    DECREF(elems);
    return query;
}

static void
S_parse_subqueries(QueryParser *self, VArray *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    while (1) {
        // Work from the inside out, starting with the leftmost innermost
        // paren group.
        size_t left = SIZE_MAX;
        size_t right = SIZE_MAX;
        String *field = NULL;
        for (size_t i = 0, max = VA_Get_Size(elems); i < max; i++) {
            ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
            uint32_t type = ParserElem_Get_Type(elem);
            if (type == TOKEN_OPEN_PAREN) {
                left = i;
            }
            else if (type == TOKEN_CLOSE_PAREN) {
                right = i;
                break;
            }
            else if (type == TOKEN_FIELD && i < max - 1) {
                // If a field applies to an enclosing paren, pass it along.
                ParserElem *next_elem = (ParserElem*)VA_Fetch(elems, i + 1);
                uint32_t next_type = ParserElem_Get_Type(next_elem);
                if (next_type == TOKEN_OPEN_PAREN) {
                    field = (String*)ParserElem_As(elem, STRING);
                }
            }
        }

        // Break out of loop when there are no parens left.
        if (right == SIZE_MAX) {
            break;
        }

        // Create the subquery.
        VArray *sub_elems = VA_Slice(elems, left + 1, right - left - 1);
        Query *subquery = S_parse_subquery(self, sub_elems, field, true);
        ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)subquery);
        if (default_occur == MUST) {
            ParserElem_Require(new_elem);
        }
        DECREF(sub_elems);

        // Replace the elements used to create the subquery with the subquery
        // itself.
        if (left > 0) {
            ParserElem *maybe_field = (ParserElem*)VA_Fetch(elems, left - 1);
            uint32_t maybe_field_type = ParserElem_Get_Type(maybe_field);
            if (maybe_field_type == TOKEN_FIELD) {
                left -= 1;
            }
        }
        VA_Excise(elems, left + 1, right - left);
        VA_Store(elems, left, (Obj*)new_elem);
    }
}

static void
S_discard_elems(VArray *elems, uint32_t type) {
    for (size_t i = VA_Get_Size(elems); i--;) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == type) { VA_Excise(elems, i, 1); }
    }
}

static Query*
S_parse_subquery(QueryParser *self, VArray *elems, String *default_field,
                 bool enclosed) {
    if (VA_Get_Size(elems)) {
        ParserElem *first = (ParserElem*)VA_Fetch(elems, 0);
        if (ParserElem_Get_Type(first) == TOKEN_OPEN_PAREN) {
            enclosed = true;
            DECREF(VA_Shift(elems));
            DECREF(VA_Pop(elems));
        }
    }
    S_compose_inner_queries(self, elems, default_field);
    S_discard_elems(elems, TOKEN_FIELD);
    S_discard_elems(elems, TOKEN_STRING);
    S_apply_plusses_and_negations(self, elems);
    S_discard_elems(elems, TOKEN_PLUS);
    S_discard_elems(elems, TOKEN_MINUS);
    S_discard_elems(elems, TOKEN_NOT);
    S_compose_not_queries(self, elems);
    S_winnow_boolops(self, elems);
    if (VA_Get_Size(elems) > 2) {
        S_compose_and_queries(self, elems);
        // Don't double wrap '(a AND b)'.
        if (VA_Get_Size(elems) == 1) { enclosed = false; }
    }
    S_discard_elems(elems, TOKEN_AND);
    if (VA_Get_Size(elems) > 2) {
        S_compose_or_queries(self, elems);
        // Don't double wrap '(a OR b)'.
        if (VA_Get_Size(elems) == 1) { enclosed = false; }
    }
    S_discard_elems(elems, TOKEN_OR);
    Query *retval = S_compose_subquery(self, elems, enclosed);

    return retval;
}

static void
S_balance_parens(QueryParser *self, VArray *elems) {
    UNUSED_VAR(self);
    // Count paren balance, eliminate unbalanced right parens.
    int64_t paren_depth = 0;
    size_t i = 0;
    while (i < VA_Get_Size(elems)) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == TOKEN_OPEN_PAREN) {
            paren_depth++;
        }
        else if (ParserElem_Get_Type(elem) == TOKEN_CLOSE_PAREN) {
            if (paren_depth > 0) {
                paren_depth--;
            }
            else {
                VA_Excise(elems, i, 1);
                continue;
            }
        }
        i++;
    }

    // Insert implicit parens.
    while (paren_depth--) {
        ParserElem *elem = ParserElem_new(TOKEN_CLOSE_PAREN, NULL);
        VA_Push(elems, (Obj*)elem);
    }
}

static void
S_compose_inner_queries(QueryParser *self, VArray *elems,
                        String *default_field) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    // Generate all queries.  Apply any fields.
    for (uint32_t i = VA_Get_Size(elems); i--;) {
        String *field = default_field;
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);

        // Apply field.
        if (i > 0) {
            // Field specifier must immediately precede any query.
            ParserElem* maybe_field_elem
                = (ParserElem*)VA_Fetch(elems, i - 1);
            if (ParserElem_Get_Type(maybe_field_elem) == TOKEN_FIELD) {
                field = (String*)ParserElem_As(maybe_field_elem, STRING);
            }
        }

        if (ParserElem_Get_Type(elem) == TOKEN_STRING) {
            String *text = (String*)ParserElem_As(elem, STRING);
            LeafQuery *query = LeafQuery_new(field, text);
            ParserElem *new_elem
                = ParserElem_new(TOKEN_QUERY, (Obj*)query);
            if (default_occur == MUST) {
                ParserElem_Require(new_elem);
            }
            VA_Store(elems, i, (Obj*)new_elem);
        }
    }
}

static void
S_apply_plusses_and_negations(QueryParser *self, VArray *elems) {
    UNUSED_VAR(self);
    for (uint32_t i = VA_Get_Size(elems); i--;) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == TOKEN_QUERY) {
            for (uint32_t j = i; j--;) {
                ParserElem *prev = (ParserElem*)VA_Fetch(elems, j);
                uint32_t prev_type = ParserElem_Get_Type(prev);
                if (prev_type == TOKEN_MINUS || prev_type == TOKEN_NOT) {
                    ParserElem_Negate(elem);
                }
                else if (prev_type == TOKEN_PLUS) {
                    ParserElem_Require(elem);
                }
                else {
                    break;
                }
            }
        }
    }
}

static void
S_compose_not_queries(QueryParser *self, VArray *elems) {
    for (uint32_t i = 0, max = VA_Get_Size(elems); i < max; i++) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == TOKEN_QUERY
            && ParserElem_Negated(elem)
           ) {
            Query *inner_query = (Query*)ParserElem_As(elem, QUERY);
            Query *not_query = QParser_Make_NOT_Query(self, inner_query);
            ParserElem_Set_Value(elem, (Obj*)not_query);
            DECREF(not_query);
        }
    }
}

static void
S_winnow_boolops(QueryParser *self, VArray *elems) {
    UNUSED_VAR(self);
    for (uint32_t i = 0; i < VA_Get_Size(elems); i++) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) != TOKEN_QUERY) {
            uint32_t num_to_zap = 0;
            ParserElem *preceding = (ParserElem*)VA_Fetch(elems, i - 1);
            ParserElem *following = (ParserElem*)VA_Fetch(elems, i + 1);
            if (!preceding || ParserElem_Get_Type(preceding) != TOKEN_QUERY) {
                num_to_zap = 1;
            }
            if (!following || ParserElem_Get_Type(following) != TOKEN_QUERY) {
                num_to_zap = 1;
            }
            for (uint32_t j = i + 1, jmax = VA_Get_Size(elems); j < jmax; j++) {
                ParserElem *maybe = (ParserElem*)VA_Fetch(elems, j);
                if (ParserElem_Get_Type(maybe) == TOKEN_QUERY) { break; }
                else { num_to_zap++; }
            }
            if (num_to_zap) { VA_Excise(elems, i, num_to_zap); }
        }
    }
}

// Apply AND.
static void
S_compose_and_queries(QueryParser *self, VArray *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    for (uint32_t i = 0; i + 2 < VA_Get_Size(elems); i++) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i + 1);
        if (ParserElem_Get_Type(elem) == TOKEN_AND) {
            ParserElem   *preceding  = (ParserElem*)VA_Fetch(elems, i);
            VArray       *children   = VA_new(2);
            uint32_t      num_to_zap = 0;

            // Add first clause.
            Query *preceding_query = (Query*)ParserElem_As(preceding, QUERY);
            VA_Push(children, INCREF(preceding_query));

            // Add following clauses.
            for (uint32_t j = i + 1, jmax = VA_Get_Size(elems);
                 j < jmax;
                 j += 2, num_to_zap += 2
                ) {
                ParserElem *maybe_and = (ParserElem*)VA_Fetch(elems, j);
                ParserElem *following = (ParserElem*)VA_Fetch(elems, j + 1);
                if (ParserElem_Get_Type(maybe_and) != TOKEN_AND) {
                    break;
                }
                else if (ParserElem_Get_Type(following) == TOKEN_QUERY) {
                    Query *next = (Query*)ParserElem_As(following, QUERY);
                    VA_Push(children, INCREF(next));
                }
                else {
                    THROW(ERR, "Unexpected type: %u32",
                          ParserElem_Get_Type(following));
                }
            }
            Query *and_query = QParser_Make_AND_Query(self, children);
            ParserElem_Set_Value(preceding, (Obj*)and_query);
            if (default_occur == MUST) {
                ParserElem_Require(preceding);
            }
            DECREF(and_query);
            DECREF(children);

            VA_Excise(elems, i + 1, num_to_zap);
        }
    }
}

static void
S_compose_or_queries(QueryParser *self, VArray *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    for (uint32_t i = 0; i + 2 < VA_Get_Size(elems); i++) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, i + 1);
        if (ParserElem_Get_Type(elem) == TOKEN_OR) {
            ParserElem   *preceding  = (ParserElem*)VA_Fetch(elems, i);
            VArray       *children   = VA_new(2);
            uint32_t      num_to_zap = 0;

            // Add first clause.
            Query *preceding_query = (Query*)ParserElem_As(preceding, QUERY);
            VA_Push(children, INCREF(preceding_query));

            // Add following clauses.
            for (uint32_t j = i + 1, jmax = VA_Get_Size(elems);
                 j < jmax;
                 j += 2, num_to_zap += 2
                ) {
                ParserElem *maybe_or  = (ParserElem*)VA_Fetch(elems, j);
                ParserElem *following = (ParserElem*)VA_Fetch(elems, j + 1);
                if (ParserElem_Get_Type(maybe_or) != TOKEN_OR) {
                    break;
                }
                else if (ParserElem_Get_Type(following) == TOKEN_QUERY) {
                    Query *next = (Query*)ParserElem_As(following, QUERY);
                    VA_Push(children, INCREF(next));
                }
                else {
                    THROW(ERR, "Unexpected type: %u32",
                          ParserElem_Get_Type(following));
                }
            }
            Query *or_query = QParser_Make_OR_Query(self, children);
            ParserElem_Set_Value(preceding, (Obj*)or_query);
            if (default_occur == MUST) {
                ParserElem_Require(preceding);
            }
            DECREF(or_query);
            DECREF(children);

            VA_Excise(elems, i + 1, num_to_zap);
        }
    }
}

static Query*
S_compose_subquery(QueryParser *self, VArray *elems, bool enclosed) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    Query *retval;

    if (VA_Get_Size(elems) == 0) {
        // No elems means no query. Maybe the search string was something
        // like 'NOT AND'
        if (enclosed) {
            retval = default_occur == SHOULD
                     ? QParser_Make_OR_Query(self, NULL)
                     : QParser_Make_AND_Query(self, NULL);
        }
        else {
            retval = (Query*)NoMatchQuery_new();
        }
    }
    else if (VA_Get_Size(elems) == 1 && !enclosed) {
        ParserElem *elem = (ParserElem*)VA_Fetch(elems, 0);
        Query *query = (Query*)ParserElem_As(elem, QUERY);
        retval = (Query*)INCREF(query);
    }
    else {
        uint32_t  num_elems = VA_Get_Size(elems);
        VArray   *required  = VA_new(num_elems);
        VArray   *optional  = VA_new(num_elems);
        VArray   *negated   = VA_new(num_elems);
        Query    *req_query = NULL;
        Query    *opt_query = NULL;

        // Demux elems into bins.
        for (uint32_t i = 0; i < num_elems; i++) {
            ParserElem *elem = (ParserElem*)VA_Fetch(elems, i);
            if (ParserElem_Required(elem)) {
                VA_Push(required, INCREF(ParserElem_As(elem, QUERY)));
            }
            else if (ParserElem_Optional(elem)) {
                VA_Push(optional, INCREF(ParserElem_As(elem, QUERY)));
            }
            else if (ParserElem_Negated(elem)) {
                VA_Push(negated, INCREF(ParserElem_As(elem, QUERY)));
            }
        }
        uint32_t num_required = VA_Get_Size(required);
        uint32_t num_negated  = VA_Get_Size(negated);
        uint32_t num_optional = VA_Get_Size(optional);

        // Bind all mandatory matchers together in one Query.
        if (num_required || num_negated) {
            if (enclosed || num_required + num_negated > 1) {
                VArray *children = VA_Shallow_Copy(required);
                VA_Push_VArray(children, negated);
                req_query = QParser_Make_AND_Query(self, children);
                DECREF(children);
            }
            else if (num_required) {
                req_query = (Query*)INCREF(VA_Fetch(required, 0));
            }
            else if (num_negated) {
                req_query = (Query*)INCREF(VA_Fetch(negated, 0));
            }
        }

        // Bind all optional matchers together in one Query.
        if (num_optional) {
            if (!enclosed && num_optional == 1) {
                opt_query = (Query*)INCREF(VA_Fetch(optional, 0));
            }
            else {
                opt_query = QParser_Make_OR_Query(self, optional);
            }
        }

        // Unify required and optional.
        if (req_query && opt_query) {
            if (num_required) { // not just negated elems
                retval = QParser_Make_Req_Opt_Query(self, req_query,
                                                    opt_query);
            }
            else {
                // req_query has only negated queries.
                VArray *children = VA_new(2);
                VA_Push(children, INCREF(req_query));
                VA_Push(children, INCREF(opt_query));
                retval = QParser_Make_AND_Query(self, children);
                DECREF(children);
            }
        }
        else if (opt_query) {
            // Only optional elems.
            retval = (Query*)INCREF(opt_query);
        }
        else if (req_query) {
            // Only required elems.
            retval = (Query*)INCREF(req_query);
        }
        else {
            retval = NULL; // kill "uninitialized" compiler warning
            THROW(ERR, "Unexpected error");
        }

        DECREF(opt_query);
        DECREF(req_query);
        DECREF(negated);
        DECREF(optional);
        DECREF(required);
    }

    return retval;
}

static bool
S_has_valid_clauses(Query *query) {
    if (Query_Is_A(query, NOTQUERY)) {
        return false;
    }
    else if (Query_Is_A(query, MATCHALLQUERY)) {
        return false;
    }
    else if (Query_Is_A(query, ORQUERY) || Query_Is_A(query, ANDQUERY)) {
        PolyQuery *polyquery = (PolyQuery*)query;
        VArray    *children  = PolyQuery_Get_Children(polyquery);
        for (uint32_t i = 0, max = VA_Get_Size(children); i < max; i++) {
            Query *child = (Query*)VA_Fetch(children, i);
            if (S_has_valid_clauses(child)) {
                return true;
            }
        }
        return false;
    }
    return true;
}

static void
S_do_prune(QueryParser *self, Query *query) {
    if (Query_Is_A(query, NOTQUERY)) {
        // Don't allow double negatives.
        NOTQuery *not_query = (NOTQuery*)query;
        Query *neg_query = NOTQuery_Get_Negated_Query(not_query);
        if (!Query_Is_A(neg_query, MATCHALLQUERY)
            && !S_has_valid_clauses(neg_query)
           ) {
            MatchAllQuery *matchall = MatchAllQuery_new();
            NOTQuery_Set_Negated_Query(not_query, (Query*)matchall);
            DECREF(matchall);
        }
    }
    else if (Query_Is_A(query, POLYQUERY)) {
        PolyQuery *polyquery = (PolyQuery*)query;
        VArray    *children  = PolyQuery_Get_Children(polyquery);

        // Recurse.
        for (uint32_t i = 0, max = VA_Get_Size(children); i < max; i++) {
            Query *child = (Query*)VA_Fetch(children, i);
            S_do_prune(self, child);
        }

        if (PolyQuery_Is_A(polyquery, REQUIREDOPTIONALQUERY)
            || PolyQuery_Is_A(polyquery, ORQUERY)
           ) {
            // Don't allow 'foo OR (-bar)'.
            VArray *children = PolyQuery_Get_Children(polyquery);
            for (uint32_t i = 0, max = VA_Get_Size(children); i < max; i++) {
                Query *child = (Query*)VA_Fetch(children, i);
                if (!S_has_valid_clauses(child)) {
                    VA_Store(children, i, (Obj*)NoMatchQuery_new());
                }
            }
        }
        else if (PolyQuery_Is_A(polyquery, ANDQUERY)) {
            // Don't allow '(-bar AND -baz)'.
            if (!S_has_valid_clauses((Query*)polyquery)) {
                VArray *children = PolyQuery_Get_Children(polyquery);
                VA_Clear(children);
            }
        }
    }
}

Query*
QParser_Prune_IMP(QueryParser *self, Query *query) {
    if (!query
        || Query_Is_A(query, NOTQUERY)
        || Query_Is_A(query, MATCHALLQUERY)
       ) {
        return (Query*)NoMatchQuery_new();
    }
    else if (Query_Is_A(query, POLYQUERY)) {
        S_do_prune(self, query);
    }
    return (Query*)INCREF(query);
}

Query*
QParser_Expand_IMP(QueryParser *self, Query *query) {
    Query *retval = NULL;

    if (Query_Is_A(query, LEAFQUERY)) {
        retval = QParser_Expand_Leaf(self, query);
    }
    else if (Query_Is_A(query, ORQUERY) || Query_Is_A(query, ANDQUERY)) {
        PolyQuery *polyquery = (PolyQuery*)query;
        VArray *children = PolyQuery_Get_Children(polyquery);
        VArray *new_kids = VA_new(VA_Get_Size(children));

        for (uint32_t i = 0, max = VA_Get_Size(children); i < max; i++) {
            Query *child = (Query*)VA_Fetch(children, i);
            Query *new_child = QParser_Expand(self, child); // recurse
            if (new_child) {
                if (Query_Is_A(new_child, NOMATCHQUERY)) {
                    bool fails = NoMatchQuery_Get_Fails_To_Match(
                                       (NoMatchQuery*)new_child);
                    if (fails) {
                        VA_Push(new_kids, (Obj*)new_child);
                    }
                    else {
                        DECREF(new_child);
                    }
                }
                else {
                    VA_Push(new_kids, (Obj*)new_child);
                }
            }
        }

        if (VA_Get_Size(new_kids) == 0) {
            retval = (Query*)NoMatchQuery_new();
        }
        else if (VA_Get_Size(new_kids) == 1) {
            retval = (Query*)INCREF(VA_Fetch(new_kids, 0));
        }
        else {
            PolyQuery_Set_Children(polyquery, new_kids);
            retval = (Query*)INCREF(query);
        }

        DECREF(new_kids);
    }
    else if (Query_Is_A(query, NOTQUERY)) {
        NOTQuery *not_query     = (NOTQuery*)query;
        Query    *negated_query = NOTQuery_Get_Negated_Query(not_query);
        negated_query = QParser_Expand(self, negated_query);
        if (negated_query) {
            NOTQuery_Set_Negated_Query(not_query, negated_query);
            DECREF(negated_query);
            retval = (Query*)INCREF(query);
        }
        else {
            retval = (Query*)MatchAllQuery_new();
        }
    }
    else if (Query_Is_A(query, REQUIREDOPTIONALQUERY)) {
        RequiredOptionalQuery *req_opt_query = (RequiredOptionalQuery*)query;
        Query *req_query = ReqOptQuery_Get_Required_Query(req_opt_query);
        Query *opt_query = ReqOptQuery_Get_Optional_Query(req_opt_query);

        req_query = QParser_Expand(self, req_query);
        opt_query = QParser_Expand(self, opt_query);

        if (req_query && opt_query) {
            ReqOptQuery_Set_Required_Query(req_opt_query, req_query);
            ReqOptQuery_Set_Optional_Query(req_opt_query, opt_query);
            retval = (Query*)INCREF(query);
        }
        else if (req_query) { retval = (Query*)INCREF(req_query); }
        else if (opt_query) { retval = (Query*)INCREF(opt_query); }
        else { retval = (Query*)NoMatchQuery_new(); }

        DECREF(opt_query);
        DECREF(req_query);
    }
    else {
        retval = (Query*)INCREF(query);
    }

    return retval;
}

static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *iter = Str_Top(orig);
    int32_t code_point;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point == '\\') {
            int32_t next_code_point = StrIter_Next(iter);
            if (next_code_point == ':'
                || next_code_point == '"'
                || next_code_point == '\\'
               ) {
                CB_Cat_Char(buf, next_code_point);
            }
            else {
                CB_Cat_Char(buf, code_point);
                if (next_code_point != STRITER_DONE) {
                    CB_Cat_Char(buf, next_code_point);
                }
            }
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}

Query*
QParser_Expand_Leaf_IMP(QueryParser *self, Query *query) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    LeafQuery *leaf_query = (LeafQuery*)query;
    Schema    *schema     = ivars->schema;
    bool       is_phrase  = false;
    bool       ambiguous  = false;

    // Determine whether we can actually process the input.
    if (!Query_Is_A(query, LEAFQUERY)) { return NULL; }
    String *full_text = LeafQuery_Get_Text(leaf_query);
    if (!Str_Get_Size(full_text)) { return NULL; }

    // If quoted, always generate PhraseQuery.
    StringIterator *top  = Str_Top(full_text);
    StringIterator *tail = Str_Tail(full_text);
    StrIter_Skip_Next_Whitespace(top);
    StrIter_Skip_Prev_Whitespace(tail);
    if (StrIter_Starts_With_Utf8(top, "\"", 1)) {
        is_phrase = true;
        StrIter_Advance(top, 1);
        if (StrIter_Ends_With_Utf8(tail, "\"", 1)
            && !StrIter_Ends_With_Utf8(tail, "\\\"", 2)
        ) {
            StrIter_Recede(tail, 1);
        }
    }
    String *source_text = StrIter_substring(top, tail);

    // Either use LeafQuery's field or default to Parser's list.
    VArray *fields;
    if (LeafQuery_Get_Field(leaf_query)) {
        fields = VA_new(1);
        VA_Push(fields, INCREF(LeafQuery_Get_Field(leaf_query)));
    }
    else {
        fields = (VArray*)INCREF(ivars->fields);
    }

    CharBuf *unescape_buf = CB_new(Str_Get_Size(source_text));
    VArray  *queries      = VA_new(VA_Get_Size(fields));
    for (uint32_t i = 0, max = VA_Get_Size(fields); i < max; i++) {
        String   *field    = (String*)VA_Fetch(fields, i);
        Analyzer *analyzer = ivars->analyzer
                             ? ivars->analyzer
                             : Schema_Fetch_Analyzer(schema, field);

        if (!analyzer) {
            VA_Push(queries,
                    (Obj*)QParser_Make_Term_Query(self, field,
                                                  (Obj*)source_text));
        }
        else {
            // Extract token texts.
            String *split_source = S_unescape(self, source_text, unescape_buf);
            VArray *maybe_texts = Analyzer_Split(analyzer, split_source);
            uint32_t num_maybe_texts = VA_Get_Size(maybe_texts);
            VArray *token_texts = VA_new(num_maybe_texts);

            // Filter out zero-length token texts.
            for (uint32_t j = 0; j < num_maybe_texts; j++) {
                String *token_text = (String*)VA_Fetch(maybe_texts, j);
                if (Str_Get_Size(token_text)) {
                    VA_Push(token_texts, INCREF(token_text));
                }
            }

            if (VA_Get_Size(token_texts) == 0) {
                /* Query might include stop words.  Who knows? */
                ambiguous = true;
            }

            // Add either a TermQuery or a PhraseQuery.
            if (is_phrase || VA_Get_Size(token_texts) > 1) {
                VA_Push(queries, (Obj*)
                        QParser_Make_Phrase_Query(self, field, token_texts));
            }
            else if (VA_Get_Size(token_texts) == 1) {
                VA_Push(queries,
                        (Obj*)QParser_Make_Term_Query(self, field, VA_Fetch(token_texts, 0)));
            }

            DECREF(token_texts);
            DECREF(maybe_texts);
            DECREF(split_source);
        }
    }

    Query *retval;
    if (VA_Get_Size(queries) == 0) {
        retval = (Query*)NoMatchQuery_new();
        if (ambiguous) {
            NoMatchQuery_Set_Fails_To_Match((NoMatchQuery*)retval, false);
        }
    }
    else if (VA_Get_Size(queries) == 1) {
        retval = (Query*)INCREF(VA_Fetch(queries, 0));
    }
    else {
        retval = QParser_Make_OR_Query(self, queries);
    }

    // Clean up.
    DECREF(unescape_buf);
    DECREF(queries);
    DECREF(fields);
    DECREF(source_text);
    DECREF(tail);
    DECREF(top);

    return retval;
}

Query*
QParser_Make_Term_Query_IMP(QueryParser *self, String *field,
                            Obj *term) {
    UNUSED_VAR(self);
    return (Query*)TermQuery_new(field, term);
}

Query*
QParser_Make_Phrase_Query_IMP(QueryParser *self, String *field,
                              VArray *terms) {
    UNUSED_VAR(self);
    return (Query*)PhraseQuery_new(field, terms);
}

Query*
QParser_Make_OR_Query_IMP(QueryParser *self, VArray *children) {
    UNUSED_VAR(self);
    return (Query*)ORQuery_new(children);
}

Query*
QParser_Make_AND_Query_IMP(QueryParser *self, VArray *children) {
    UNUSED_VAR(self);
    return (Query*)ANDQuery_new(children);
}

Query*
QParser_Make_NOT_Query_IMP(QueryParser *self, Query *negated_query) {
    UNUSED_VAR(self);
    return (Query*)NOTQuery_new(negated_query);
}

Query*
QParser_Make_Req_Opt_Query_IMP(QueryParser *self, Query *required_query,
                               Query *optional_query) {
    UNUSED_VAR(self);
    return (Query*)ReqOptQuery_new(required_query, optional_query);
}