The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/**
 * Growable buffer holding Unicode characters.
 *
 * The backing storage is a raw <code>char*</code> buffer described by the
 * <code>ptr</code>, <code>size</code> and <code>cap</code> member variables.
 * Several constructors and concatenation routines come in two flavors: one
 * which checks the supplied bytes for UTF-8 validity, and a "trusted" one
 * which skips that check.
 */

class Lucy::Object::CharBuf cnick CB
    inherits Lucy::Object::Obj {

    char    *ptr;  /* raw string data */
    size_t   size; /* NOTE(review): presumably the string length in bytes,
                    * not counting the terminating null -- confirm */
    size_t   cap;  /* allocated bytes, including terminating null */

    /** Return a new CharBuf.
     *
     * NOTE(review): <code>size</code> is presumably the number of bytes of
     * buffer to reserve up front -- confirm against the implementation.
     */
    inert incremented CharBuf*
    new(size_t size);

    /** Initialize a CharBuf.
     *
     * NOTE(review): presumably the initializer behind new(), taking the same
     * <code>size</code> argument -- confirm against the implementation.
     */
    inert CharBuf*
    init(CharBuf *self, size_t size);

    /** Return a new CharBuf which holds a copy of the passed-in string.
     * Check for UTF-8 validity.
     */
    inert incremented CharBuf*
    new_from_utf8(const char *utf8, size_t size);

    /** Return a new CharBuf which holds a copy of the passed-in string.  No
     * validity checking is performed.
     */
    inert incremented CharBuf*
    new_from_trusted_utf8(const char *utf8, size_t size);

    /** Return a pointer to a new CharBuf which assumes ownership of the
     * passed-in string.  Check validity of supplied UTF-8.
     */
    inert incremented CharBuf*
    new_steal_str(char *ptr, size_t size, size_t cap);

    /** Return a pointer to a new CharBuf which assumes ownership of the
     * passed-in string.  Do not check validity of supplied UTF-8.
     */
    inert incremented CharBuf*
    new_steal_from_trusted_str(char *ptr, size_t size, size_t cap);

    /** Return a pointer to a new CharBuf which contains formatted data
     * expanded according to CB_VCatF.
     *
     * Note: a user-supplied <code>pattern</code> string is a security hole
     * and must not be allowed.
     */
    inert incremented CharBuf*
    newf(const char *pattern, ...);

    /** Perform lexical comparison of two CharBufs, with level of indirection
     * set to please qsort and friends.
     */
    inert int
    compare(const void *va, const void *vb);

    /** Perform lexical comparison of two CharBufs, with level of indirection
     * set to please qsort and friends, and return true if <code>a</code> is
     * less than <code>b</code>.
     */
    inert bool_t
    less_than(const void *va, const void *vb);

    /** NOTE(review): presumably replaces the content of the CharBuf with the
     * content of <code>other</code> -- confirm.  Which classes are accepted
     * for <code>other</code> cannot be determined from this declaration.
     */
    public void
    Mimic(CharBuf *self, Obj *other);

    /** Counterpart of Mimic() which takes a raw string pointer and a size
     * instead of an object.
     *
     * NOTE(review): presumably <code>size</code> is measured in bytes and
     * the string is copied -- confirm.
     */
    void
    Mimic_Str(CharBuf *self, const char *ptr, size_t size);

    /** Concatenate the passed-in string onto the end of the CharBuf.
     */
    void
    Cat_Str(CharBuf *self, const char *ptr, size_t size);

    /** Concatenate the contents of <code>other</code> onto the end of the
     * caller.
     */
    void
    Cat(CharBuf *self, const CharBuf *other);

    /** Concatenate formatted arguments.  Similar to the printf family, but
     * only accepts minimal options (just enough for decent error messages).
     *
     * Objects:  %o
     * char*:    %s
     * integers: %i8 %i32 %i64 %u8 %u32 %u64
     * floats:   %f64
     * hex:      %x32
     *
     * Note that all Clownfish Objects, including CharBufs, are printed via
     * %o (which invokes Obj_To_String()).
     */
    void
    VCatF(CharBuf *self, const char *pattern, va_list args);

    /** Invokes CB_VCatF to concatenate formatted arguments.  Note that this
     * is only a function and not a method.
     */
    inert void
    catf(CharBuf *self, const char *pattern, ...);

    /** Replaces the contents of the CharBuf using formatted arguments.
     */
    inert void
    setf(CharBuf *self, const char *pattern, ...);

    /** Concatenate one Unicode character onto the end of the CharBuf.
     */
    void
    Cat_Char(CharBuf *self, uint32_t code_point);

    /** Replace all instances of one character with another.  For now, both
     * the source and replacement code points must be ASCII.
     *
     * NOTE(review): the return value is presumably the number of
     * replacements made -- confirm.
     */
    int32_t
    Swap_Chars(CharBuf *self, uint32_t match, uint32_t replacement);

    /** Convert the content of the CharBuf to a 64-bit integer.
     *
     * NOTE(review): presumably parses base 10 (compare BaseX_To_I64) --
     * confirm, along with the behavior for non-numeric content.
     */
    public int64_t
    To_I64(CharBuf *self);

    /** Extract a 64-bit integer from a variable-base stringified version.
     */
    int64_t
    BaseX_To_I64(CharBuf *self, uint32_t base);

    /** Convert the content of the CharBuf to a double.
     *
     * NOTE(review): behavior for non-numeric content is not visible here --
     * confirm.
     */
    public double
    To_F64(CharBuf *self);

    /** Assign more memory to the CharBuf, if it doesn't already have enough
     * room to hold a string of <code>size</code> bytes.  Cannot shrink the
     * allocation.
     *
     * @return a pointer to the raw buffer.
     */
    char*
    Grow(CharBuf *self, size_t size);

    /** Test whether the CharBuf starts with the content of another.
     */
    bool_t
    Starts_With(CharBuf *self, const CharBuf *prefix);

    /** Test whether the CharBuf starts with the passed-in string.
     */
    bool_t
    Starts_With_Str(CharBuf *self, const char *prefix, size_t size);

    /** Test whether the CharBuf ends with the content of another.
     */
    bool_t
    Ends_With(CharBuf *self, const CharBuf *postfix);

    /** Test whether the CharBuf ends with the passed-in string.
     */
    bool_t
    Ends_With_Str(CharBuf *self, const char *postfix, size_t size);

    /** Return the location of the substring within the CharBuf (measured in
     * code points), or -1 if the substring does not match.
     */
    int64_t
    Find(CharBuf *self, const CharBuf *substring);

    /** Counterpart of Find() which takes the substring as a raw pointer and
     * a size rather than as a CharBuf.
     *
     * NOTE(review): the return value presumably follows the same convention
     * as Find() -- confirm.
     */
    int64_t
    Find_Str(CharBuf *self, const char *ptr, size_t size);

    /** Test whether the CharBuf matches the passed-in string.
     */
    bool_t
    Equals_Str(CharBuf *self, const char *ptr, size_t size);

    /** Return the number of Unicode code points in the object's string.
     */
    size_t
    Length(CharBuf *self);

    /** Set the CharBuf's <code>size</code> attribute.
     */
    void
    Set_Size(CharBuf *self, size_t size);

    /** Get the CharBuf's <code>size</code> attribute.
     */
    size_t
    Get_Size(CharBuf *self);

    /** Return the internal backing array for the CharBuf if its internal
     * encoding is UTF-8.  If it is not encoded as UTF-8 throw an exception.
     */
    uint8_t*
    Get_Ptr8(CharBuf *self);

    /** Return a fresh copy of the string data in a CharBuf with an internal
     * encoding of UTF-8.
     */
    CharBuf*
    To_CB8(CharBuf *self);

    /** Return a new CharBuf.
     *
     * NOTE(review): presumably an independent copy of this CharBuf's
     * content -- confirm.
     */
    public incremented CharBuf*
    Clone(CharBuf *self);

    /** NOTE(review): presumably releases the CharBuf's buffer and the object
     * itself -- confirm.
     */
    public void
    Destroy(CharBuf *self);

    /** Equality test against an arbitrary object.
     *
     * NOTE(review): presumably true only when <code>other</code> is a
     * CharBuf with identical content -- confirm.
     */
    public bool_t
    Equals(CharBuf *self, Obj *other);

    /** Comparison against another object.
     *
     * NOTE(review): presumably returns a negative, zero or positive value
     * in the manner of compare() -- confirm, along with which classes are
     * accepted for <code>other</code>.
     */
    public int32_t
    Compare_To(CharBuf *self, Obj *other);

    /** Return a 32-bit hash value for the CharBuf.
     *
     * NOTE(review): the hashing algorithm is not visible here.
     */
    public int32_t
    Hash_Sum(CharBuf *self);

    /** Return a new CharBuf.
     *
     * NOTE(review): presumably a copy of this CharBuf's own content --
     * confirm.
     */
    public incremented CharBuf*
    To_String(CharBuf *self);

    /** Return a new CharBuf built from <code>dump</code>.
     *
     * NOTE(review): the expected type and format of <code>dump</code> are
     * not visible here -- confirm.
     */
    public incremented CharBuf*
    Load(CharBuf *self, Obj *dump);

    /** NOTE(review): presumably writes the CharBuf's content to
     * <code>outstream</code>; the wire format is not visible here.
     */
    public void
    Serialize(CharBuf *self, OutStream *outstream);

    /** NOTE(review): presumably the inverse of Serialize(), reading content
     * from <code>instream</code> -- confirm, including whether
     * <code>self</code> is reused for the returned object.
     */
    public incremented CharBuf*
    Deserialize(CharBuf *self, InStream *instream);

    /** Remove Unicode whitespace characters from both top and tail.
     *
     * NOTE(review): the return value is presumably the number of characters
     * removed -- confirm.
     */
    uint32_t
    Trim(CharBuf *self);

    /** Remove leading Unicode whitespace.
     *
     * NOTE(review): the return value is presumably the number of characters
     * removed -- confirm.
     */
    uint32_t
    Trim_Top(CharBuf *self);

    /** Remove trailing Unicode whitespace.
     *
     * NOTE(review): the return value is presumably the number of characters
     * removed -- confirm.
     */
    uint32_t
    Trim_Tail(CharBuf *self);

    /** Remove characters (measured in code points) from the top of the
     * CharBuf.  Returns the number nipped.
     */
    size_t
    Nip(CharBuf *self, size_t count);

    /** Remove one character from the top of the CharBuf.  Returns the code
     * point, or 0 if the string was empty.
     */
    int32_t
    Nip_One(CharBuf *self);

    /** Remove characters (measured in code points) from the end of the
     * CharBuf.  Returns the number chopped.
     */
    size_t
    Chop(CharBuf *self, size_t count);

    /** Truncate the CharBuf so that it contains no more than
     * <code>count</code> characters.
     *
     * @param count Maximum new length, in Unicode code points.
     * @return The number of code points left in the string after truncation.
     */
    size_t
    Truncate(CharBuf *self, size_t count);

    /** Return the Unicode code point at the specified number of code points
     * in.  Return 0 if the string length is exceeded.  (XXX It would be
     * better to throw an exception, but that's not practical with UTF-8 and
     * no cached length.)
     */
    uint32_t
    Code_Point_At(CharBuf *self, size_t tick);

    /** Return the Unicode code point at the specified number of code points
     * counted backwards from the end of the string.  Return 0 if outside the
     * string.
     */
    uint32_t
    Code_Point_From(CharBuf *self, size_t tick);

    /** Return a newly allocated CharBuf containing a copy of the indicated
     * substring.
     * @param offset Offset from the top, in code points.
     * @param len The desired length of the substring, in code points.
     */
    incremented CharBuf*
    SubString(CharBuf *self, size_t offset, size_t len);

    /** Concatenate the supplied text onto the end of the CharBuf.  Don't
     * check for UTF-8 validity.
     */
    void
    Cat_Trusted_Str(CharBuf *self, const char *ptr, size_t size);
}

/**
 * CharBuf subclass which cannot be grown: its Grow() method throws an error.
 *
 * NOTE(review): the class name and the Assign* methods suggest that a
 * ViewCharBuf is a non-owning "view" onto string data held elsewhere, which
 * would have to outlive the view -- confirm against the implementation.
 */
class Lucy::Object::ViewCharBuf cnick ViewCB
    inherits Lucy::Object::CharBuf {

    /** Return a new ViewCharBuf for the passed-in string.
     *
     * NOTE(review): by analogy with CharBuf's constructor of the same name,
     * presumably checks for UTF-8 validity -- confirm.
     */
    inert incremented ViewCharBuf*
    new_from_utf8(const char *utf8, size_t size);

    /** Return a new ViewCharBuf for the passed-in string.
     *
     * NOTE(review): by analogy with CharBuf's constructor of the same name,
     * presumably performs no UTF-8 validity check -- confirm.
     */
    inert incremented ViewCharBuf*
    new_from_trusted_utf8(const char *utf8, size_t size);

    /** Initialize a ViewCharBuf with the passed-in string.
     *
     * NOTE(review): whether the string is validated here is not visible --
     * confirm.
     */
    inert ViewCharBuf*
    init(ViewCharBuf *self, const char *utf8, size_t size);

    /** NOTE(review): presumably repoints the view at the string data of
     * <code>other</code> without copying -- confirm.
     */
    void
    Assign(ViewCharBuf *self, const CharBuf *other);

    /** Counterpart of Assign() which takes a raw string pointer and a size.
     *
     * NOTE(review): presumably checks for UTF-8 validity, in contrast to
     * Assign_Trusted_Str() -- confirm.
     */
    void
    Assign_Str(ViewCharBuf *self, const char *utf8, size_t size);

    /** Counterpart of Assign_Str() for trusted input.
     *
     * NOTE(review): presumably skips the UTF-8 validity check -- confirm.
     */
    void
    Assign_Trusted_Str(ViewCharBuf *self, const char *utf8, size_t size);

    /** Redeclares CharBuf's Trim_Top() for ViewCharBuf.
     *
     * NOTE(review): how the implementation differs from CharBuf's is not
     * visible here.
     */
    uint32_t
    Trim_Top(ViewCharBuf *self);

    /** Redeclares CharBuf's Nip() for ViewCharBuf.
     *
     * NOTE(review): how the implementation differs from CharBuf's is not
     * visible here.
     */
    size_t
    Nip(ViewCharBuf *self, size_t count);

    /** Redeclares CharBuf's Nip_One() for ViewCharBuf.
     *
     * NOTE(review): how the implementation differs from CharBuf's is not
     * visible here.
     */
    int32_t
    Nip_One(ViewCharBuf *self);

    /** Throws an error. */
    char*
    Grow(ViewCharBuf *self, size_t size);

    /** NOTE(review): presumably destroys the object without freeing the
     * string data it points at -- confirm.
     */
    public void
    Destroy(ViewCharBuf *self);
}

/**
 * ViewCharBuf subclass which is constructed inside memory supplied by the
 * caller (the <code>allocation</code> argument of its constructors) and
 * whose Destroy() method throws an error.
 *
 * The number of bytes needed for the object struct is reported by size().
 */
class Lucy::Object::ZombieCharBuf cnick ZCB
    inherits Lucy::Object::ViewCharBuf {

    /** Return a ZombieCharBuf with a blank string.
     *
     * NOTE(review): <code>allocation</code> presumably must point to at
     * least size() bytes -- confirm.
     */
    inert incremented ZombieCharBuf*
    new(void *allocation);

    /**
     * @param allocation A single block of memory which will be used for both
     * the ZombieCharBuf object and its buffer.
     * @param alloc_size The size of the allocation.
     * @param pattern A format pattern.
     */
    inert incremented ZombieCharBuf*
    newf(void *allocation, size_t alloc_size, const char *pattern, ...);

    /** Return a ZombieCharBuf constructed in <code>allocation</code> for the
     * string of <code>source</code>.
     *
     * NOTE(review): presumably points at the source's string data rather
     * than copying it, so <code>source</code> must outlive the result --
     * confirm.
     */
    inert incremented ZombieCharBuf*
    wrap(void *allocation, const CharBuf *source);

    /** Counterpart of wrap() which takes a raw string pointer and a size.
     *
     * NOTE(review): presumably points at the supplied string rather than
     * copying it -- confirm.
     */
    inert incremented ZombieCharBuf*
    wrap_str(void *allocation, const char *ptr, size_t size);

    /** Return the size for a ZombieCharBuf struct.
     */
    inert size_t
    size();

    /** Throws an error.
     */
    public void
    Destroy(ZombieCharBuf *self);
}

__C__

/* Create a blank ZombieCharBuf inside memory obtained from alloca().
 * Memory from alloca() is released when the calling function returns, so the
 * result must not be used beyond that point.
 */
#define CFISH_ZCB_BLANK() lucy_ZCB_new(alloca(lucy_ZCB_size()))

/* Brace initializer for a ZombieCharBuf which refers to a string literal.
 *
 * The `string ""` concatenation only compiles when the argument is a string
 * literal.  `sizeof(string) -1` is the literal's length without its
 * terminating null, while `sizeof(string)` includes it, matching the `size`
 * and `cap` member variables declared for CharBuf.
 *
 * NOTE(review): the first two initializers are presumably the vtable and the
 * reference count; the initializer order must match the generated struct
 * layout -- confirm.
 */
#define CFISH_ZCB_LITERAL(string) \
    { LUCY_ZOMBIECHARBUF, {1}, string "", sizeof(string) -1, sizeof(string) }

/* Wrap an existing CharBuf in a ZombieCharBuf built inside alloca() memory.
 * The result must not be used after the calling function returns.
 */
#define CFISH_ZCB_WRAP(source) \
    lucy_ZCB_wrap(alloca(lucy_ZCB_size()), source)

/* Wrap a raw string and its size in a ZombieCharBuf built inside alloca()
 * memory.  The result must not be used after the calling function returns.
 */
#define CFISH_ZCB_WRAP_STR(ptr, size) \
    lucy_ZCB_wrap_str(alloca(lucy_ZCB_size()), ptr, size)

/* Shared ZombieCharBuf instance, defined elsewhere.
 * NOTE(review): presumably holds an empty string -- confirm.
 */
extern lucy_ZombieCharBuf CFISH_ZCB_EMPTY;

/* Unprefixed aliases, available only when LUCY_USE_SHORT_NAMES is defined.
 * NOTE(review): `EMPTY` is a very generic identifier and may collide with
 * macros from other headers.
 */
#ifdef LUCY_USE_SHORT_NAMES
  #define ZCB_BLANK             CFISH_ZCB_BLANK
  #define ZCB_LITERAL(_string)  CFISH_ZCB_LITERAL(_string)
  #define EMPTY                 CFISH_ZCB_EMPTY
  #define ZCB_WRAP              CFISH_ZCB_WRAP
  #define ZCB_WRAP_STR          CFISH_ZCB_WRAP_STR
#endif
__END_C__