The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

parcel Lucy;

/** Provide various number-related utilities.
 *
 * Provide utilities for dealing with endian issues, sub-byte-width arrays,
 * compressed integers, and so on.
 *
 * All routines are inert inline functions; their C bodies live in the
 * embedded C section at the end of this header.
 */
inert class Lucy::Util::NumberUtils nickname NumUtil {

    /** Encode an unsigned 16-bit integer as 2 bytes, using big-endian byte
     * order.
     *
     * NOTE: the inline implementation dereferences `dest` as a pointer to a
     * `uint8_t*`, so `dest` must be the address of a pointer to the output
     * buffer, not the buffer itself.  That pointer is not advanced.
     */
    inert inline void
    encode_bigend_u16(uint16_t value, void *dest);

    /** Encode an unsigned 32-bit integer as 4 bytes, using big-endian byte
     * order.
     *
     * NOTE: as with encode_bigend_u16, `dest` must be the address of a
     * pointer to the output buffer.  That pointer is not advanced.
     */
    inert inline void
    encode_bigend_u32(uint32_t value, void *dest);

    /** Encode an unsigned 64-bit integer as 8 bytes, using big-endian byte
     * order.
     *
     * NOTE: as with encode_bigend_u16, `dest` must be the address of a
     * pointer to the output buffer.  That pointer is not advanced.
     */
    inert inline void
    encode_bigend_u64(uint64_t value, void *dest);

    /** Interpret the 2 bytes at `source` as a big-endian unsigned 16-bit
     * int.  Unlike the encoders, `source` points directly at the bytes.
     */
    inert inline uint16_t
    decode_bigend_u16(const void *source);

    /** Interpret the 4 bytes at `source` as a big-endian unsigned 32-bit
     * int.
     */
    inert inline uint32_t
    decode_bigend_u32(const void *source);

    /** Interpret the 8 bytes at `source` as a big-endian unsigned 64-bit
     * int.
     */
    inert inline uint64_t
    decode_bigend_u64(const void *source);

    /** Encode a 32-bit floating point number as 4 bytes, using big-endian
     * byte order.
     *
     * NOTE: as with encode_bigend_u16, `dest` must be the address of a
     * pointer to the output buffer.  That pointer is not advanced.
     */
    inert inline void
    encode_bigend_f32(float value, void *dest);

    /** Encode a 64-bit floating point number as 8 bytes, using big-endian
     * byte order.
     *
     * NOTE: as with encode_bigend_u16, `dest` must be the address of a
     * pointer to the output buffer.  That pointer is not advanced.
     */
    inert inline void
    encode_bigend_f64(double value, void *dest);

    /** Interpret the 4 bytes at `source` as a 32-bit float stored in
     * big-endian byte order.
     */
    inert inline float
    decode_bigend_f32(const void *source);

    /** Interpret the 8 bytes at `source` as a 64-bit float stored in
     * big-endian byte order.
     */
    inert inline double
    decode_bigend_f64(const void *source);

    /** Encode a compressed 32-bit signed integer at the space pointed to by
     * `dest`. As a side effect, `dest` will be advanced to immediately after
     * the end of the compressed data.
     *
     * The value is cast to uint32_t and encoded exactly as encode_cu32
     * would encode it, so negative values occupy the maximum of 5 bytes.
     */
    inert inline void
    encode_ci32(int32_t value, char **dest);

    /** Encode a compressed 32-bit unsigned integer at the space pointed to by
     * `dest`. As a side effect, `dest` will be advanced to immediately after
     * the end of the compressed data.
     *
     * Each output byte carries 7 bits of the value, most significant group
     * first; every byte except the last has its high bit set.  Between 1
     * and 5 bytes are written.
     */
    inert inline void
    encode_cu32(uint32_t value, char **dest);

    /** Encode a compressed 32-bit unsigned integer at the space pointed to by
     * `dest`, but add "leading zeroes" so that the space consumed will always
     * be 5 bytes.  As a side effect, `dest` will be advanced to immediately
     * after the end of the compressed data.
     */
    inert inline void
    encode_padded_cu32(uint32_t value, char **dest);

    /** Encode a compressed 64-bit signed integer at the space pointed to by
     * `dest`. As a side effect, `dest` will be advanced to immediately after
     * the end of the compressed data.
     *
     * The value is cast to uint64_t and encoded exactly as encode_cu64
     * would encode it.
     */
    inert inline void
    encode_ci64(int64_t value, char **dest);

    /** Encode a compressed 64-bit unsigned integer at the space pointed to by
     * `dest`. As a side effect, `dest` will be advanced to immediately after
     * the end of the compressed data.  Between 1 and 10 bytes are written.
     */
    inert inline void
    encode_cu64(uint64_t value, char **dest);

    /** Read a compressed 32-bit signed integer from the buffer pointed to
     * by `source`.  As a side effect, advance the pointer, consuming the
     * bytes occupied by the compressed number.
     */
    inert inline int32_t
    decode_ci32(const char **source);

    /** Read a compressed 32-bit unsigned integer from the buffer pointed to
     * by `source`.  As a side effect, advance the pointer, consuming the
     * bytes occupied by the compressed number.
     */
    inert inline uint32_t
    decode_cu32(const char **source);

    /** Read a compressed 64-bit signed integer from the buffer pointed to
     * by `source`.  As a side effect, advance the pointer, consuming the
     * bytes occupied by the compressed number.
     */
    inert inline int64_t
    decode_ci64(const char **source);

    /** Read a compressed 64-bit unsigned integer from the buffer pointed to
     * by `source`.  As a side effect, advance the pointer, consuming the
     * bytes occupied by the compressed number.
     */
    inert inline uint64_t
    decode_cu64(const char **source);

    /** Advance `source` past one encoded compressed integer of either
     * width, i.e. past every byte with its high bit set plus the byte that
     * terminates the sequence.
     */
    inert inline void
    skip_cint(const char **source);

    /** Interpret `array` as an array of bits; return true if the
     * bit at `tick` is set, false otherwise.  Within each byte, bits are
     * numbered starting from the least significant bit.
     */
    inert inline bool
    u1get(const void *array, size_t tick);

    /** Interpret `array` as an array of bits; set the bit at
     * `tick`.
     */
    inert inline void
    u1set(void *array, size_t tick);

    /** Interpret `array` as an array of bits; clear the bit at
     * `tick`.
     */
    inert inline void
    u1clear(void *array, size_t tick);

    /** Interpret `array` as an array of bits; flip the bit at
     * `tick`.
     */
    inert inline void
    u1flip(void *array, size_t tick);

    /** Interpret `array` as an array of two-bit integers; return
     * the value at `tick`.  Four elements are packed per byte, lowest-order
     * bits first.
     */
    inert inline uint8_t
    u2get(const void *array, size_t tick);

    /** Interpret `array` as an array of two-bit integers; set the
     * element at `tick` to `value`.  Only the low two bits of `value` are
     * stored.
     */
    inert inline void
    u2set(void *array, size_t tick, uint8_t value);

    /** Interpret `array` as an array of four-bit integers; return
     * the value at `tick`.  Two elements are packed per byte, low nibble
     * first.
     */
    inert inline uint8_t
    u4get(const void *array, size_t tick);

    /** Interpret `array` as an array of four-bit integers; set the
     * element at `tick` to `value`.  Only the low four bits of `value` are
     * stored.
     */
    inert inline void
    u4set(void *array, size_t tick, uint8_t value);
}

__C__

#include <string.h>

/* Write `value` as 2 bytes, most significant byte first.
 *
 * NOTE: `dest_ptr` is not the output buffer itself; it is the address of a
 * `uint8_t*` which points at the buffer.  That pointer is read but never
 * advanced.
 *
 * The bytes are produced with shifts rather than by copying the in-memory
 * representation, so the output does not depend on the host byte order or
 * on CFISH_BIG_END having been configured correctly.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_bigend_u16(uint16_t value, void *dest_ptr) {
    uint8_t *dest = *(uint8_t**)dest_ptr;
    dest[0] = (uint8_t)(value >> 8);
    dest[1] = (uint8_t)(value & 0xFF);
}

/* Write `value` as 4 bytes, most significant byte first.
 *
 * NOTE: `dest_ptr` is not the output buffer itself; it is the address of a
 * `uint8_t*` which points at the buffer.  That pointer is read but never
 * advanced.
 *
 * The bytes are produced with shifts rather than by copying the in-memory
 * representation, so the output does not depend on the host byte order or
 * on CFISH_BIG_END having been configured correctly.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_bigend_u32(uint32_t value, void *dest_ptr) {
    uint8_t *dest = *(uint8_t**)dest_ptr;
    dest[0] = (uint8_t)(value >> 24);
    dest[1] = (uint8_t)(value >> 16);
    dest[2] = (uint8_t)(value >> 8);
    dest[3] = (uint8_t)value;
}

/* Write `value` as 8 bytes, most significant byte first.
 *
 * NOTE: `dest_ptr` is not the output buffer itself; it is the address of a
 * `uint8_t*` which points at the buffer.  That pointer is read but never
 * advanced.
 *
 * The bytes are produced with shifts rather than by copying the in-memory
 * representation, so the output does not depend on the host byte order or
 * on CFISH_BIG_END having been configured correctly.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_bigend_u64(uint64_t value, void *dest_ptr) {
    uint8_t *dest = *(uint8_t**)dest_ptr;
    int      i;
    /* Fill from the last (least significant) byte back to the first. */
    for (i = 7; i >= 0; i--) {
        dest[i] = (uint8_t)(value & 0xFF);
        value >>= 8;
    }
}

/* Read 2 bytes at `source` and combine them as a big-endian uint16_t:
 * the first byte supplies the high 8 bits, the second the low 8 bits.
 */
static CFISH_INLINE uint16_t
lucy_NumUtil_decode_bigend_u16(const void *source) {
    const uint8_t *bytes = (const uint8_t*)source;
    const unsigned hi    = bytes[0];
    const unsigned lo    = bytes[1];
    return (uint16_t)((hi << 8) | lo);
}

/* Read 4 bytes at `source` and combine them as a big-endian uint32_t.
 * Bytes are folded in from first to last, so the first byte ends up in the
 * most significant position.
 */
static CFISH_INLINE uint32_t
lucy_NumUtil_decode_bigend_u32(const void *source) {
    const uint8_t *bytes  = (const uint8_t*)source;
    uint32_t       result = 0;
    size_t         i;
    for (i = 0; i < sizeof(uint32_t); i++) {
        result = (result << 8) | bytes[i];
    }
    return result;
}

/* Read 8 bytes at `source` and combine them as a big-endian uint64_t.
 * Bytes are folded in from first to last, so the first byte ends up in the
 * most significant position.
 */
static CFISH_INLINE uint64_t
lucy_NumUtil_decode_bigend_u64(const void *source) {
    const uint8_t *bytes  = (const uint8_t*)source;
    uint64_t       result = 0;
    size_t         i;
    for (i = 0; i < sizeof(uint64_t); i++) {
        result = (result << 8) | bytes[i];
    }
    return result;
}

/* Write the 4-byte representation of `value` in big-endian byte order.
 *
 * `dest_ptr` is the address of a `uint8_t*` pointing at the output buffer
 * (the same convention as lucy_NumUtil_encode_bigend_u32); the pointer is
 * not advanced.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_bigend_f32(float value, void *dest_ptr) {
#ifdef CFISH_BIG_END
    /* Host order already is big-endian: copy the bytes straight across. */
    uint8_t *target = *(uint8_t**)dest_ptr;
    memcpy(target, &value, sizeof(float));
#else
    /* Reinterpret the float's bits as an integer and let the integer
     * encoder take care of byte order. */
    uint32_t bits;
    memcpy(&bits, &value, sizeof(bits));
    lucy_NumUtil_encode_bigend_u32(bits, dest_ptr);
#endif
}

/* Write the 8-byte representation of `value` in big-endian byte order.
 *
 * `dest_ptr` is the address of a `uint8_t*` pointing at the output buffer
 * (the same convention as lucy_NumUtil_encode_bigend_u64); the pointer is
 * not advanced.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_bigend_f64(double value, void *dest_ptr) {
#ifdef CFISH_BIG_END
    /* Host order already is big-endian: copy the bytes straight across. */
    uint8_t *target = *(uint8_t**)dest_ptr;
    memcpy(target, &value, sizeof(double));
#else
    /* Reinterpret the double's bits as an integer and let the integer
     * encoder take care of byte order. */
    uint64_t bits;
    memcpy(&bits, &value, sizeof(bits));
    lucy_NumUtil_encode_bigend_u64(bits, dest_ptr);
#endif
}

/* Read 4 bytes at `source` and return them as a float.
 *
 * When CFISH_LITTLE_END is defined the bytes are first assembled as a
 * big-endian uint32_t and that integer's bits are reinterpreted as the
 * float; otherwise the bytes are copied verbatim.
 */
static CFISH_INLINE float
lucy_NumUtil_decode_bigend_f32(const void *source) {
    float result;
#ifdef CFISH_LITTLE_END
    uint32_t bits = lucy_NumUtil_decode_bigend_u32(source);
    memcpy(&result, &bits, sizeof(result));
#else
    memcpy(&result, source, sizeof(result));
#endif
    return result;
}

/* Read 8 bytes at `source` and return them as a double.
 *
 * When CFISH_LITTLE_END is defined the bytes are first assembled as a
 * big-endian uint64_t and that integer's bits are reinterpreted as the
 * double; otherwise the bytes are copied verbatim.
 */
static CFISH_INLINE double
lucy_NumUtil_decode_bigend_f64(const void *source) {
    double result;
#ifdef CFISH_LITTLE_END
    uint64_t bits = lucy_NumUtil_decode_bigend_u64(source);
    memcpy(&result, &bits, sizeof(result));
#else
    memcpy(&result, source, sizeof(result));
#endif
    return result;
}

/* Maximum number of bytes a compressed integer of each width can occupy.
 * Each encoded byte carries 7 bits of the value, so the bound is the bit
 * width divided by 7 (rounded down) plus one.  The expressions have type
 * size_t because they are built from sizeof.
 */
#define LUCY_NUMUTIL_CI32_MAX_BYTES ((sizeof(int32_t) * 8 / 7) + 1)  /*  5 */
#define LUCY_NUMUTIL_CU32_MAX_BYTES ((sizeof(uint32_t) * 8 / 7) + 1) /*  5 */
#define LUCY_NUMUTIL_CI64_MAX_BYTES ((sizeof(int64_t) * 8 / 7) + 1)  /* 10 */
#define LUCY_NUMUTIL_CU64_MAX_BYTES ((sizeof(uint64_t) * 8 / 7) + 1) /* 10 */

/* Encode a signed 32-bit value by converting it to uint32_t and delegating
 * to the unsigned encoder.  `*out_buf` is advanced past the written bytes.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_ci32(int32_t value, char **out_buf) {
    const uint32_t bits = (uint32_t)value;
    lucy_NumUtil_encode_cu32(bits, out_buf);
}

/* Encode `value` as a variable-length sequence of 7-bit groups, most
 * significant group first.  Every byte except the last has its high bit
 * (0x80) set as a "more bytes follow" marker.
 *
 * The bytes are assembled back-to-front in a scratch array, then copied to
 * `*out_buf`, which is advanced past them.  The caller must provide room
 * for up to LUCY_NUMUTIL_CU32_MAX_BYTES bytes.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_cu32(uint32_t value, char **out_buf) {
    uint8_t scratch[LUCY_NUMUTIL_CU32_MAX_BYTES];
    size_t  pos = sizeof(scratch) - 1;
    size_t  len;
    /* Final byte: low 7 bits, no continue bit. */
    scratch[pos] = value & 0x7f;
    /* Remaining groups, each flagged with the continue bit. */
    for (value >>= 7; value != 0; value >>= 7) {
        scratch[--pos] = ((value & 0x7f) | 0x80);
    }
    len = sizeof(scratch) - pos;
    memcpy(*out_buf, scratch + pos, len);
    *out_buf += len;
}

/* Encode a signed 64-bit value by converting it to uint64_t and delegating
 * to the unsigned encoder.  `*out_buf` is advanced past the written bytes.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_ci64(int64_t value, char **out_buf) {
    const uint64_t bits = (uint64_t)value;
    lucy_NumUtil_encode_cu64(bits, out_buf);
}

/* Encode `value` as a variable-length sequence of 7-bit groups, most
 * significant group first.  Every byte except the last has its high bit
 * (0x80) set as a "more bytes follow" marker.
 *
 * The bytes are assembled back-to-front in a scratch array, then copied to
 * `*out_buf`, which is advanced past them.  The caller must provide room
 * for up to LUCY_NUMUTIL_CU64_MAX_BYTES bytes.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_cu64(uint64_t value, char **out_buf) {
    uint8_t scratch[LUCY_NUMUTIL_CU64_MAX_BYTES];
    size_t  pos = sizeof(scratch) - 1;
    size_t  len;
    /* Final byte: low 7 bits, no continue bit. */
    scratch[pos] = value & 0x7f;
    /* Remaining groups, each flagged with the continue bit. */
    for (value >>= 7; value != 0; value >>= 7) {
        scratch[--pos] = ((value & 0x7f) | 0x80);
    }
    len = sizeof(scratch) - pos;
    memcpy(*out_buf, scratch + pos, len);
    *out_buf += len;
}

/* Encode `value` in the compressed-integer format, but always emit exactly
 * LUCY_NUMUTIL_CU32_MAX_BYTES bytes: groups that would normally be omitted
 * are written as empty continuation bytes (0x80).  `*out_buf` is advanced
 * past the written bytes.
 */
static CFISH_INLINE void
lucy_NumUtil_encode_padded_cu32(uint32_t value, char **out_buf) {
    uint8_t padded[LUCY_NUMUTIL_CU32_MAX_BYTES];
    size_t  i = sizeof(padded);
    /* Final byte: low 7 bits, no continue bit. */
    padded[--i] = value & 0x7f;
    /* Every earlier byte gets the next 7-bit group plus the continue bit;
     * once the value is exhausted this yields plain 0x80 padding. */
    while (i > 0) {
        value >>= 7;
        padded[--i] = ((value & 0x7f) | 0x80);
    }
    memcpy(*out_buf, padded, sizeof(padded));
    *out_buf += sizeof(padded);
}

/* Decode a compressed integer into `var`, advancing `source`.
 *
 * `var` must be a modifiable unsigned integer lvalue wide enough for the
 * encoded value; `source` must be a modifiable pointer lvalue addressing
 * the first encoded byte.  On completion `source` points just past the
 * final byte of the encoded integer.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.  They are parenthesized in the expansion so that
 * expressions such as `*source_ptr` can be passed safely.  No bounds
 * checking is performed.
 */
#define LUCY_NUMUTIL_DECODE_CINT(var, source) \
    do { \
        (var) = (*(source) & 0x7f); \
        while (*(source)++ & 0x80) { \
            (var) = (*(source) & 0x7f) | ((var) << 7); \
        }  \
    } while (0)

/* Decode a compressed unsigned 32-bit integer and return it converted to
 * int32_t.  `*source_ptr` is advanced past the consumed bytes.
 */
static CFISH_INLINE int32_t
lucy_NumUtil_decode_ci32(const char **source_ptr) {
    const uint32_t bits = lucy_NumUtil_decode_cu32(source_ptr);
    return (int32_t)bits;
}

/* Decode one compressed unsigned 32-bit integer starting at `*source_ptr`.
 *
 * Each byte contributes its low 7 bits, most significant group first; a
 * set high bit (0x80) means another byte follows.  On return `*source_ptr`
 * points just past the last byte consumed.  No bounds checking is
 * performed.
 */
static CFISH_INLINE uint32_t
lucy_NumUtil_decode_cu32(const char **source_ptr) {
    const char *cursor = *source_ptr;
    uint32_t    result = 0;
    char        byte;
    do {
        byte   = *cursor++;
        result = (result << 7) | (uint32_t)(byte & 0x7f);
    } while (byte & 0x80);
    *source_ptr = cursor;
    return result;
}

/* Decode a compressed unsigned 64-bit integer and return it converted to
 * int64_t.  `*source_ptr` is advanced past the consumed bytes.
 */
static CFISH_INLINE int64_t
lucy_NumUtil_decode_ci64(const char **source_ptr) {
    const uint64_t bits = lucy_NumUtil_decode_cu64(source_ptr);
    return (int64_t)bits;
}

/* Decode one compressed unsigned 64-bit integer starting at `*source_ptr`.
 *
 * Each byte contributes its low 7 bits, most significant group first; a
 * set high bit (0x80) means another byte follows.  On return `*source_ptr`
 * points just past the last byte consumed.  No bounds checking is
 * performed.
 */
static CFISH_INLINE uint64_t
lucy_NumUtil_decode_cu64(const char **source_ptr) {
    const char *cursor = *source_ptr;
    uint64_t    result = 0;
    char        byte;
    do {
        byte   = *cursor++;
        result = (result << 7) | (uint64_t)(byte & 0x7f);
    } while (byte & 0x80);
    *source_ptr = cursor;
    return result;
}

/* Advance `*source_ptr` past one compressed integer: every byte with the
 * continue bit (0x80) set, plus the byte that terminates the sequence.
 * No bounds checking is performed.
 *
 * The caller's pointer is loaded as the `const char*` it really is and
 * only then converted for unsigned byte access; reading it through a
 * `const uint8_t**` lvalue would access the pointer object via an
 * incompatible type.
 */
static CFISH_INLINE void
lucy_NumUtil_skip_cint(const char **source_ptr) {
    const uint8_t *ptr = (const uint8_t*)*source_ptr;
    while ((*ptr++ & 0x80) != 0) { }
    *source_ptr = (const char*)ptr;
}

/* Treat `array` as a packed bit array and report whether bit `tick` is set.
 * Bit 0 is the least significant bit of the first byte.
 */
static CFISH_INLINE bool
lucy_NumUtil_u1get(const void *array, size_t tick) {
    const uint8_t *bytes = (const uint8_t*)array;
    const unsigned bit   = 1u << (tick % 8);
    return (bytes[tick / 8] & bit) != 0;
}

/* Treat `array` as a packed bit array and set bit `tick` to 1.
 * Bit 0 is the least significant bit of the first byte.
 */
static CFISH_INLINE void
lucy_NumUtil_u1set(void *array, size_t tick) {
    uint8_t *bytes = (uint8_t*)array;
    bytes[tick / 8] |= (uint8_t)(1u << (tick % 8));
}

/* Treat `array` as a packed bit array and clear bit `tick` to 0.
 * Bit 0 is the least significant bit of the first byte.
 */
static CFISH_INLINE void
lucy_NumUtil_u1clear(void *array, size_t tick) {
    uint8_t *bytes = (uint8_t*)array;
    bytes[tick / 8] &= (uint8_t)~(1u << (tick % 8));
}

/* Treat `array` as a packed bit array and invert bit `tick`.
 * Bit 0 is the least significant bit of the first byte.
 */
static CFISH_INLINE void
lucy_NumUtil_u1flip(void *array, size_t tick) {
    uint8_t *bytes = (uint8_t*)array;
    bytes[tick / 8] ^= (uint8_t)(1u << (tick % 8));
}

/* Treat `array` as packed two-bit integers (four per byte, lowest-order
 * pair first) and return element `tick`.
 */
static CFISH_INLINE uint8_t
lucy_NumUtil_u2get(const void *array, size_t tick) {
    const uint8_t *cells = (const uint8_t*)array;
    const unsigned shift = (unsigned)(tick % 4) * 2;
    return (uint8_t)((cells[tick / 4] >> shift) & 0x3);
}

/* Treat `array` as packed two-bit integers (four per byte, lowest-order
 * pair first) and store the low two bits of `value` in element `tick`.
 * The other three elements sharing the byte are left untouched.
 */
static CFISH_INLINE void
lucy_NumUtil_u2set(void *array, size_t tick, uint8_t value) {
    uint8_t       *cell  = (uint8_t*)array + (tick / 4);
    const unsigned shift = (unsigned)(tick % 4) * 2;
    const unsigned keep  = ~(0x3u << shift);
    const unsigned bits  = (unsigned)(value & 0x3) << shift;
    *cell = (uint8_t)((*cell & keep) | bits);
}


/* Treat `array` as packed four-bit integers (two per byte, low nibble
 * first) and return element `tick`.
 */
static CFISH_INLINE uint8_t
lucy_NumUtil_u4get(const void *array, size_t tick) {
    const uint8_t *cells = (const uint8_t*)array;
    const unsigned shift = (unsigned)(tick % 2) * 4;
    return (uint8_t)((cells[tick / 2] >> shift) & 0xF);
}

/* Treat `array` as packed four-bit integers (two per byte, low nibble
 * first) and store the low four bits of `value` in element `tick`.
 * The other element sharing the byte is left untouched.
 */
static CFISH_INLINE void
lucy_NumUtil_u4set(void *array, size_t tick, uint8_t value) {
    uint8_t       *cell  = (uint8_t*)array + (tick / 2);
    const unsigned shift = (unsigned)(tick % 2) * 4;
    const unsigned keep  = ~(0xFu << shift);
    const unsigned bits  = (unsigned)(value & 0xF) << shift;
    *cell = (uint8_t)((*cell & keep) | bits);
}

/* Unprefixed aliases for the maximum-encoded-size constants, provided only
 * when LUCY_USE_SHORT_NAMES is defined.
 */
#ifdef LUCY_USE_SHORT_NAMES
  #define CI32_MAX_BYTES               LUCY_NUMUTIL_CI32_MAX_BYTES
  #define CI64_MAX_BYTES               LUCY_NUMUTIL_CI64_MAX_BYTES
  #define CU32_MAX_BYTES               LUCY_NUMUTIL_CU32_MAX_BYTES
  #define CU64_MAX_BYTES               LUCY_NUMUTIL_CU64_MAX_BYTES
#endif

__END_C__