The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

/* Our API for random numbers.
 *
 * We can use ISAAC, ChaCha20, or something else.
 *
 * 3700    ns/word  ChaCha20 in Perl
 * 3100    ns/word  Salsa20 in Perl
 * 1600    ns/word  ChaCha8 in Perl
 *  760    ns/word  ISAAC in Perl
 *
 *   11.20 ns/word  ChaCha20 (openbsd)
 *   10.31 ns/word  ChaCha20 (dj)
 *    8.66 ns/word  ChaCha20 (sse2 Peters)
 *    6.85 ns/word  ChaCha12 (dj)
 *    5.99 ns/word  Tyche
 *    5.11 ns/word  ChaCha8 (dj)
 *    4.37 ns/word  MT19937 (Cokus)
 *    4.14 ns/word  Tyche-i
 *    3.26 ns/word  ISAAC
 *    3.18 ns/word  PCG64 (64-bit state, 64-bit types)
 *    1.95 ns/word  PCG64 (64-bit state, 128-bit types)
 *    1.84 ns/word  ChaCha20 (AVX2 chacha-opt)
 *    1.48 ns/word  Xoroshiro128+
 *    1.16 ns/word  SplitMix64
 *
 * These functions do locking, the underlying library does not.
 */

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include "ptypes.h"
#include "threadlock.h"
#include "csprng.h"

#define USE_ISAAC      0
#define USE_CHACHA20   1

static int mutex_init = 0;
static int good_seed = 0;
MUTEX_DECL(state);

#if (USE_ISAAC + USE_CHACHA20) != 1
#error Exactly one CSPRNG should been selected
#endif

#if USE_ISAAC

#include "isaac.h"
#define SEED_BYTES 1024
#define CSEED     isaac_seed
#define CRBYTES   isaac_rand_bytes
#define CIRAND32  isaac_irand32
#define CIRAND64  isaac_irand64
#define CSELFTEST isaac_selftest

#elif USE_CHACHA20

#include "chacha.h"
#define SEED_BYTES (32+8)
#define CSEED     chacha_seed
#define CRBYTES   chacha_rand_bytes
#define CIRAND32  chacha_irand32
#define CIRAND64  chacha_irand64
#define CSELFTEST chacha_selftest

#endif

/* Helper macros, similar to ChaCha, so we're consistent. */
#ifndef U8TO32_LE
#define U8TO32_LE(p) \
  (((uint32_t)((p)[0])      ) | \
   ((uint32_t)((p)[1]) <<  8) | \
   ((uint32_t)((p)[2]) << 16) | \
   ((uint32_t)((p)[3]) << 24))
#endif
#define U32TO8_LE(p, v) \
  do { \
    uint32_t _v = v; \
    (p)[0] = (((_v)      ) & 0xFFU); \
    (p)[1] = (((_v) >>  8) & 0xFFU); \
    (p)[2] = (((_v) >> 16) & 0xFFU); \
    (p)[3] = (((_v) >> 24) & 0xFFU); \
  } while (0)

/*****************************************************************************/

/* We put a simple non-CS PRNG here to help fill small seeds.
 * I'd like to use PCG64 or Xoroshiro128+ here, but to be maximally
 * portable and reproducible, we'll use PCG (RXS M XS 32).
 */
#if 0
/* PCG XSH RR 64/32.  32-bit output, 64-bit state and types. */
uint32_t prng_next(char* rng) {
  uint32_t xorshifted, rot;
  uint64_t *state = (uint64_t*) rng;
  uint64_t oldstate = state[0];
  pcg_state = oldstate * 6364136223846793005ULL + state[1];
  xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
  rot = oldstate >> 59u;
  return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
}
char* prng_new(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  uint64_t *state, initstate, initseq;
  New(0, state, 2, uint64_t);
  initstate = (((uint64_t)a) << 32) | b;
  initseq = (((uint64_t)c) << 32) | d;
  state[0] = 0U;
  state[1] = (initseq << 1u) | 1u;
  (void) prng_next((char*)state);
  state[0] += initstate;
  (void) prng_next((char*)state);
  return (char*) state;
}
#else
/* PCG RXS M XS 32.  32-bit output, 32-bit state and types. */
uint32_t prng_next(char* ctx) {
  uint32_t *rng = (uint32_t*) ctx;
  uint32_t word, oldstate = rng[0];
  rng[0] = rng[0] * 747796405U + rng[1];
  word = ((oldstate >> ((oldstate >> 28u) + 4u)) ^ oldstate) * 277803737u;
  return (word >> 22u) ^ word;
}
char* prng_new(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  uint32_t *state;
  New(0, state, 2, uint32_t);
  state[0] = 0U;
  state[1] = (b << 1u) | 1u;
  (void) prng_next((char*)state);
  state[0] += a;
  (void) prng_next((char*)state);
  state[0] ^= c;
  (void) prng_next((char*)state);
  state[0] ^= d;
  (void) prng_next((char*)state);
  return (char*) state;
}
#endif

/*****************************************************************************/

void csprng_seed(uint32_t bytes, const unsigned char* data)
{
  unsigned char seed[SEED_BYTES + 4];

  /* If given a short seed, minimize zeros in state */
  if (bytes >= SEED_BYTES) {
    memcpy(seed, data, SEED_BYTES);
  } else {
    char* rng;
    uint32_t a, b, c, d, i;
    memcpy(seed, data, bytes);
    memset(seed+bytes, 0, sizeof(seed)-bytes);
    a = U8TO32_LE((seed +  0));
    b = U8TO32_LE((seed +  4));
    c = U8TO32_LE((seed +  8));
    d = U8TO32_LE((seed + 12));
    rng = prng_new(a,b,c,d);
    for (i = 4*((bytes+3)/4); i < SEED_BYTES; i += 4)
      U32TO8_LE(seed + i, prng_next(rng));
    Safefree(rng);
#if 0
    printf("got %u bytes in expanded to %u\n", bytes, SEED_BYTES);
    printf("from: ");for(i=0;i<bytes;i++)printf("%02x",data[i]);printf("\n");
    printf("to:   ");for(i=0;i<SEED_BYTES;i++)printf("%02x",seed[i]);printf("\n");
#endif
  }

  if (!mutex_init) {
    MUTEX_INIT(&state_mutex);
    mutex_init = 1;
    MUTEX_LOCK(&state_mutex);
    CSELFTEST();
    MUTEX_UNLOCK(&state_mutex);
  }
  MUTEX_LOCK(&state_mutex);
  CSEED(SEED_BYTES, seed);
  good_seed = (bytes >= 16);
  MUTEX_UNLOCK(&state_mutex);
}

extern void csprng_srand(UV insecure_seed)
{
#if BITS_PER_WORD == 32
  unsigned char seed[4] = {0};
  U32TO8_LE(seed, insecure_seed);
  csprng_seed(4, seed);
#else
  unsigned char seed[8] = {0};
  if (insecure_seed <= UVCONST(4294967295)) {
    U32TO8_LE(seed, insecure_seed);
    csprng_seed(4, seed);
  } else {
    U32TO8_LE(seed, insecure_seed);
    U32TO8_LE(seed + 4, (insecure_seed >> 32));
    csprng_seed(8, seed);
  }
#endif
}

void csprng_rand_bytes(uint32_t bytes, unsigned char* data)
{
  if (!mutex_init) croak("CSPRNG used before init");
  MUTEX_LOCK(&state_mutex);
  CRBYTES(bytes, data);
  MUTEX_UNLOCK(&state_mutex);
}

uint32_t irand32(void)
{
  uint32_t a;
  MUTEX_LOCK(&state_mutex);
  a = CIRAND32();
  MUTEX_UNLOCK(&state_mutex);
  return a;
}
UV irand64(void)
{
#if BITS_PER_WORD < 64
  croak("irand64 too many bits for UV");
#else
  UV a;
  MUTEX_LOCK(&state_mutex);
  a = CIRAND64();
  MUTEX_UNLOCK(&state_mutex);
  return a;
#endif
}


/*****************************************************************************/

int is_csprng_well_seeded(void) { return good_seed; }

/* There are many ways to get floats from integers.  A few good, many bad.
 *
 * Vigna recommends (x64 >> 11) * (1.0 / (1ULL<<53)).
 * http://xoroshiro.di.unimi.it
 * Also see alternatives discussed:
 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/speed-up-real.html
 *
 * Melissa O'Neill notes the problem is harder than it looks, doesn't address.
 * http://www.pcg-random.org/pdf/toms-oneill-pcg-family-v1.02.pdf
 *
 * randomstate for numpy uses separate code for each generator.
 * With the exception of dSFMT, they each one one of:
 *     (x64 >> 11) * (1 / 9007199254740992.0)
 *     ((x32 >> 5) * 67108864.0 + (y32 >> 6)) / 9007199254740992.0
 * where the first one is identical to Vigna.
 *
 * David Jones recommends the minor 32-bit variant:
 *     ((x32 >> 6) * 134217728.0 + (y32 >> 5)) / 9007199254740992.0
 * http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf
 *
 * Taylor Campbell discusses this in:
 * http://mumble.net/~campbell/tmp/random_real.c
 * He points out that there are two common non-broken choices,
 * div by 2^-53  or   div by 2^-64, and each are slightly flawed in
 * different ways.  He shows a theoretically better method.
 */

/*
 * We prefer the x64 / 2^-64 method.  It seems to produce the best results
 * and is easiest for ensuring we fill up all the bits.
 * It is similar to what Geoff Kuenning does in MTwist, though he computes
 * the constants at runtime to ensure a dodgy compiler won't munge them.
 */
#define TO_NV_32    2.3283064365386962890625000000000000000E-10L
#define TO_NV_64    5.4210108624275221700372640043497085571E-20L
#define TO_NV_96    1.2621774483536188886587657044524579675E-29L
#define TO_NV_128   2.9387358770557187699218413430556141945E-39L

#define DRAND_32_32  (CIRAND32() * TO_NV_32)
#define DRAND_64_32  (((CIRAND32()>>5) * 67108864.0 + (CIRAND32()>>6)) / 9007199254740992.0)
#define DRAND_64_64  (CIRAND64() * TO_NV_64)
#define DRAND_128_32 (CIRAND32() * TO_NV_32 + CIRAND32() * TO_NV_64 + CIRAND32() * TO_NV_96 + CIRAND32() * TO_NV_128)
#define DRAND_128_64 (CIRAND64() * TO_NV_64 + CIRAND64() * TO_NV_128)

NV drand64(void)
{
  NV r;
  MUTEX_LOCK(&state_mutex);
#if NVMANTBITS <= 32
  r = DRAND_32_32;
#elif NVMANTBITS <= 64
  r = (BITS_PER_WORD <= 32)  ?  DRAND_64_32  :  DRAND_64_64;
#else
  r = (BITS_PER_WORD <= 32)  ?  DRAND_128_32 :  DRAND_128_64;
#endif
  MUTEX_UNLOCK(&state_mutex);
  return r;
}

/* Return rand 32-bit integer between 0 to n-1 inclusive */
uint32_t urandomm32(uint32_t n)
{
  uint32_t r, rmin;

  if (n <= 1)
    return 0;

  rmin = -n % n;
  MUTEX_LOCK(&state_mutex);
  while (1) {
    r = CIRAND32();
    if (r >= rmin)
      break;
  }
  MUTEX_UNLOCK(&state_mutex);
  return r % n;
}

UV urandomm64(UV n)
{
  UV r, rmin;

  if (n <= 1)
    return 0;

  rmin = -n % n;
  MUTEX_LOCK(&state_mutex);
  while (1) {
    r = CIRAND64();
    if (r >= rmin)
      break;
  }
  MUTEX_UNLOCK(&state_mutex);
  return r % n;
}

UV urandomb(int nbits)
{
  if (nbits == 0) {
    return 0;
  } else if (nbits <= 32) {
    return irand32() >> (32-nbits);
#if BITS_PER_WORD == 64
  } else if (nbits <= 64) {
    return irand64() >> (64-nbits);
#endif
  }
  croak("irand64 too many bits for UV");
}