The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#ifndef __DHASH_H__
#define __DHASH_H__

/* A special hash-type for use in part(). It is a store-only
 * hash in that all key/value pairs are put into the hash. Then it is sorted by
 * keys ascending with dhash_sort_final where the empty elements come at the end
 * of the internal array. This need for sorting is actually what prevents us from
 * using a dhash-based implementaion right now as it is the bottleneck for cases
 * with many very small partitions.
 *
 * It doesn't use a linked list for collision recovery. Instead, on collision it will
 * walk right in the array to find the first free spot. This search should never take
 * too long as it uses a fairly good integer-hash function.
 *
 * The 'step' parameter isn't currently used. 
 */

#include <stdlib.h>	/* for qsort() */

#define INITIAL_SIZE 4

typedef unsigned int hash_t;

typedef struct {
    int key;
    AV *val;
} dhash_val_t;

typedef struct {
    int max;
    int size;
    int count;
    int step;
    dhash_val_t *ary;
} dhash_t;

void dhash_dump(dhash_t *h);

int cmp (dhash_val_t *a, dhash_val_t *b) {
    /* all empty buckets should be at the end of the array */
    if (!a->val)
	return 1;
    if (!b->val)
	return -1;
    return a->key - b->key;
}

dhash_t * dhash_init() {
    dhash_t *h;
    New(0, h, 1, dhash_t);
    Newz(0, h->ary, INITIAL_SIZE, dhash_val_t);
    h->max = 0;
    h->size = INITIAL_SIZE;
    h->count = 0;
    return h;
}

void dhash_destroy(dhash_t *h) {
    Safefree(h->ary);
    Safefree(h);
}

inline hash_t HASH(register hash_t k) {
    k += (k << 12);
    k ^= (k >> 22);
    k += (k << 4);
    k ^= (k >> 9);
    k += (k << 10);
    k ^= (k >> 2);
    k += (k << 7);
    k ^= (k >> 12);
    return k;
}

void dhash_insert(dhash_t *h, int key, SV *sv, register hash_t hash) {

    while (h->ary[hash].val && h->ary[hash].key != key)
	hash = (hash + 1) % h->size;
    
    if (!h->ary[hash].val) {
	h->ary[hash].val = newAV();
	h->ary[hash].key = key;
	h->count++;
    }
    
    av_push(h->ary[hash].val, sv);
    SvREFCNT_inc(sv);
}

void dhash_resize(dhash_t *h) {
    
    register int i;
    register hash_t hash;
    dhash_val_t *old = h->ary;
    
    h->size <<= 1;
    h->count = 0;
    Newz(0, h->ary, h->size, dhash_val_t);
    
    for (i = 0; i < h->size>>1; ++i) {
	if (!old[i].val)
	    continue;
	hash = HASH(old[i].key) % h->size;
	while (h->ary[hash].val)
	    hash = (hash + 1) % h->size;
	h->ary[hash] = old[i];
	++h->count;
    }
    Safefree(old);
}

void dhash_store(dhash_t *h, int key, SV *val) {
    hash_t hash;
    if ((double)h->count / (double)h->size > 0.75)
	dhash_resize(h);
    hash = HASH(key) % h->size;
    dhash_insert(h, key, val, hash);
    if (key > h->max)
	h->max = key;
}

/* Once this is called, the hash is no longer useable. The only thing
 * that may be done with it is iterate over h->ary to get the values
 * sorted by keys */
void dhash_sort_final(dhash_t *h) {
    qsort(h->ary, h->size, sizeof(dhash_val_t), (int(*)(const void*,const void*))cmp);
}

void dhash_dump(dhash_t *h) {
    int i;
    fprintf(stderr, "max=%i, size=%i, count=%i, ary=%p\n", h->max, h->size, h->count, h->ary);
    for (i = 0; i < h->size; i++) {
	fprintf(stderr, "%2i:  key=%-5i => val=(AV*)%p\n", i, h->ary[i].key, h->ary[i].val);
    }
}

#endif