The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

#ifndef _BLOOM
#define _BLOOM


#include <limits.h>

/* leopold, a c implementation of bloom filters 
 *
 * this technique is very useful dealing with large streams of data in
 * limited memory.  
 *
 * by Peter Alvaro, 2006
 */


/* if your arch/os support the "long long" datatype, you can handle
 * VERY large vectors.  it's worth setting BIGNUM to long long and
 * seeing what happens.  
 */

#define BIGNUM unsigned long long
#define BIGNUM_STR "unsigned long"
#define BIGCAST long long
#define TOPLIMIT LONG_MAX

#if !HAVE_SQRTL
#define sqrtl(val) ((long double)sqrt((double)val))
#endif

#define SALTRAND 99999999
#define CONS 1234567

#define HASH_FUNC hash5

typedef struct
{
	BIGNUM index;
	char spot;

} deref;

typedef BIGNUM (*hash_t)(char *str);

typedef struct 
{
	int *num;
	int cnt;
} randoms;

struct bloomstat
{
	BIGNUM elements; /* size of array */
	int ideal_hashes; /* num hash functions */
	BIGNUM capacity; /* number of elements */
	float e; /* max error rate */
} ;

typedef struct
{
	char *vector;
	hash_t hash;
 	BIGNUM inserts;
  //	int ideal_hashes;
        struct bloomstat stat;
	randoms random_nums;
} bloom;

//int verbose;


/* these are modes to test_all() */
#define RO 0
#define SET 1
#define BVERBOSE 2

/* errors */

#define ERR_MALLOC 1
#define ERR_OVERFLOW 2
#define ERR_UNKNOWN 3

BIGNUM mkprime(BIGNUM startval);

/* public interface */
int bloom_init(bloom *bloom,BIGNUM size,BIGNUM capacity, float error_rate,
	       int hashes,hash_t hash,int flags);
int bloom_check(bloom *bloom,char *str);
int bloom_add(bloom *bloom,char *str);
int bloom_test(bloom *bloom,char *str,int MODE);
void bloom_destroy(bloom *bloom);
int bloom_serialize(bloom *bloom, char *fname);
bloom * bloom_deserialize(char *fname);

/* private interface */
int sketchy_randoms(randoms *rands,int cnt);
int finder (BIGNUM index,deref *dr);
int set(char *big,BIGNUM index);
int test (char *big, BIGNUM index);
BIGNUM bloom_hash(bloom *bloom,char *str, int i);
int bloom_hash_old(bloom *bloom,char *str, int i);

BIGNUM find_close_prime(BIGNUM m);
int get_suggestion(struct bloomstat *stats, BIGNUM n,double e);
int is_prime(BIGNUM m);
void get_rec (struct bloomstat *stat);
BIGNUM report_capacity(bloom *bloom);

#endif