The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*  File: dict.c
 *  Author: Richard Durbin (rd@sanger.ac.uk)
 *  Copyright (C) J Thierry-Mieg and R Durbin, 1995
 *-------------------------------------------------------------------
 * This file is part of the ACEDB genome database package, written by
 * 	Richard Durbin (MRC LMB, UK) rd@mrc-lmb.cam.ac.uk, and
 *	Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.cnrs-mop.fr
 *
 * Description: 
 * Exported functions:
 * HISTORY:
 * Last edited: Sep 16 23:43 1997 (rd)
 * Created: Tue Jan 17 17:33:44 1995 (rd)
 *-------------------------------------------------------------------
 */

/* $Id: dict.c,v 1.1 2002/11/14 20:00:06 lstein Exp $ */

#include "dict.h"

/************* standard utility from Jean *************/

static int hashString (char *cp, int n, BOOL isDiff)
{
  register int i ;
  register unsigned int j, x = 0 ;
  register int rotate = isDiff ? 21 : 13 ;
  register int leftover = 8*sizeof(int) - rotate ;

  while (*cp)
    x = freeupper (*cp++) ^ (( x >> leftover) | (x << rotate)) ; 

				/* compress down to n bits */
  for (j = x, i = n ; i < sizeof(int) ; i += n)
    j ^= (x >> i) ;
  j &= (1 << n) - 1 ;

  if (isDiff)			/* return odd number */
    j |= 1 ;

  return j ;
}

#ifdef DAZ
static int hashString (char *cp, int n, BOOL isDiff)
{
  register int x;
  register int mask = (1 << n) - 1 ;

  x = 0;
  if (isDiff)	
    { for (;*cp;*cp++) { x = ((x * 5) + *cp) & mask ; }
      x |= 1 ;                   /* return odd number */
    }
  else
    for (;*cp;*cp++) { x = ((x * 3) + *cp) & mask ; }

  return x ;
}
#endif

/******************************************************/

static void dictFinalise (void *x) ;

DICT *dictHandleCreate (int size, STORE_HANDLE handle)
{
  DICT *dict = (DICT*) halloc (sizeof (DICT), handle) ;

  blockSetFinalise (dict, dictFinalise) ;
  for (dict->dim = 6, dict->max = 64 ; 
       dict->max < size ; 
       ++dict->dim, dict->max *= 2) ;
  dict->table = arrayCreate (dict->max, int) ;
  array (dict->table, dict->max-1, int) = 0 ;	/* set arrayMax */
  dict->names = arrayCreate (dict->dim/4, int) ;
  array(dict->names, 0, int) = 0 ;		/* IMPORTANT - dummy 0 entry */
  dict->nameText = stackCreate (dict->dim) ;
  return dict ;
}

DICT *dictCreate (int size) { return dictHandleCreate (size, 0) ; }
void uDictDestroy (DICT *dict) { messfree (dict) ; }

static void dictFinalise (void *x) /* does the work */
{
  DICT *dict = (DICT*)x ;

  arrayDestroy (dict->table) ;
  arrayDestroy (dict->names) ;
  stackDestroy (dict->nameText) ;
}

DICT *dictCopy (DICT *dict)
{
  DICT *new ;

  if (!dict)
    return 0 ;
  new = (DICT*) messalloc (sizeof (DICT)) ;
  new->table = arrayCopy (dict->table) ;
  new->names = arrayCopy (dict->names) ;
  new->nameText = stackCopy (dict->nameText, 0) ;
  new->dim = dict->dim ;
  new->max = dict->max ;
  return new ;
}

/**********************************************/

static int newPos ;		/* local for dictAdd, dictReHash */

BOOL dictFind (DICT *dict, char *s, int *ip)
{
  register int i, h, dh = 0 ;

  if (!dict || !s)
    return FALSE ;

  h = hashString (s, dict->dim, FALSE) ;
  
  while (TRUE)
    { i = arr(dict->table, h, int) ;
      if (!i)			/* empty slot, s is unknown */
	{ newPos = h ;
	  return FALSE ;
	}
      if (!strcmp (s, stackText(dict->nameText, arr(dict->names, i, int))))
	{ if (ip) 
	    *ip = i-1 ;
	  return TRUE ;
	}
      if (!dh)
	dh = hashString (s, dict->dim, TRUE) ;
      h += dh ;
      if (h >= dict->max)
	h -= dict->max ;
    }
}

/*****************/

static void dictReHash (DICT *dict, int newDim)
{
  int i ;

  if (newDim <= dict->dim)
    return ;
  dict->dim = newDim ;
  dict->max = 1 << newDim ;
				/* remake the table */
  arrayDestroy (dict->table) ;
  dict->table = arrayCreate (dict->max, int) ;
  array (dict->table, dict->max-1, int) = 0 ;	/* set arrayMax */

				/* reinsert all the names */
  for (i = 1 ; i < arrayMax(dict->names) ; ++i)
    { dictFind (dict, stackText(dict->nameText, arr(dict->names, i, int)), 0) ;
				/* will fail, but sets newPos */
      arr(dict->table, newPos, int) = i ;
    }
}

/****************/

BOOL dictAdd (DICT *dict, char *s, int *ip)
      /* always fills ip, returns TRUE if added, FALSE if known */
{
  int i ;

  if (!dict || !s)
    return FALSE ;

  if (dictFind (dict, s, ip))	/* word already known */
    return FALSE ;

  i = arrayMax(dict->names) ;
  arr (dict->table, newPos, int) = i ;
  array (dict->names, i, int) = stackMark(dict->nameText) ;
  pushText (dict->nameText, s) ;

  if (arrayMax(dict->names) > 0.4 * dict->max)
    dictReHash (dict, dict->dim+1) ;

  if (ip)
    *ip = i-1 ;
  return TRUE ;
}

/********************** utilities ***********************/

char *dictName (DICT *dict, int i)
{
  if (i < 0 || ++i >= arrayMax(dict->names))
    messcrash ("Call to dictName() out of bounds: %d %d", i-1, dictMax(dict)) ;

  return stackText (dict->nameText, arr(dict->names, i, int)) ;
}

int dictMax (DICT *dict)
{
  return arrayMax(dict->names) - 1 ;
}

/******************** end of file **********************/