The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*
 * tree.c - a hash style dictionary for user's personal words
 *
 * Pace Willisson, 1983
 * Hash support added by Geoff Kuenning, 1987
 *
 * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * All rights reserved.
 */

#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include "jsconfig.h"
#include "jspell.h"
#include "proto.h"
#include "msgs.h"

void         treeinit(char *p, char *LibDict);
static FILE *trydict(char *dictname, char *home, char *prefix, char *suffix);
static void  treeload(FILE * dictf);
void         treeinsert(char * word, int wordlen, int keep);
static struct dent *tinsert(struct dent *proto, hash_info *dic);
struct dent *treelookup(register ichar_t *word, hash_info *dic);
#if SORTPERSONAL != 0
static int   pdictcmp(struct dent ** enta, struct dent **entb);
#endif /* SORTPERSONAL != 0 */
void         treeoutput(void);
VOID *       mymalloc(unsigned int size);
void         myfree(VOID * ptr);
#ifdef REGEX_LOOKUP
char *       do_regex_lookup(char * expr, int whence);
#endif /* REGEX_LOOKUP */



/*
 * Hash table sizes.  Prime is probably a good idea, though in truth I
 * whipped the algorithm up on the spot rather than looking it up, so
 * who knows what's really best?  If we overflow the table, we just
 * use a double-and-add-1 algorithm.
 */
static int goodsizes[] =
    {
    53, 223, 907, 3631
    };

static char  personaldict[MAXPATHLEN];
static FILE *dictf;
static int   newwords = 0;

static int repl_inited = 0;

/*---------------------------------------------------------------------------*/

void init_dic(hash_info *dic)
{
   dic->cantexpand = 0;
   dic->hsize = 0;
   dic->hcount = 0;
}

/*
 * p - Value specified in -p switch
 * LibDict - Root of default dict name
 */
void treeinit(char *p, char *LibDict) {
   int   abspath;                 /* NZ if p is abs path name */
   char *h;                       /* Home directory name */
   char  seconddict[MAXPATHLEN];  /* Name of secondary dict */
   FILE *secondf;                 /* Access to second dict file */

   init_dic(&pers);
   /*
   ** If -p was not specified, try to get a default name from the
   ** environment.  After this point, if p is null, the the value in
   ** personaldict is the only possible name for the personal dictionary.
   ** If p is non-null, then there is a possibility that we should
   ** prepend HOME to get the correct dictionary name.
   */
   if (p == NULL)
      p = getenv(PDICTVAR);
   /*
   ** if p exists and begins with '/' we don't really need HOME,
   ** but it's not very likely that HOME isn't set anyway.
   */
   if ((h = getenv("HOME")) == NULL)
      return;

   if (p == NULL) {
      /*
       * No -p and no PDICTVAR.  We will use LibDict and DEFPAFF to
       * figure out the name of the personal dictionary and where it
       * is.  The rules are as follows:
       *
       * (1) If there is a local dictionary and a HOME dictionary,
       *     both are loaded, but changes are saved in the local one.
       *     The dictionary to save changes in is named "personaldict".
       * (2) Dictionaries named after the affix file take precedence
       *     over dictionaries with the default suffix (DEFPAFF).
       * (3) Dictionaries named with the new default names
       *     (DEFPDICT/DEFPAFF) take precedence over the old ones
       *     (OLDPDICT/OLDPAFF).
       * (4) Dictionaries aren't combined unless they follow the same
       *     naming scheme.
       * (5) If no dictionary can be found, a new one is created in
       *     the home directory, named after DEFPDICT and the affix
       *     file.
       */
      dictf = trydict(personaldict, (char *) NULL, DEFPDICT, LibDict);
      secondf = trydict(seconddict, h, DEFPDICT, LibDict);
      if (dictf == NULL  &&  secondf == NULL) {
         dictf = trydict(personaldict, (char *) NULL, DEFPDICT, DEFPAFF);
         secondf = trydict(seconddict, h, DEFPDICT, DEFPAFF);
      }
      if (dictf == NULL  &&  secondf == NULL) {
         dictf = trydict(personaldict, (char *) NULL, OLDPDICT, LibDict);
         secondf = trydict(seconddict, h, OLDPDICT, LibDict);
      }
      if (dictf == NULL  &&  secondf == NULL) {
         dictf = trydict(personaldict, (char *) NULL, OLDPDICT, OLDPAFF);
         secondf = trydict(seconddict, h, OLDPDICT, OLDPAFF);
      }
      if (personaldict[0] == '\0') {
         if (seconddict[0] != '\0')
            strcpy(personaldict, seconddict);
         else
            sprintf(personaldict, "%s/%s%s", h, DEFPDICT, LibDict);
      }
      if (dictf != NULL) {
         treeload(dictf);
         fclose(dictf);
      }
      if (secondf != NULL) {
         treeload(secondf);
         fclose(secondf);
      }
   }
   else {
      /*
      ** Figure out if p is an absolute path name.  Note that beginning
      ** with "./" and "../" is considered an absolute path, since this
      ** still means we can't prepend HOME.
      */
      abspath = (*p == '/'  ||  strncmp(p, "./", 2) == 0
                 ||  strncmp(p, "../", 3) == 0);
      if (abspath) {
         strcpy(personaldict, p);
         if ((dictf = fopen(personaldict, "r")) != NULL) {
            treeload(dictf);
            fclose(dictf);
         }
      }
      else {
         /*
         ** The user gave us a relative pathname.  We will try it
         ** locally, and if that doesn't work, we'll try the home
         ** directory.  If neither exists, it will be created in
         ** the home directory if words are added.
         */
         strcpy(personaldict, p);
         if ((dictf = fopen(personaldict, "r")) != NULL) {
            treeload(dictf);
            fclose(dictf);
         }
         else if (!abspath) {
            /* Try the home */
            sprintf(personaldict, "%s/%s", h, p);
            if ((dictf = fopen(personaldict, "r")) != NULL) {
               treeload(dictf);
               fclose(dictf);
            }
         }
         /*
          * If dictf is null, we couldn't open the dictionary
          * specified in the -p switch.  Complain.
          */
         if (dictf == NULL) {
            fprintf(stderr, CANT_OPEN, p);
            perror("");
            return;
         }
      }
   }

   if (!lflag  &&  !aflag
       &&  access(personaldict, 2) < 0  &&  errno != ENOENT) {
       fprintf(stderr, TREE_C_CANT_UPDATE, personaldict);
       sleep((unsigned) 2);
   }
}

/*---------------------------------------------------------------------------*/
/*
 * Try to open a dictionary.  As a side effect, leaves the dictionary
 * name in "filename" if one is found, and leaves a null string there
 * otherwise.
 */
static FILE *trydict(
     char *filename,        /* Where to store the file name */
     char *home,            /* Home directory */
     char *prefix,          /* Prefix for dictionary */
     char *suffix)          /* Suffix for dictionary */
{
   FILE *dictf;           /* Access to dictionary file */

   if (home == NULL)
      sprintf(filename, "%s%s", prefix, suffix);
   else
      sprintf(filename, "%s/%s%s", home, prefix, suffix);
   dictf = fopen(filename, "r");
   if (dictf == NULL)
      filename[0] = '\0';
   return dictf;
}

/*---------------------------------------------------------------------------*/

static void treeload(register FILE *loadfile  /* File to load words from */)
{
   char buf[BUFSIZ];        /* Buffer for reading pers dict */

   while (fgets(buf, sizeof buf, loadfile) != NULL)
      treeinsert(buf, sizeof buf, 1);
   newwords = 0;
}

/*---------------------------------------------------------------------------*/

void treat_overflow(hash_info *dic, int oldhsize, struct dent *oldhtab)
{
   fprintf(stderr, TREE_C_NO_SPACE);
   /*
    * Try to continue anyway, since our overflow
    * algorithm can handle an overfull (100%+) table,
    * and the malloc very likely failed because we
    * already have such a huge table, so small mallocs
    * for overflow entries will still work.
    */
   if (oldhtab == NULL)
      exit(1);                  /* No old table, can't go on */
   fprintf(stderr, TREE_C_TRY_ANYWAY);
   dic->cantexpand = 1;              /* Suppress further messages */
   dic->hsize = oldhsize;        /* Put things back */
   dic->htab = oldhtab;                /* ... */
   newwords = 1;                /* And pretend it worked */
}

/*---------------------------------------------------------------------------*/

/* Re-insert old entries into new table */
void reinsert_old_entries(hash_info *dic, int oldhsize, struct dent *oldhtab)
{
   int i;
   register struct dent * dp;
   struct dent *olddp;
#ifndef NO_CAPITALIZATION_SUPPORT
   struct dent *newdp;
   int isvariant;
#endif

   for (i = 0;  i < oldhsize;  i++) {
      dp = &oldhtab[i];
      if (dp->flagfield & USED) {
#ifdef NO_CAPITALIZATION_SUPPORT
         tinsert(dp, dic);
#else
         newdp = tinsert(dp, dic);
         isvariant = (dp->flagfield & MOREVARIANTS);
#endif
         dp = dp->next;
#ifdef NO_CAPITALIZATION_SUPPORT
         while (dp != NULL) {
            tinsert(dp, dic);
            olddp = dp;
            dp = dp->next;
            free((char *) olddp);
         }
#else
         while (dp != NULL) {
            if (isvariant) {
               isvariant = dp->flagfield & MOREVARIANTS;
               olddp = newdp->next;
               newdp->next = dp;
               newdp = dp;
               dp = dp->next;
               newdp->next = olddp;
            }
            else {
               isvariant = dp->flagfield & MOREVARIANTS;
               newdp = tinsert(dp, dic);
               olddp = dp;
               dp = dp->next;
               free((char *) olddp);
            }
         }
#endif
      }
   }
   if (oldhtab != NULL)
      free((char *) oldhtab);
}

/*---------------------------------------------------------------------------*/

void new_hsize(hash_info *dic)
{
   int i;

   for (i = 0;  i < sizeof goodsizes / sizeof(goodsizes[0]);  i++) {
      if (goodsizes[i] > dic->hsize)
         break;
   }
   if (i >= sizeof goodsizes / sizeof goodsizes[0])
      dic->hsize += dic->hsize + 1;
   else
      dic->hsize = goodsizes[i];
}

/*---------------------------------------------------------------------------*/

void new_hash_space(hash_info *dic)
{
    /* register int i; */
   struct dent *oldhtab;
   int oldhsize;

   /*
    * Expand hash table when it is MAXPCT % full.
    */
   if (!dic->cantexpand  &&  (dic->hcount * 100) / MAXPCT >= dic->hsize) {
      oldhsize = dic->hsize;
      oldhtab = dic->htab;
      new_hsize(dic);
      dic->htab =
        (struct dent *) calloc((unsigned) dic->hsize, sizeof(struct dent));
      if (dic->htab == NULL)
         treat_overflow(dic, oldhsize, oldhtab);
      else
         reinsert_old_entries(dic, oldhsize, oldhtab);
   }
}

/*---------------------------------------------------------------------------*/

void ins_repl_all(char *word, char *st_repl)
{
   struct dent wordent;
   char ent[255];

   if (repl_inited == 0) {init_dic(&repl); repl_inited = 1;}

   new_hash_space(&repl);

   sprintf(ent, "%s/%s", word, st_repl);
/*   printf("DEB-Vou inserir %s\n\r", ent); */
   if (makedent(ent, ICHARTOSSTR_SIZE, &wordent) < 0) {
      putchar(7);
      return;                      /* Word must be too big or something */
   }
   tinsert(&wordent, &repl);
}

/*---------------------------------------------------------------------------*/

void treeinsert(char *word,     /* Word to insert - must be canonical */
                int wordlen,     /* Length of the word buffer */
                int keep)
{
   struct dent  wordent;
   register struct dent * dp;
   ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN];

   new_hash_space(&pers);

   /*
   ** We're ready to do the insertion.  Start by creating a sample
   ** entry for the word.
   */
   if (makedent(word, wordlen, &wordent) < 0)
      return;                        /* Word must be too big or something */
   if (keep)
      wordent.flagfield |= KEEP;
   /*
   ** Now see if word or a variant is already in the table.  We use the
   ** capitalized version so we'll find the header, if any.
   **/
   strtoichar(nword, word, sizeof nword, 1);
   upcase(nword);
   if ((dp = lookup(nword, 1)) != NULL) {
      /* It exists.  Combine caps and set the keep flag. */
      if (combinecaps(dp, &wordent) < 0) {
         free(wordent.word);
         return;
      }
   }
   else {
      /* It's new. Insert the word. */
      dp = tinsert(&wordent, &pers);
#ifndef NO_CAPITALIZATION_SUPPORT
       if (captype(dp->flagfield) == FOLLOWCASE)
          addvheader(dp);
#endif
   }
   newwords |= keep;
}

/*---------------------------------------------------------------------------*/

static struct dent *tinsert(struct dent *proto,  /* Prototype entry to copy */
                            hash_info *dic)
{
   ichar_t iword[INPUTWORDLEN + MAXAFFIXLEN];
   register int hcode;
   register struct dent *hp;    /* Next trial entry in hash table */
   register struct dent *php;   /* Prev. value of hp, for chaining */

   if (strtoichar(iword, proto->word, sizeof iword, 1)) {
      fprintf(stderr, WORD_TOO_LONG (proto->word));
      return NULL;
   }
#ifdef NO_CAPITALIZATION_SUPPORT
   upcase(iword);
#endif
   hcode = hash(iword, dic->hsize);
   php = NULL;
   hp = &(dic->htab[hcode]);
   if (hp->flagfield & USED) {
      while (hp != NULL) {
         php = hp;
         hp = hp->next;
      }
      hp = (struct dent *) calloc(1, sizeof(struct dent));
      if (hp == NULL) {
         fprintf(stderr, TREE_C_NO_SPACE);
         exit(1);
      }
   }
   *hp = *proto;
   if (php != NULL)
      php->next = hp;
   hp->next = NULL;
   return hp;
}



/**
 * search in dic dictionary 
 */
struct dent *treelookup(register ichar_t *word, hash_info *dic) {
    register int hcode;
    register struct dent * hp;
    char chword[INPUTWORDLEN + MAXAFFIXLEN];

    if (dic->hsize <= 0) return NULL;
    ichartostr(chword, word, sizeof chword, 1);
    hcode = hash(word, dic->hsize);
    hp = &(dic->htab[hcode]);
    while (hp != NULL  &&  (hp->flagfield & USED)) {
	/* printf("DEB- hp->word=%s\n", hp->word); */
	if (strcmp(chword, hp->word) == 0 && (!(hp->saw) || !saw_mode)) {  /* found */
	    if (saw_mode) hp->saw = 1;
	    break;
	}
#ifndef NO_CAPITALIZATION_SUPPORT
	while (hp->flagfield & MOREVARIANTS)
	    hp = hp->next;
#endif
	hp = hp->next;
    }

    if (hp != NULL  &&  (hp->flagfield & USED))
	return hp;
    else
	return NULL;
}


/**
 * put saw off in personal dictionary
 */
void tree_saw_off(register ichar_t *word) {
    register int hcode;
    register struct dent * hp;
    char chword[INPUTWORDLEN + MAXAFFIXLEN];

    if (pers.hsize <= 0) return;
    ichartostr(chword, word, sizeof chword, 1);
    hcode = hash(word, pers.hsize);
    hp = &(pers.htab[hcode]);
    while (hp != NULL  &&  (hp->flagfield & USED)) {
	if (strcmp(chword, hp->word) == 0)   /* found */
	    hp->saw = 0;
#ifndef NO_CAPITALIZATION_SUPPORT
	while (hp->flagfield & MOREVARIANTS)
	    hp = hp->next;
#endif
	hp = hp->next;
    }
}

/*---------------------------------------------------------------------------*/

#if SORTPERSONAL != 0
/* Comparison routine for sorting the personal dictionary with qsort */
static int pdictcmp(struct dent **enta, struct dent **entb) {
    /* The parentheses around *enta / *entb below are NECESSARY!
     * Otherwise the compiler reads it as *(enta->word), or enta->word[0], 
     * which is illegal (but gcc takes it and produces wrong code).
     */
    return casecmp((*enta)->word, (*entb)->word, 1);
}
#endif

/*---------------------------------------------------------------------------*/

void treeoutput(void) {
    register struct dent *cent;       /* Current entry */
    register struct dent *lent;       /* Linked entry */
#if SORTPERSONAL != 0
    int pdictsize;                    /* Number of entries to write */
    struct dent **sortlist;           /* List of entries to be sorted */
    register struct dent **sortptr;   /* Handy pointer into sortlist */
#endif
    register struct dent *ehtab;      /* End of pershtab, for fast looping */

    if (newwords == 0)
       return;

    if ((dictf = fopen(personaldict, "w")) == NULL) {
       fprintf(stderr, CANT_CREATE, personaldict);
       return;
    }

#if SORTPERSONAL != 0
    /*
     * If we are going to sort the personal dictionary, we must know
     * how many items are going to be sorted.
     */
    pdictsize = 0;
    if (pers.hcount >= SORTPERSONAL)
       sortlist = NULL;
    else {
        for (cent = pers.htab, ehtab = pers.htab + pers.hsize;
             cent < ehtab; cent++) {
            for (lent = cent;  lent != NULL;  lent = lent->next) {
                if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP))
                    pdictsize++;
#ifndef NO_CAPITALIZATION_SUPPORT
                while (lent->flagfield & MOREVARIANTS)
                  lent = lent->next;
#endif
            }
        }
        for (cent = hashtbl, ehtab = hashtbl + hashsize;
             cent < ehtab; cent++)  {
            if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
                /*
                ** We only want to count variant headers
                ** and standalone entries.  These happen
                ** to share the characteristics in the
                ** test below.  This test will appear
                ** several more times in this routine.
                */
#ifndef NO_CAPITALIZATION_SUPPORT
                if (captype (cent->flagfield) != FOLLOWCASE
                  &&  cent->word != NULL)
#endif
                    pdictsize++;
            }
        }
        sortlist = (struct dent **) malloc(pdictsize * sizeof(struct dent));
    }
    if (sortlist == NULL) {
#endif
        for (cent = pers.htab, ehtab = pers.htab + pers.hsize;
             cent < ehtab; cent++) {
            for (lent = cent;  lent != NULL;  lent = lent->next) {
                if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
                    toutent(dictf, lent, 1);
#ifndef NO_CAPITALIZATION_SUPPORT
                    while (lent->flagfield & MOREVARIANTS)
                        lent = lent->next;
#endif
                }
            }
        }
        for (cent = hashtbl, ehtab = hashtbl + hashsize;
             cent < ehtab; cent++) {
            if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
#ifndef NO_CAPITALIZATION_SUPPORT
                if (captype(cent->flagfield) != FOLLOWCASE
                    &&  cent->word != NULL)
#endif
                    toutent(dictf, cent, 1);
            }
        }
#if SORTPERSONAL != 0
        return;
    }
    /*
     * Produce dictionary in sorted order.  We used to do this
     * destructively, but that turns out to fail because in some modes
     * the dictionary is written more than once.  So we build an
     * auxiliary pointer table (in sortlist) and sort that.  This is
     * faster anyway, though it uses more memory.
     */
    sortptr = sortlist;
    for (cent = pers.htab, ehtab = pers.htab + pers.hsize; cent < ehtab; cent++) {
       for (lent = cent;  lent != NULL;  lent = lent->next) {
          if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
             *sortptr++ = lent;
#ifndef NO_CAPITALIZATION_SUPPORT
             while (lent->flagfield & MOREVARIANTS)
                lent = lent->next;
#endif
          }
       }
    }
    for (cent = hashtbl, ehtab = hashtbl + hashsize;  cent < ehtab;  cent++) {
       if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
#ifndef NO_CAPITALIZATION_SUPPORT
          if (captype(cent->flagfield) != FOLLOWCASE  &&  cent->word != NULL)
#endif
             *sortptr++ = cent;
       }
    }
    /* Sort the list */
    qsort((char *) sortlist, (unsigned) pdictsize, sizeof(sortlist[0]),
          (int (*) (const void *, const void *)) pdictcmp);
    /* Write it out */
    for (sortptr = sortlist;  --pdictsize >= 0;  )
       toutent(dictf, *sortptr++, 1);
    free((char *) sortlist);
#endif

    newwords = 0;

    fclose(dictf);
}

/*---------------------------------------------------------------------------*/

VOID *mymalloc(unsigned int size) {
    return malloc((unsigned) size);
}

/*---------------------------------------------------------------------------*/

void myfree(VOID *ptr) {
    if (hashstrings != NULL  &&  (char *) ptr >= hashstrings
	&&  (char *) ptr <= hashstrings + hashheader.stringsize)
	return;                        /* Can't free stuff in hashstrings */
    free(ptr);
}

/*---------------------------------------------------------------------------*/

#ifdef REGEX_LOOKUP

/** 
 * check the hashed dictionary for words matching the regex. return
 * the a matching string if found else return NULL 
 *
 * expr - regular expression to use in the match
 * whence - 0 = start at the beg with new regx, else
 *          continue from cur point w/ old regex
 */
char *do_regex_lookup(char *expr, int whence) {
    static struct dent *curent;
    static int          curindex;
    static struct dent *curpersent;
    static int          curpersindex;
    static char *       cmp_expr;
    char                dummy[INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t *           is;

    if (whence == 0) {
	is = strtosichar(expr, 0);
	upcase (is);
	expr = ichartosstr(is, 1);
	cmp_expr = REGCMP(expr);
	curent = hashtbl;
	curindex = 0;
	curpersent = pershtab;
	curpersindex = 0;
    }

    /* search the dictionary until the word is found or the words run out */
    for (  ; curindex < hashsize;  curent++, curindex++) {
	if (curent->word != NULL
	    &&  REGEX (cmp_expr, curent->word, dummy) != NULL) {
	    curindex++;
	    /* Everybody's gotta write a wierd expression once in a while! */
	    return curent++->word;
	}
    }
    /* Try the personal dictionary too */
    for (  ; curpersindex < pershsize;  curpersent++, curpersindex++) {
	if ((curpersent->flagfield & USED) != 0
	    &&  curpersent->word != NULL
	    &&  REGEX(cmp_expr, curpersent->word, dummy) != NULL) {
	    curpersindex++;
	    /* Everybody's gotta write a wierd expression once in a while! */
	    return curpersent++->word;
	}
    }
    return NULL;
}
#endif /* REGEX_LOOKUP */