/*
* tree.c - a hash style dictionary for user's personal words
*
* Pace Willisson, 1983
* Hash support added by Geoff Kuenning, 1987
*
* Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
* All rights reserved.
*/
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include "jsconfig.h"
#include "jspell.h"
#include "proto.h"
#include "msgs.h"
void treeinit(char *p, char *LibDict);
static FILE *trydict(char *dictname, char *home, char *prefix, char *suffix);
static void treeload(FILE * dictf);
void treeinsert(char * word, int wordlen, int keep);
static struct dent *tinsert(struct dent *proto, hash_info *dic);
struct dent *treelookup(register ichar_t *word, hash_info *dic);
#if SORTPERSONAL != 0
static int pdictcmp(struct dent ** enta, struct dent **entb);
#endif /* SORTPERSONAL != 0 */
void treeoutput(void);
VOID * mymalloc(unsigned int size);
void myfree(VOID * ptr);
#ifdef REGEX_LOOKUP
char * do_regex_lookup(char * expr, int whence);
#endif /* REGEX_LOOKUP */
/*
* Hash table sizes. Prime is probably a good idea, though in truth I
* whipped the algorithm up on the spot rather than looking it up, so
* who knows what's really best? If we overflow the table, we just
* use a double-and-add-1 algorithm.
*/
static int goodsizes[] =
{
53, 223, 907, 3631
};
static char personaldict[MAXPATHLEN];
static FILE *dictf;
static int newwords = 0;
static int repl_inited = 0;
/*---------------------------------------------------------------------------*/
void init_dic(hash_info *dic)
{
dic->cantexpand = 0;
dic->hsize = 0;
dic->hcount = 0;
}
/*
* p - Value specified in -p switch
* LibDict - Root of default dict name
*/
void treeinit(char *p, char *LibDict) {
int abspath; /* NZ if p is abs path name */
char *h; /* Home directory name */
char seconddict[MAXPATHLEN]; /* Name of secondary dict */
FILE *secondf; /* Access to second dict file */
init_dic(&pers);
/*
** If -p was not specified, try to get a default name from the
** environment. After this point, if p is null, the the value in
** personaldict is the only possible name for the personal dictionary.
** If p is non-null, then there is a possibility that we should
** prepend HOME to get the correct dictionary name.
*/
if (p == NULL)
p = getenv(PDICTVAR);
/*
** if p exists and begins with '/' we don't really need HOME,
** but it's not very likely that HOME isn't set anyway.
*/
if ((h = getenv("HOME")) == NULL)
return;
if (p == NULL) {
/*
* No -p and no PDICTVAR. We will use LibDict and DEFPAFF to
* figure out the name of the personal dictionary and where it
* is. The rules are as follows:
*
* (1) If there is a local dictionary and a HOME dictionary,
* both are loaded, but changes are saved in the local one.
* The dictionary to save changes in is named "personaldict".
* (2) Dictionaries named after the affix file take precedence
* over dictionaries with the default suffix (DEFPAFF).
* (3) Dictionaries named with the new default names
* (DEFPDICT/DEFPAFF) take precedence over the old ones
* (OLDPDICT/OLDPAFF).
* (4) Dictionaries aren't combined unless they follow the same
* naming scheme.
* (5) If no dictionary can be found, a new one is created in
* the home directory, named after DEFPDICT and the affix
* file.
*/
dictf = trydict(personaldict, (char *) NULL, DEFPDICT, LibDict);
secondf = trydict(seconddict, h, DEFPDICT, LibDict);
if (dictf == NULL && secondf == NULL) {
dictf = trydict(personaldict, (char *) NULL, DEFPDICT, DEFPAFF);
secondf = trydict(seconddict, h, DEFPDICT, DEFPAFF);
}
if (dictf == NULL && secondf == NULL) {
dictf = trydict(personaldict, (char *) NULL, OLDPDICT, LibDict);
secondf = trydict(seconddict, h, OLDPDICT, LibDict);
}
if (dictf == NULL && secondf == NULL) {
dictf = trydict(personaldict, (char *) NULL, OLDPDICT, OLDPAFF);
secondf = trydict(seconddict, h, OLDPDICT, OLDPAFF);
}
if (personaldict[0] == '\0') {
if (seconddict[0] != '\0')
strcpy(personaldict, seconddict);
else
sprintf(personaldict, "%s/%s%s", h, DEFPDICT, LibDict);
}
if (dictf != NULL) {
treeload(dictf);
fclose(dictf);
}
if (secondf != NULL) {
treeload(secondf);
fclose(secondf);
}
}
else {
/*
** Figure out if p is an absolute path name. Note that beginning
** with "./" and "../" is considered an absolute path, since this
** still means we can't prepend HOME.
*/
abspath = (*p == '/' || strncmp(p, "./", 2) == 0
|| strncmp(p, "../", 3) == 0);
if (abspath) {
strcpy(personaldict, p);
if ((dictf = fopen(personaldict, "r")) != NULL) {
treeload(dictf);
fclose(dictf);
}
}
else {
/*
** The user gave us a relative pathname. We will try it
** locally, and if that doesn't work, we'll try the home
** directory. If neither exists, it will be created in
** the home directory if words are added.
*/
strcpy(personaldict, p);
if ((dictf = fopen(personaldict, "r")) != NULL) {
treeload(dictf);
fclose(dictf);
}
else if (!abspath) {
/* Try the home */
sprintf(personaldict, "%s/%s", h, p);
if ((dictf = fopen(personaldict, "r")) != NULL) {
treeload(dictf);
fclose(dictf);
}
}
/*
* If dictf is null, we couldn't open the dictionary
* specified in the -p switch. Complain.
*/
if (dictf == NULL) {
fprintf(stderr, CANT_OPEN, p);
perror("");
return;
}
}
}
if (!lflag && !aflag
&& access(personaldict, 2) < 0 && errno != ENOENT) {
fprintf(stderr, TREE_C_CANT_UPDATE, personaldict);
sleep((unsigned) 2);
}
}
/*---------------------------------------------------------------------------*/
/*
* Try to open a dictionary. As a side effect, leaves the dictionary
* name in "filename" if one is found, and leaves a null string there
* otherwise.
*/
static FILE *trydict(
char *filename, /* Where to store the file name */
char *home, /* Home directory */
char *prefix, /* Prefix for dictionary */
char *suffix) /* Suffix for dictionary */
{
FILE *dictf; /* Access to dictionary file */
if (home == NULL)
sprintf(filename, "%s%s", prefix, suffix);
else
sprintf(filename, "%s/%s%s", home, prefix, suffix);
dictf = fopen(filename, "r");
if (dictf == NULL)
filename[0] = '\0';
return dictf;
}
/*---------------------------------------------------------------------------*/
static void treeload(register FILE *loadfile /* File to load words from */)
{
char buf[BUFSIZ]; /* Buffer for reading pers dict */
while (fgets(buf, sizeof buf, loadfile) != NULL)
treeinsert(buf, sizeof buf, 1);
newwords = 0;
}
/*---------------------------------------------------------------------------*/
void treat_overflow(hash_info *dic, int oldhsize, struct dent *oldhtab)
{
fprintf(stderr, TREE_C_NO_SPACE);
/*
* Try to continue anyway, since our overflow
* algorithm can handle an overfull (100%+) table,
* and the malloc very likely failed because we
* already have such a huge table, so small mallocs
* for overflow entries will still work.
*/
if (oldhtab == NULL)
exit(1); /* No old table, can't go on */
fprintf(stderr, TREE_C_TRY_ANYWAY);
dic->cantexpand = 1; /* Suppress further messages */
dic->hsize = oldhsize; /* Put things back */
dic->htab = oldhtab; /* ... */
newwords = 1; /* And pretend it worked */
}
/*---------------------------------------------------------------------------*/
/* Re-insert old entries into new table */
void reinsert_old_entries(hash_info *dic, int oldhsize, struct dent *oldhtab)
{
int i;
register struct dent * dp;
struct dent *olddp;
#ifndef NO_CAPITALIZATION_SUPPORT
struct dent *newdp;
int isvariant;
#endif
for (i = 0; i < oldhsize; i++) {
dp = &oldhtab[i];
if (dp->flagfield & USED) {
#ifdef NO_CAPITALIZATION_SUPPORT
tinsert(dp, dic);
#else
newdp = tinsert(dp, dic);
isvariant = (dp->flagfield & MOREVARIANTS);
#endif
dp = dp->next;
#ifdef NO_CAPITALIZATION_SUPPORT
while (dp != NULL) {
tinsert(dp, dic);
olddp = dp;
dp = dp->next;
free((char *) olddp);
}
#else
while (dp != NULL) {
if (isvariant) {
isvariant = dp->flagfield & MOREVARIANTS;
olddp = newdp->next;
newdp->next = dp;
newdp = dp;
dp = dp->next;
newdp->next = olddp;
}
else {
isvariant = dp->flagfield & MOREVARIANTS;
newdp = tinsert(dp, dic);
olddp = dp;
dp = dp->next;
free((char *) olddp);
}
}
#endif
}
}
if (oldhtab != NULL)
free((char *) oldhtab);
}
/*---------------------------------------------------------------------------*/
void new_hsize(hash_info *dic)
{
int i;
for (i = 0; i < sizeof goodsizes / sizeof(goodsizes[0]); i++) {
if (goodsizes[i] > dic->hsize)
break;
}
if (i >= sizeof goodsizes / sizeof goodsizes[0])
dic->hsize += dic->hsize + 1;
else
dic->hsize = goodsizes[i];
}
/*---------------------------------------------------------------------------*/
void new_hash_space(hash_info *dic)
{
/* register int i; */
struct dent *oldhtab;
int oldhsize;
/*
* Expand hash table when it is MAXPCT % full.
*/
if (!dic->cantexpand && (dic->hcount * 100) / MAXPCT >= dic->hsize) {
oldhsize = dic->hsize;
oldhtab = dic->htab;
new_hsize(dic);
dic->htab =
(struct dent *) calloc((unsigned) dic->hsize, sizeof(struct dent));
if (dic->htab == NULL)
treat_overflow(dic, oldhsize, oldhtab);
else
reinsert_old_entries(dic, oldhsize, oldhtab);
}
}
/*---------------------------------------------------------------------------*/
void ins_repl_all(char *word, char *st_repl)
{
struct dent wordent;
char ent[255];
if (repl_inited == 0) {init_dic(&repl); repl_inited = 1;}
new_hash_space(&repl);
sprintf(ent, "%s/%s", word, st_repl);
/* printf("DEB-Vou inserir %s\n\r", ent); */
if (makedent(ent, ICHARTOSSTR_SIZE, &wordent) < 0) {
putchar(7);
return; /* Word must be too big or something */
}
tinsert(&wordent, &repl);
}
/*---------------------------------------------------------------------------*/
void treeinsert(char *word, /* Word to insert - must be canonical */
int wordlen, /* Length of the word buffer */
int keep)
{
struct dent wordent;
register struct dent * dp;
ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN];
new_hash_space(&pers);
/*
** We're ready to do the insertion. Start by creating a sample
** entry for the word.
*/
if (makedent(word, wordlen, &wordent) < 0)
return; /* Word must be too big or something */
if (keep)
wordent.flagfield |= KEEP;
/*
** Now see if word or a variant is already in the table. We use the
** capitalized version so we'll find the header, if any.
**/
strtoichar(nword, word, sizeof nword, 1);
upcase(nword);
if ((dp = lookup(nword, 1)) != NULL) {
/* It exists. Combine caps and set the keep flag. */
if (combinecaps(dp, &wordent) < 0) {
free(wordent.word);
return;
}
}
else {
/* It's new. Insert the word. */
dp = tinsert(&wordent, &pers);
#ifndef NO_CAPITALIZATION_SUPPORT
if (captype(dp->flagfield) == FOLLOWCASE)
addvheader(dp);
#endif
}
newwords |= keep;
}
/*---------------------------------------------------------------------------*/
static struct dent *tinsert(struct dent *proto, /* Prototype entry to copy */
hash_info *dic)
{
ichar_t iword[INPUTWORDLEN + MAXAFFIXLEN];
register int hcode;
register struct dent *hp; /* Next trial entry in hash table */
register struct dent *php; /* Prev. value of hp, for chaining */
if (strtoichar(iword, proto->word, sizeof iword, 1)) {
fprintf(stderr, WORD_TOO_LONG (proto->word));
return NULL;
}
#ifdef NO_CAPITALIZATION_SUPPORT
upcase(iword);
#endif
hcode = hash(iword, dic->hsize);
php = NULL;
hp = &(dic->htab[hcode]);
if (hp->flagfield & USED) {
while (hp != NULL) {
php = hp;
hp = hp->next;
}
hp = (struct dent *) calloc(1, sizeof(struct dent));
if (hp == NULL) {
fprintf(stderr, TREE_C_NO_SPACE);
exit(1);
}
}
*hp = *proto;
if (php != NULL)
php->next = hp;
hp->next = NULL;
return hp;
}
/**
* search in dic dictionary
*/
struct dent *treelookup(register ichar_t *word, hash_info *dic) {
register int hcode;
register struct dent * hp;
char chword[INPUTWORDLEN + MAXAFFIXLEN];
if (dic->hsize <= 0) return NULL;
ichartostr(chword, word, sizeof chword, 1);
hcode = hash(word, dic->hsize);
hp = &(dic->htab[hcode]);
while (hp != NULL && (hp->flagfield & USED)) {
/* printf("DEB- hp->word=%s\n", hp->word); */
if (strcmp(chword, hp->word) == 0 && (!(hp->saw) || !saw_mode)) { /* found */
if (saw_mode) hp->saw = 1;
break;
}
#ifndef NO_CAPITALIZATION_SUPPORT
while (hp->flagfield & MOREVARIANTS)
hp = hp->next;
#endif
hp = hp->next;
}
if (hp != NULL && (hp->flagfield & USED))
return hp;
else
return NULL;
}
/**
* put saw off in personal dictionary
*/
void tree_saw_off(register ichar_t *word) {
register int hcode;
register struct dent * hp;
char chword[INPUTWORDLEN + MAXAFFIXLEN];
if (pers.hsize <= 0) return;
ichartostr(chword, word, sizeof chword, 1);
hcode = hash(word, pers.hsize);
hp = &(pers.htab[hcode]);
while (hp != NULL && (hp->flagfield & USED)) {
if (strcmp(chword, hp->word) == 0) /* found */
hp->saw = 0;
#ifndef NO_CAPITALIZATION_SUPPORT
while (hp->flagfield & MOREVARIANTS)
hp = hp->next;
#endif
hp = hp->next;
}
}
/*---------------------------------------------------------------------------*/
#if SORTPERSONAL != 0
/* Comparison routine for sorting the personal dictionary with qsort */
static int pdictcmp(struct dent **enta, struct dent **entb) {
/* The parentheses around *enta / *entb below are NECESSARY!
* Otherwise the compiler reads it as *(enta->word), or enta->word[0],
* which is illegal (but gcc takes it and produces wrong code).
*/
return casecmp((*enta)->word, (*entb)->word, 1);
}
#endif
/*---------------------------------------------------------------------------*/
void treeoutput(void) {
register struct dent *cent; /* Current entry */
register struct dent *lent; /* Linked entry */
#if SORTPERSONAL != 0
int pdictsize; /* Number of entries to write */
struct dent **sortlist; /* List of entries to be sorted */
register struct dent **sortptr; /* Handy pointer into sortlist */
#endif
register struct dent *ehtab; /* End of pershtab, for fast looping */
if (newwords == 0)
return;
if ((dictf = fopen(personaldict, "w")) == NULL) {
fprintf(stderr, CANT_CREATE, personaldict);
return;
}
#if SORTPERSONAL != 0
/*
* If we are going to sort the personal dictionary, we must know
* how many items are going to be sorted.
*/
pdictsize = 0;
if (pers.hcount >= SORTPERSONAL)
sortlist = NULL;
else {
for (cent = pers.htab, ehtab = pers.htab + pers.hsize;
cent < ehtab; cent++) {
for (lent = cent; lent != NULL; lent = lent->next) {
if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP))
pdictsize++;
#ifndef NO_CAPITALIZATION_SUPPORT
while (lent->flagfield & MOREVARIANTS)
lent = lent->next;
#endif
}
}
for (cent = hashtbl, ehtab = hashtbl + hashsize;
cent < ehtab; cent++) {
if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
/*
** We only want to count variant headers
** and standalone entries. These happen
** to share the characteristics in the
** test below. This test will appear
** several more times in this routine.
*/
#ifndef NO_CAPITALIZATION_SUPPORT
if (captype (cent->flagfield) != FOLLOWCASE
&& cent->word != NULL)
#endif
pdictsize++;
}
}
sortlist = (struct dent **) malloc(pdictsize * sizeof(struct dent));
}
if (sortlist == NULL) {
#endif
for (cent = pers.htab, ehtab = pers.htab + pers.hsize;
cent < ehtab; cent++) {
for (lent = cent; lent != NULL; lent = lent->next) {
if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
toutent(dictf, lent, 1);
#ifndef NO_CAPITALIZATION_SUPPORT
while (lent->flagfield & MOREVARIANTS)
lent = lent->next;
#endif
}
}
}
for (cent = hashtbl, ehtab = hashtbl + hashsize;
cent < ehtab; cent++) {
if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
#ifndef NO_CAPITALIZATION_SUPPORT
if (captype(cent->flagfield) != FOLLOWCASE
&& cent->word != NULL)
#endif
toutent(dictf, cent, 1);
}
}
#if SORTPERSONAL != 0
return;
}
/*
* Produce dictionary in sorted order. We used to do this
* destructively, but that turns out to fail because in some modes
* the dictionary is written more than once. So we build an
* auxiliary pointer table (in sortlist) and sort that. This is
* faster anyway, though it uses more memory.
*/
sortptr = sortlist;
for (cent = pers.htab, ehtab = pers.htab + pers.hsize; cent < ehtab; cent++) {
for (lent = cent; lent != NULL; lent = lent->next) {
if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
*sortptr++ = lent;
#ifndef NO_CAPITALIZATION_SUPPORT
while (lent->flagfield & MOREVARIANTS)
lent = lent->next;
#endif
}
}
}
for (cent = hashtbl, ehtab = hashtbl + hashsize; cent < ehtab; cent++) {
if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP)) {
#ifndef NO_CAPITALIZATION_SUPPORT
if (captype(cent->flagfield) != FOLLOWCASE && cent->word != NULL)
#endif
*sortptr++ = cent;
}
}
/* Sort the list */
qsort((char *) sortlist, (unsigned) pdictsize, sizeof(sortlist[0]),
(int (*) (const void *, const void *)) pdictcmp);
/* Write it out */
for (sortptr = sortlist; --pdictsize >= 0; )
toutent(dictf, *sortptr++, 1);
free((char *) sortlist);
#endif
newwords = 0;
fclose(dictf);
}
/*---------------------------------------------------------------------------*/
VOID *mymalloc(unsigned int size) {
return malloc((unsigned) size);
}
/*---------------------------------------------------------------------------*/
void myfree(VOID *ptr) {
if (hashstrings != NULL && (char *) ptr >= hashstrings
&& (char *) ptr <= hashstrings + hashheader.stringsize)
return; /* Can't free stuff in hashstrings */
free(ptr);
}
/*---------------------------------------------------------------------------*/
#ifdef REGEX_LOOKUP
/**
* check the hashed dictionary for words matching the regex. return
* the a matching string if found else return NULL
*
* expr - regular expression to use in the match
* whence - 0 = start at the beg with new regx, else
* continue from cur point w/ old regex
*/
char *do_regex_lookup(char *expr, int whence) {
static struct dent *curent;
static int curindex;
static struct dent *curpersent;
static int curpersindex;
static char * cmp_expr;
char dummy[INPUTWORDLEN + MAXAFFIXLEN];
ichar_t * is;
if (whence == 0) {
is = strtosichar(expr, 0);
upcase (is);
expr = ichartosstr(is, 1);
cmp_expr = REGCMP(expr);
curent = hashtbl;
curindex = 0;
curpersent = pershtab;
curpersindex = 0;
}
/* search the dictionary until the word is found or the words run out */
for ( ; curindex < hashsize; curent++, curindex++) {
if (curent->word != NULL
&& REGEX (cmp_expr, curent->word, dummy) != NULL) {
curindex++;
/* Everybody's gotta write a wierd expression once in a while! */
return curent++->word;
}
}
/* Try the personal dictionary too */
for ( ; curpersindex < pershsize; curpersent++, curpersindex++) {
if ((curpersent->flagfield & USED) != 0
&& curpersent->word != NULL
&& REGEX(cmp_expr, curpersent->word, dummy) != NULL) {
curpersindex++;
/* Everybody's gotta write a wierd expression once in a while! */
return curpersent++->word;
}
}
return NULL;
}
#endif /* REGEX_LOOKUP */