The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* -*- Mode: C; c-file-style: "stroustrup" -*- */

/**
 * @file
 * @brief routines to manage the higher-level aspects of spell-checking
 *
 * Copyright 1983, by Pace Willisson
 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * Copyright 1994-2006 Ulisses Pinto & José João Almeida & Alberto Simões
 *                     Projecto Natura, Universidade do Minho
 */

#include <string.h>

#include <ctype.h>
#include "jsconfig.h"
#include "jspell.h"
#include "proto.h"
#include "msgs.h"
#include "version.h"
#include "good.h"

void checkfile(void);
int  casecmp(char *a, char *b, int canonical);
void makepossibilities(ichar_t *word);
int  compoundgood(ichar_t *word);
int  ins_root_cap(ichar_t *word, ichar_t *pattern, int prestrip, int preadd, 
                  int sufstrip, int sufadd, struct dent *firstdent, 
                  struct flagent *pfxent, struct flagent *sufent);
void askmode(void);


/*---------------------------------------------------------------------------*/

void printhelp(register FILE *helpout)        /* File to write help to */
{
    fprintf(helpout, CORR_C_HELP_1);
    fprintf(helpout, CORR_C_HELP_2);
    fprintf(helpout, CORR_C_HELP_3);
    fprintf(helpout, CORR_C_HELP_4);
    fprintf(helpout, CORR_C_HELP_5);
    fprintf(helpout, CORR_C_HELP_6);
    fprintf(helpout, CORR_C_HELP_7);
    fprintf(helpout, CORR_C_HELP_8);
    fprintf(helpout, CORR_C_HELP_9);

    fprintf(helpout, CORR_C_HELP_COMMANDS);

    fprintf(helpout, CORR_C_HELP_R_CMD);
    fprintf(helpout, CORR_C_HELP_E_CMD);
    fprintf(helpout, CORR_C_HELP_BLANK);
    fprintf(helpout, CORR_C_HELP_A_CMD);
    fprintf(helpout, CORR_C_HELP_I_CMD);
    fprintf(helpout, CORR_C_HELP_U_CMD);
    fprintf(helpout, CORR_C_HELP_0_CMD);
    fprintf(helpout, CORR_C_HELP_L_CMD);
    fprintf(helpout, CORR_C_HELP_X_CMD);
    fprintf(helpout, CORR_C_HELP_Q_CMD);
    fprintf(helpout, CORR_C_HELP_BANG);
    fprintf(helpout, CORR_C_HELP_REDRAW);
    fprintf(helpout, CORR_C_HELP_SUSPEND);
    fprintf(helpout, CORR_C_HELP_HELP);
}

/*---------------------------------------------------------------------------*/
/* checkfile                                                                 */
/*---------------------------------------------------------------------------*/

static void init_zero_contextbufs()
{
    int bufno;

    for (bufno = 0;  bufno < contextsize;  bufno++)
        contextbufs[bufno][0] = '\0';
}

/*---------------------------------------------------------------------------*/

static void move_contextbufs_down()
{
    int bufno;

    for (bufno = contextsize;  --bufno > 0;  )
        strcpy(contextbufs[bufno], contextbufs[bufno - 1]);
}

/*---------------------------------------------------------------------------*/

void checkfile(void)
{
    int bufsize, ch;

    init_zero_contextbufs();

    for (  ;  ;  ) {
        move_contextbufs_down();
        if (quit) {       /* quit can't be set in l mode */
            while (fgets(contextbufs[0], sizeof contextbufs[0], infile) != NULL)
                fputs(contextbufs[0], outfile);
            break;
        }
        /*
         * Only read in enough characters to fill half this buffer so that any
         * corrections we make are not likely to cause an overflow.
         */
        if (fgets(contextbufs[0], (sizeof contextbufs[0]) / 2, infile) == NULL)
            break;
        /*
         * If we didn't read to end-of-line, we may have ended the
         * buffer in the middle of a word.  So keep reading until we
         * see some sort of character that can't possibly be part of a
         * word. (or until the buffer is full, which fortunately isn't
         * all that likely).
         */
        bufsize = strlen(contextbufs[0]);
        if (bufsize == (sizeof contextbufs[0]) / 2 - 1) {
            ch = (unsigned char) contextbufs[0][bufsize - 1];
            while (bufsize < sizeof contextbufs[0] - 1
                   && (iswordch((ichar_t) ch)  ||  isboundarych((ichar_t) ch)
                       || isstringstart(ch)))  {
                ch = getc(infile);
                if (ch == EOF)
                    break;
                contextbufs[0][bufsize++] = (char) ch;
                contextbufs[0][bufsize] = '\0';
            }
        }
        checkline(outfile);
    }
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

static int posscmp(char *a, char *b)
{
    return casecmp(a, b, 0);
}

/*---------------------------------------------------------------------------*/

int casecmp(char *a, char *b, int canonical)
/* int canonical - NZ for canonical string chars */
{
    register ichar_t *ap, *bp;
    ichar_t inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
    ichar_t intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

    strtoichar(inta, a, sizeof inta, canonical);
    strtoichar(intb, b, sizeof intb, canonical);
    for (ap = inta, bp = intb;  *ap != 0;  ap++, bp++) {
        if (*ap != *bp) {
            if (*bp == '\0')
                return hashheader.sortorder[*ap];
            else if (mylower(*ap)) {
                if (mylower(*bp)  ||  mytoupper(*ap) != *bp)
                    return (int) hashheader.sortorder[*ap]
                        - (int) hashheader.sortorder[*bp];
            }
            else {
                if (myupper(*bp)  ||  mytolower(*ap) != *bp)
                    return (int) hashheader.sortorder[*ap]
                        - (int) hashheader.sortorder[*bp];
            }
        }
      }
    if (*bp != '\0')
      return -(int) hashheader.sortorder[*bp];
    for (ap = inta, bp = intb;  *ap;  ap++, bp++) {
        if (*ap != *bp) {
            return (int) hashheader.sortorder[*ap]
                - (int) hashheader.sortorder[*bp];
        }
    }
    return 0;
}

/*---------------------------------------------------------------------------*/
/* makepossibilities                                                         */
/*---------------------------------------------------------------------------*/

static int insert(register ichar_t *word)
{
    register int   i;
    register char *realword;

    realword = ichartosstr(word, 0);
    for (i = 0; i < pcount; i++) {
        if (strcmp(possibilities[i], realword) == 0)
            return 0;
    }

    strcpy(possibilities[pcount++], realword);
    i = strlen(realword);
    if (i > maxposslen)
        maxposslen = i;
    if (pcount >= MAXPOSSIBLE)
        return -1;
    else
        return 0;
}

/*---------------------------------------------------------------------------*/

/* Insert one or more correctly capitalized versions of word */
static int ins_cap(ichar_t *word, ichar_t *pattern)
{
    int prestrip, preadd, sufstrip, sufadd, hitno;

    if (*word == 0)
        return 0;

    for (hitno = numhits;  --hitno >= 0;  ) {
        if (hits[hitno].prefix) {
            prestrip = hits[hitno].prefix->stripl;
            preadd = hits[hitno].prefix->affl;
        }
        else
            prestrip = preadd = 0;
        if (hits[hitno].suffix) {
            sufstrip = hits[hitno].suffix->stripl;
            sufadd = hits[hitno].suffix->affl;
        }
        else
            sufadd = sufstrip = 0;
        if (ins_root_cap(word, pattern, prestrip, preadd, sufstrip, sufadd,
                         hits[hitno].dictent, hits[hitno].prefix, hits[hitno].suffix)
            < 0)
            return -1;
    }
    return 0;
}

/*---------------------------------------------------------------------------*/

#ifndef NO_CAPITALIZATION_SUPPORT
static void wrongcapital(register ichar_t *word)
{
    ichar_t newword[MAXWLEN];

    /* When the third parameter to "good" is nonzero, it ignores case.
       If the word matches this way, "ins_cap" will recapitalize it correctly.
    */
    if (bgood(word, 0, 1, 1)) {
        icharcpy(newword, word);
        upcase(newword);
        ins_cap(newword, word);
    }
}
#endif

/*---------------------------------------------------------------------------*/

static void wrongletter(register ichar_t *word)
{
    register int i, j, n;
    ichar_t savechar;
    ichar_t newword[MAXWLEN];
    
    n = icharlen(word);
    icharcpy(newword, word);
#ifndef NO_CAPITALIZATION_SUPPORT
    upcase(newword);
#endif
    
    for (i = 0; i < n; i++) {
        savechar = newword[i];
        for (j = 0; j < Trynum; ++j) {
            if (Try[j] == savechar)
                continue;
            newword[i] = Try[j];
            if (bgood(newword, 0, 1, 1)) {
                if (ins_cap(newword, word) < 0)
                    return;
            }
        }
        newword[i] = savechar;
    }
}

/*---------------------------------------------------------------------------*/

static void extraletter(register ichar_t *word)
{
    ichar_t newword[MAXWLEN];
    register ichar_t *p, *r;
    
    if (icharlen(word) < 2)
        return;
    
    icharcpy(newword, word + 1);
    for (p = word, r = newword;  *p != 0;  ) {
        if (bgood(newword, 0, 1, 1)) {
            if (ins_cap(newword, word) < 0)
                return;
        }
        *r++ = *p++;
    }
}

/*---------------------------------------------------------------------------*/

static void missingletter(ichar_t *word)
{
    ichar_t newword[MAXWLEN + 1];
    register ichar_t *p, *r;
    register int      i;

    icharcpy(newword + 1, word);
    for (p = word, r = newword;  *p != 0; ) {   /* for each char. in the word */
        for (i = 0;  i < Trynum;  i++) {         /* try all possible chars */
            *r = Try[i];
            if (bgood(newword, 0, 1, 1)) {  /* the new word exists in dictionary */
                if (ins_cap(newword, word) < 0)
                    return;
            }
        }
        *r++ = *p++;
    }
    for (i = 0;  i < Trynum;  i++) {
        *r = Try[i];
        if (bgood(newword, 0, 1, 1)) {
            if (ins_cap(newword, word) < 0)
                return;
        }
    }
}

/*---------------------------------------------------------------------------*/

static void missingspace(ichar_t *word)
{
   ichar_t newword[MAXWLEN + 1];
   register ichar_t *p, savech;

   /*
   ** We don't do words of length less than 3;  this keeps us from
   ** splitting all two-letter words into two single letters.
   ** Also, we just duplicate the existing capitalizations, rather
   ** than try to reconstruct both, which would require a smarter
   ** version of ins_cap.
   */
   if (word[0] == 0  ||  word[1] == 0  ||  word[2] == 0)
      return;
   icharcpy(newword, word);
   for (p = newword + 1;  *p != 0;  p++) {
      savech = *p;
      *p = 0;
      if (bgood(newword, 0, 1, 1)) {  /* left word correct */
         *p = savech;
         if (bgood(p, 0, 1, 1)) {     /* right word correct */
            *p = ' ';
            icharcpy(p + 1, word + (p - newword));
            if (insert(newword) < 0)
               return;
            *p = '-';
            if (insert(newword) < 0)
               return;
            icharcpy(p, word + (p - newword));
         }
      }
      *p = savech;
   }
}

/*---------------------------------------------------------------------------*/

static void transposedletter(register ichar_t *word)
{
   ichar_t newword[MAXWLEN];
   register ichar_t *p, temp;

   icharcpy(newword, word);
   for (p = newword;  p[1] != 0;  p++) {
      temp = *p;
      *p = p[1];
      p[1] = temp;
      if (bgood(newword, 0, 1, 1)) {
         if (ins_cap(newword, word) < 0)
            return;
      }
      temp = *p;
      *p = p[1];
      p[1] = temp;
   }
}

/*---------------------------------------------------------------------------*/

static void tryveryhard(ichar_t *word)
{
   bgood(word, 1, 0, 1);   /* the second parameter is 1 to ignoreflagbits */
}

/*---------------------------------------------------------------------------*/

void makepossibilities(register ichar_t *word)
{
    register int i;

    for (i = 0; i < MAXPOSSIBLE; i++)
        possibilities[i][0] = 0;
    pcount = 0;
    maxposslen = 0;
    easypossibilities = 0;   /* number of possiblities using 4 usual errors */
    my_poss_count = 0;

#ifndef NO_CAPITALIZATION_SUPPORT
    wrongcapital(word);
#endif

    /*
     * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
     * page 363, the correct order for this is:
     * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
     * thus, it was exactly backwards in the old version. -- PWP
     */
    if (!yflag) {  /* not supressing typing errors */
        if (pcount < MAXPOSSIBLE)
            missingletter(word);               /* omission */
        if (pcount < MAXPOSSIBLE)
            transposedletter(word);       /* transposition */
        if (pcount < MAXPOSSIBLE)
            extraletter(word);                /* insertion */
        if (pcount < MAXPOSSIBLE)
            wrongletter(word);             /* substitution */

        if (missingspaceflag  &&  pcount < MAXPOSSIBLE && !aflag)
            missingspace(word);        /* two words */
    }
    easypossibilities = pcount;

    if (tryhardflag)
        tryveryhard(word);

    if ((sortit  || (pcount > easypossibilities))  &&  pcount) {
        if (easypossibilities > 0  &&  sortit)
            qsort((char *) possibilities, (unsigned) easypossibilities,
                  sizeof(possibilities[0]),
                  (int (*) (const void *, const void *)) posscmp);
        if (pcount > easypossibilities)
            qsort((char *) &possibilities[easypossibilities][0],
                  (unsigned) (pcount - easypossibilities), sizeof (possibilities[0]),
                  (int (*) (const void *, const void *)) posscmp);
    }
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

int compoundgood(ichar_t *word)
{
   ichar_t newword[MAXWLEN];
   register ichar_t *p, savech;
   long secondcap;        /* Capitalization of 2nd half */

   /*
   ** If missingspaceflag is set, compound words are never ok.
   */
   if (missingspaceflag)
       return 0;
   /*
   ** Test for a possible compound word (for languages like German that
   ** form lots of compounds).
   **
   ** This is similar to missingspace, except we quit on the first hit,
   ** and we won't allow either member of the compound to be a single
   ** letter.
   **
   ** We don't do words of length less than 2 * compoundmin, since
   ** both halves must at least compoundmin letters.
   */
   if (icharlen(word) < 2 * hashheader.compoundmin)
       return 0;
   icharcpy(newword, word);
   p = newword + hashheader.compoundmin;
   for (  ;  p[hashheader.compoundmin - 1] != 0;  p++) {
      savech = *p;
      *p = 0;
      if (bgood(newword, 0, 0, 0)) {
         *p = savech;
         if (bgood(p, 0, 1, 0)) {      /* Accept any case variant in 2nd */
            secondcap = whatcap(p);
            switch (whatcap(newword)) {
               case ANYCASE:
               case CAPITALIZED:
               case FOLLOWCASE:        /* Followcase can have l.c. suffix */
                   return secondcap == ANYCASE;
               case ALLCAPS:
                   return secondcap == ALLCAPS;
            }
         }
      }
      else
         *p = savech;
   }
   return 0;
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

/* ARGSUSED */
int ins_root_cap(ichar_t *word, ichar_t *pattern,
                 int prestrip, int preadd, int sufstrip, int sufadd,
                 struct dent *firstdent,
                 struct flagent *pfxent, struct flagent *sufent)
{
#ifndef NO_CAPITALIZATION_SUPPORT
   register struct dent * dent;
#endif /* NO_CAPITALIZATION_SUPPORT */
   int     firstisupper;
   ichar_t newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
#ifndef NO_CAPITALIZATION_SUPPORT
   register ichar_t *p;
   int len, i, limit;
#endif /* NO_CAPITALIZATION_SUPPORT */

   icharcpy(newword, word);
   firstisupper = myupper(pattern[0]);
#ifdef NO_CAPITALIZATION_SUPPORT
   /*
   ** Apply the old, simple-minded capitalization rules.
   */
   if (firstisupper) {
      if (myupper(pattern[1]))
         upcase(newword);
      else {
         lowcase(newword);
         newword[0] = mytoupper(newword[0]);
      }
   }
   else
      lowcase(newword);
   return insert(newword);
#else /* NO_CAPITALIZATION_SUPPORT */
#define flagsareok(dent)    \
    ((pfxent == NULL ||  TSTMASKBIT(dent->mask, pfxent->flagbit)) \
      &&  (sufent == NULL ||  TSTMASKBIT(dent->mask, sufent->flagbit)))

    dent = firstdent;
    if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS) {
       upcase(newword);        /* Uppercase required */
       return insert(newword);
    }
    for (p = pattern;  *p;  p++) {
       if (mylower(*p))
          break;
    }
    if (*p == 0) {
       upcase(newword);        /* Pattern was all caps */
       return insert(newword);
    }
    for (p = pattern + 1;  *p;  p++) {
       if (myupper(*p))
          break;
    }
    if (*p == 0) {
       /*
       ** The pattern was all-lower or capitalized.  If that's
       ** legal, insert only that version.
       */
       if (firstisupper) {
          if (captype(dent->flagfield) == CAPITALIZED
              ||  captype(dent->flagfield) == ANYCASE) {
             lowcase(newword);
             newword[0] = mytoupper(newword[0]);
             return insert(newword);
          }
        }
        else {
           if (captype(dent->flagfield) == ANYCASE) {
              lowcase(newword);
              return insert(newword);
           }
        }
        while (dent->flagfield & MOREVARIANTS) {
           dent = dent->next;
           if (captype(dent->flagfield) == FOLLOWCASE
               ||  !flagsareok(dent))
              continue;
           if (firstisupper) {
              if (captype(dent->flagfield) == CAPITALIZED) {
                 lowcase(newword);
                 newword[0] = mytoupper(newword[0]);
                 return insert(newword);
              }
           }
           else {
              if (captype (dent->flagfield) == ANYCASE) {
                 lowcase(newword);
                 return insert(newword);
              }
           }
       }
   }
    /*
    ** Either the sample had complex capitalization, or the simple
    ** capitalizations (all-lower or capitalized) are illegal.
    ** Insert all legal capitalizations, including those that are
    ** all-lower or capitalized.  If the prototype is capitalized,
    ** capitalized all-lower samples.  Watch out for affixes.
    */
    dent = firstdent;
    p = strtosichar(dent->word, 1);
    len = icharlen(p);
    if (dent->flagfield & MOREVARIANTS)
       dent = dent->next;        /* Skip place-holder entry */
    for (  ;  ;  ) {
       if (flagsareok(dent)) {
          if (captype(dent->flagfield) != FOLLOWCASE) {
             lowcase(newword);
             if (firstisupper  ||  captype(dent->flagfield) == CAPITALIZED)
                newword[0] = mytoupper(newword[0]);
             if (insert(newword) < 0)
                return -1;
          }
          else {
             /* Followcase is the tough one. */
             p = strtosichar(dent->word, 1);
             bcopy((char *) (p + prestrip),  (char *) (newword + preadd),
                   (len - prestrip - sufstrip) * sizeof(ichar_t));
             if (myupper(p[prestrip])) {
                for (i = 0;  i < preadd;  i++)
                   newword[i] = mytoupper(newword[i]);
             }
             else {
                for (i = 0;  i < preadd;  i++)
                   newword[i] = mytolower(newword[i]);
             }
             limit = len + preadd + sufadd - prestrip - sufstrip;
             i = len + preadd - prestrip - sufstrip;
             p += len - sufstrip - 1;
             if (myupper(*p)) {
                for (p = newword + i;  i < limit;  i++, p++)
                   *p = mytoupper(*p);
             }
             else {
                for (p = newword + i;  i < limit;  i++, p++)
                   *p = mytolower(*p);
             }
             if (insert(newword) < 0)
                return -1;
          }
      }
      if ((dent->flagfield & MOREVARIANTS) == 0)
         break;                /* End of the line */
      dent = dent->next;
   }
   return 0;
#endif /* NO_CAPITALIZATION_SUPPORT */
}




static void save_pers_dic()
{
    /* this is also part of the library */
    treeoutput();
    math_mode = 0;
    LaTeX_Mode = 'P';
}

/*---------------------------------------------------------------------------*/

/*
 *
 */
static void init_modes(char *strg) {
    int i;
    
    for (i = 0; i < strlen(strg); i++) {
	switch(strg[i]) {
	case 'g':
	    gflag = 1;
	    break;
	case 'G': /* default */
	    gflag = 0;
	    break;    
	case 'P':
	    tryhardflag = 0;
	    break;
	case 'm':
	    tryhardflag = 1;
	    break;
	case 'y':
	    yflag = 1;
	    break;
	case 'Y': /* default */
	    yflag = 0;
	    break;
	case 'z':
	    showflags = 1;
	    break;
	case 'Z': /* default */
	    showflags = 0;
	    break; 
	}
    }
}

/*---------------------------------------------------------------------------*/

void askmode()
{
   register char *cp1, *cp2;
   ichar_t *itok;                /* Ichar version of current word */

   if (fflag) {
      if (freopen(askfilename, "w", stdout) == NULL) {
         fprintf(stderr, CANT_CREATE, askfilename);
         exit(1);
      }
   }

   printf("%s\n", Version_ID[0]);

   while (fflush(stdout),
      xgets(contextbufs[0], sizeof contextbufs[0], stdin) != NULL) {
      /*
      ** *line is like `i', 
      ** @line is like `a', 
      ** &line is like 'u'
      ** $... init options do Ulisses
      ** $"... jj
      ** `#' is like `Q' (writes personal dictionary)
      ** `+' sets tflag, 
      ** `-' clears tflag
      ** `!' sets terse mode, 
      ** `%' clears terse
      ** `~' followed by a filename sets parameters according to file name
      ** `^' causes rest of line to be checked after stripping 1st char
      */
      if (contextbufs[0][0] == '*'  ||  contextbufs[0][0] == '@')
         treeinsert(ichartosstr(strtosichar(contextbufs[0] + 1, 0), 1),
                    ICHARTOSSTR_SIZE, contextbufs[0][0] == '*');
      else if (contextbufs[0][0] == '&') {
             itok = strtosichar(contextbufs[0] + 1, 0);
             lowcase(itok);
             treeinsert(ichartosstr(itok, 1), ICHARTOSSTR_SIZE, 1);
         }
      else if (contextbufs[0][0] == '#' && contextbufs[0][1] == '#')
              save_pers_dic();
      else if (contextbufs[0][0] == '#')   /* JJoao 2002 */
              printf("%s\n\n",contextbufs[0]);
      else if (contextbufs[0][0] == '!')
          terse = 1;
      else if (contextbufs[0][0] == '%')
          terse = 0;
      else if (contextbufs[0][0] == '+' || contextbufs[0][0] == '-') {
          math_mode = 0;
          LaTeX_Mode = 'P';
          tflag = (contextbufs[0][0] == '+');
          prefstringchar =
            findfiletype(tflag ? "tex" : "nroff", 1, (int *) NULL);
          if (prefstringchar < 0)
             prefstringchar = 0;
          defdupchar = prefstringchar;
      }
      else if (contextbufs[0][0] == '~') {
          defdupchar = findfiletype(&contextbufs[0][1], 1, &tflag);
          if (defdupchar < 0)
             defdupchar = 0;
      }
      else if (contextbufs[0][0] == '$' && contextbufs[0][1] == '"') {
          jjflags(contextbufs[0] + 2);
      }
      else if (contextbufs[0][0] == '$') {
          init_modes(contextbufs[0] + 1);
      }
      else {
         if (contextbufs[0][0] == '^') {
            /* Strip off leading uparrow */
            for (cp1 = contextbufs[0], cp2 = contextbufs[0] + 1;
                 (*cp1++ = *cp2++) != '\0'; )
              ;
         }
         checkline(stdout);
      }
   }
}



/**
 * @brief Copy/ignore "cnt" number of characters pointed to by *cc.
 * 
 */
void copyout(register char **cc, register int  cnt)
{
    while (--cnt >= 0) {
        if (**cc == '\0')
            break;
        if (!aflag && !lflag)
            putc(**cc, outfile);
        (*cc)++;
    }
}