// (Stray web-page banner captured with this file; it is not part of the program:
// "The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.")
package gma.util;

/**
 * <p>Title: ByteParser</p>
 * <p>Description: ByteParser is a utility class for parsing a line of input held as a list of byte values.</p>
 * <p>Copyright: Copyright (C) 2004 I. Dan Melamed</p>
 * <p>Company: Department of Computer Science, New York University</p>
 * @author Ali Argyle
 */


import java.io.ByteArrayInputStream;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Iterator;


import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Parses one line of input that is held as a list of byte values, where each
 * element of the list is an {@link Integer}.
 *
 * <p>Two line layouts are understood: a dictionary line, whose two halves are
 * separated by the two-byte sequence {@code <>}, and an axis line, which is a
 * space-free first token followed by the remainder of the line.</p>
 */
public class ByteParser {

  /** Byte value of the space character. */
  private static final int SPACE = 32;

  /** Byte value of {@code '<'}, the first byte of the dictionary separator. */
  private static final int LESS_THAN = 60;

  /** Byte value of {@code '>'}, the second byte of the dictionary separator. */
  private static final int GREATER_THAN = 62;

  /** The line being parsed; every element is expected to be an Integer. */
  private final List byteList;

  /**
   * Constructor.
   * @param inList              list of Integer byte values making up one line;
   *                            the list is kept by reference, not copied
   */
  public ByteParser(List inList) {
    byteList = inList;
  }

  /**
   * Get the string representation of the current byteList, built by casting
   * each byte value to a {@code char}.
   * @return string
   */
  public String listToString() {
    StringBuffer outString = new StringBuffer();
    Iterator li = byteList.iterator();
    while (li.hasNext()) {
      outString.append((char) ((Integer) li.next()).intValue());
    }
    return outString.toString();
  }

  /**
   * How big is the current byteList
   * @return number of elements in the current byteList
   */
  public int getSize() {
    return byteList.size();
  }

  /**
   * Parse the dictionary line in current byteList.
   *
   * <p>The line is split around the last {@code <>} separator it contains.
   * If no separator is found an error is printed to standard error and the
   * JVM is terminated with exit status 1.</p>
   *
   * @return  list of lists of bytes: the stripped bytes before the separator
   *          followed by the stripped bytes after it; a side that has no
   *          bytes at all is left out
   */
  public List parseDictionaryLine() {
    int separatorStart = findSeparator();
    if (separatorStart == -1) {
      /* separator not found */
      System.err.println("The input file is not in the correct translation lexicon format.");
      System.exit(1);
    }
    int separatorEnd = separatorStart + 1;

    List sourceBytes = new ArrayList();
    List targetBytes = new ArrayList();
    for (int i = 0; i < byteList.size(); i++) {
      if (i < separatorStart) {
        sourceBytes.add((Integer) byteList.get(i));
      } else if (i > separatorEnd) {
        targetBytes.add((Integer) byteList.get(i));
      }
    }

    List outList = new ArrayList();
    if (!sourceBytes.isEmpty()) {
      outList.add(strip(sourceBytes));
    }
    if (!targetBytes.isEmpty()) {
      outList.add(strip(targetBytes));
    }
    return outList;
  }

  /**
   * Remove leading and trailing spaces from a list of bytes.
   *
   * <p>An empty input yields an empty result. An input consisting only of
   * spaces also yields an empty result, and is reported on standard error
   * because it means one side of an entry is missing.</p>
   *
   * @param inList  list of Integer byte values; it is not modified
   * @return  new list holding the elements of inList between its first and
   *          last non-space byte, inclusive
   */
  public List strip(List inList) {
    List outList = new ArrayList();
    int start = 0;
    int end = inList.size();

    // Nothing to strip; also keeps the get() calls below in bounds.
    if (end == 0) {
      return outList;
    }

    // don't let the list start with a space
    while (start < end && valueAt(inList, start) == SPACE) {
      start++;
    }
    // catch the case where there is no corresponding entry
    if (start == end) {
      System.err.println("The lexicon contains an invalid entry");
      return outList;
    }
    // don't let the list end with a space; the non-space byte at 'start'
    // guarantees this loop stops before end reaches start
    while (valueAt(inList, end - 1) == SPACE) {
      end--;
    }

    for (int i = start; i < end; i++) {
      outList.add((Integer) inList.get(i));
    }
    return outList;
  }

  /**
   * Parse the axis line in current byteList.
   *
   * <p>Leading spaces are skipped. The bytes up to the next space form the
   * position; that single space is dropped and every byte after it (further
   * spaces included) forms the word, which is then stripped.</p>
   *
   * @return  list of lists of bytes: the position bytes followed by the word
   *          bytes; a part that has no bytes at all is left out
   */
  public List parseAxisLine() {
    List positionBytes = new ArrayList();
    List wordBytes = new ArrayList();
    boolean inPosition = true;

    for (int i = 0; i < byteList.size(); i++) {
      Integer element = (Integer) byteList.get(i);
      if (!inPosition) {
        wordBytes.add(element);
      } else if (element.intValue() != SPACE) {
        positionBytes.add(element);
      } else if (!positionBytes.isEmpty()) {
        // first space after the position token: switch to collecting the word
        inPosition = false;
      }
      // otherwise: a leading space before the position token, which is skipped
    }

    List outList = new ArrayList();
    if (!positionBytes.isEmpty()) {
      outList.add(strip(positionBytes));
    }
    if (!wordBytes.isEmpty()) {
      outList.add(strip(wordBytes));
    }
    return outList;
  }

  /**
   * Locate the last {@code <>} separator in the current byteList.
   *
   * <p>Only a {@code '>'} at index 2 or later is considered, so a separator
   * at the very beginning of the line is not recognised.</p>
   *
   * @return index of the {@code '<'} of the last separator, or -1 if none
   */
  private int findSeparator() {
    int found = -1;
    for (int i = 2; i < byteList.size(); i++) {
      if (valueAt(byteList, i) == GREATER_THAN && valueAt(byteList, i - 1) == LESS_THAN) {
        found = i - 1;
      }
    }
    return found;
  }

  /** Read the element at index as a primitive byte value. */
  private static int valueAt(List list, int index) {
    return ((Integer) list.get(index)).intValue();
  }

}