The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package gma.simr;

/**
 * <p>Title: </p>
 * <p>Description: SIMR is the driver class for simr algorithm.</p>
 * <p>Copyright: Copyright (C) 2004 I. Dan Melamed</p>
 * <p>Company: Department of Computer Science, New York University</p>
 * @author Luke Shen
 */

import gma.AxisTick;
import gma.BitextSpace;
import gma.MapPoint;
import gma.util.InputFileHandler;
import gma.util.OutputFileHandler;

import java.io.BufferedInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.IOException;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.StringTokenizer;
import java.text.*;

public class SIMR {

  //constants for command line arguments and property file
  public static final String DASH = "-";
  public static final String PLUS = "+";
  public static final String SPACE = " ";
  public static final String SIMR = "simr";
  public static final String VERBOSE = "verbose";
  public static final String PROPERTIES = "properties";
  public static final String X_AXIS_FILE = "xAxisFile";
  public static final String Y_AXIS_FILE = "yAxisFile";
  public static final String OUTPUT_FILE = "outputFile";
  public static final String DEF_OUT = "/tmp/temp.simr";
  
  public boolean debug  = false;
  public String level  = "1"; // some output by default 
  public Integer verbose = new Integer(1);
  private Properties properties = new Properties(); //properties

  private BitextSpace bitextSpace = null; //bitext space

  private boolean isDone = false; //true when finish searching the whole bitext space
  
  /**
   * Constructor.
   * @param args                  command line arguments
   */
  public SIMR(String[] args) {
    try {
      parseArguments(args);
    } catch (IllegalArgumentException e) {
      printUsage();
      System.exit(1);
    }
  }

  /**
   * Constructor.
   * @param properties            properties
   */
  public SIMR(Properties properties) {
    this.properties = properties;
    level = properties.getProperty(VERBOSE,"1"); 
    verbose = new Integer(level);
  }

  /**
   * Prints command usage.
   */
  private void printUsage() {
    StringBuffer buffer = new StringBuffer("Usage: java gma.simr.SIMR [arguments]\n\n");
    buffer.append("where [arguments] are:\n\n");
    buffer.append(formArgumentUsage(PROPERTIES, true, "./SIMR.properties"));
    buffer.append(formArgumentUsage(X_AXIS_FILE, false, "./french.txt"));
    buffer.append(formArgumentUsage(Y_AXIS_FILE, false, "./english.txt"));
    buffer.append(formArgumentUsage(SIMR + "." + OUTPUT_FILE, false, "./simrOutput.txt"));
    System.err.println(buffer.toString());
  }

  /**
   * Forms command line argument usage.
   * @param argument              command line argument
   * @param isRequired            true for required argument
   * @param example               example command line argument
   * @return                      command line argument usage
   */
  private String formArgumentUsage(String argument, boolean isRequired, String example) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("\t").append(DASH).append(argument).append(SPACE).append(argument).append("\n");
    if (isRequired) {
      buffer.append("\t").append("required argument; ");
    } else {
      buffer.append("\t").append("optional argument; ");
    }
    buffer.append("e.g., ").append(DASH).append(argument).append(SPACE).append(example).append("\n\n");
    return buffer.toString();
  }

  /**
   * Recursively reads in property files
   */
  private void readProperties(String fileName) {
      InputStream in = null;
      try {
	  in = new BufferedInputStream(new FileInputStream(fileName));
	  properties.load(in);
	  BufferedReader reader = new BufferedReader(new FileReader(fileName));
	  String inc = new String("#INCLUDE");
	  String line;
	  while ((line = reader.readLine()) != null) {
	      if( (!line.equals("")) && (line.startsWith(inc)) ) {
		  String[] tok = line.split(" +");
		  InputStream in2 = null;
		  readProperties(tok[1]);
	      }
	  }
      } catch (FileNotFoundException e) {
	  e.printStackTrace();
	  System.exit(1);
      } catch (IOException e) {
          e.printStackTrace();
          System.exit(1);
      } finally {
          if (in != null) {
	      try {
		  in.close();
	      } catch (IOException e) {
		  e.printStackTrace();
	      }
          }
      }
  
  }

  /**
   * Parses command line arguments.
   * @param args                              command line arguments
   * @throws IllegalArgumentException
   */
  private void parseArguments(String[] args) throws IllegalArgumentException {
    boolean gotProp = false;  
    boolean gotOut = false;

    if ((args.length % 2) != 0) {
      throw new IllegalArgumentException("The number of arguments must be even.");
    }

    for (int index = 0; index < args.length; index++) {

      if (args[index].equals(DASH + PROPERTIES)) {
	  readProperties(args[++index]);
	  gotProp = true;
      } else if (args[index].equals(DASH + X_AXIS_FILE)) {
        properties.put(X_AXIS_FILE, args[++index]);

      } else if (args[index].equals(DASH + Y_AXIS_FILE)) {
        properties.put(Y_AXIS_FILE, args[++index]);

      } else if (args[index].equals(DASH + VERBOSE)) {
        properties.put(VERBOSE, args[++index]);
	level = properties.getProperty(VERBOSE);
	verbose = new Integer(level);


      } else if (args[index].equals(DASH + SIMR + "." + OUTPUT_FILE)) {
        properties.put(SIMR + "." + OUTPUT_FILE, args[++index]);
	gotOut = true;
	System.err.println("out file = " + properties.getProperty(SIMR + "." + OUTPUT_FILE));

      } else {
        throw new IllegalArgumentException(args[index] + "is an invalid argument.");
      }
    }
   if (gotProp == false) {
       throw new IllegalArgumentException("Property file must be specified at the command line.");
   }
   if (gotOut == false) {
       properties.put(SIMR + "." + OUTPUT_FILE, DEF_OUT);
   }
  }


  /**
   * Generates bitext correspondence.
   * @return                  sortedset of bitext correspondence
   */
  public SortedSet generateBitextCorrespondence() {
    bitextSpace = new BitextSpace(properties);
    /** @todo load anchor file */
    bitextSpace.generateAxes(); /* reading in the axis files */
    bitextSpace.updateSlopeProperty();
    SortedSet mappingChains = generateMappingChains();
    // the printChain method will write the chains as they are before
    // disambiguation to a file named [simr output file].debug
    if (debug) {printChain(mappingChains);}
    List disambiguatedChains = disambiguateChains(mappingChains);
    return sortMapPoints(disambiguatedChains);
    /** @todo extra pass for those gaps caused by inversed mapping */
  }

  /**
   * Prints out a list of chains for debugging purposes
   * @param mappingChains            sorted set of map points of bitext correspondence
   */
  public void printChain(SortedSet mappingChains) {

      String outfile = properties.getProperty(SIMR + "." + OUTPUT_FILE);
      OutputFileHandler outdebug = new OutputFileHandler(outfile + ".debug");

    //********************************************************************************
    Iterator it = mappingChains.iterator();

    // Added by Ali to print out these mapping Chains before disambiguation starts
    
    while (it.hasNext()) {
	MappingChain itChain = (MappingChain)it.next();
	List newList = ((MappingChain)itChain.clone()).sort(itChain);
	
	Iterator lit = newList.iterator();
	outdebug.write(", " + lit.next() + "\n");
	while (lit.hasNext()) {
	    outdebug.write(lit.next()+"\n");
	}
    }

    //********************************************************************************
    outdebug.close();

  }


  /**
   * Generates mapping chains.
   * @return                      sorted set of mapping chains.
   */
  private SortedSet generateMappingChains() {
    if (verbose.intValue() >= 1 ) { System.err.println("Generating mapping chains"); }
    SortedSet mappingChains = new TreeSet();
    SearchRectangle searchRectangle = new SearchRectangle(properties);

    searchRectangle.expandSearchRectangle(bitextSpace.getAxisTick(0, true), true); //expand xAxis

    searchRectangle.expandSearchRectangle(bitextSpace.getAxisTick(0, false), false); //expand yAxis

    while (!isDone) {
      MappingChain mappingChain = searchMappingChains(searchRectangle);
      if (mappingChain == null) {
        break;
      }
      mappingChains.add(mappingChain);
      //System.err.println("CHAIN --> " + mappingChain);
      searchRectangle = searchRectangle.reduceSearchRectangle();
    }


    return mappingChains;
  }

  /**
   * Searches for mapping chains.
   * @param searchRectangle                 search rectangle
   * @return                                mapping chain
   */
  private MappingChain searchMappingChains(SearchRectangle searchRectangle) {
      AxisTick xTick = searchRectangle.getBoundaryAxisTick(true, false);
      AxisTick yTick = searchRectangle.getBoundaryAxisTick(false, false);
      int xoffset = searchRectangle.getNumInstances(true, xTick);
      int yoffset = searchRectangle.getNumInstances(false, yTick);


    Iterator xTickIterator = bitextSpace.getAxisIterator(searchRectangle.getBoundaryAxisTick(true, false), true, xoffset-1);
    Iterator yTickIterator = bitextSpace.getAxisIterator(searchRectangle.getBoundaryAxisTick(false, false), false, yoffset-1);

    boolean first = true;
    while (xTickIterator.hasNext()) {
      if (first || searchRectangle.hasNewPoint()) {
	  first = false;
	  if (searchRectangle.hasMappingChain()) {
	     return searchRectangle.getBestChain();
	  }

      }
      searchRectangle.expandSearchRectangle((AxisTick)xTickIterator.next(), true);
      while (yTickIterator.hasNext()) {
        if (searchRectangle.canExpandYAxis()) {
	    searchRectangle.expandSearchRectangle((AxisTick)yTickIterator.next(), false);
        } else {
          break;
        }
      }
    }

    // case changed by Ali get output to match Perl output, no longer mandating
    // a new point, will leave it this way (old line is commented out below)
    // if (searchRectangle.hasNewPoint() && searchRectangle.hasMappingChain()) {
    if (searchRectangle.hasMappingChain()) {
      return searchRectangle.getBestChain();
    }
    isDone = true;
    //we are here when no mappingChain is found
    return null;
  }

  /**
   * Removes conflictual chains.
   * @param mappingChains           sorted set of mapping chains
   * @return                        list of non-conflictual mapping chains
   */
  private List disambiguateChains(SortedSet mappingChains) {

    //separate non-conflictual mapping chains from conflictual mapping chains
    List nonConflictChains = new ArrayList();
    Map conflictWithChains = new HashMap();

    Iterator outerIterator = mappingChains.iterator();
    while (outerIterator.hasNext()) {
      List conflictChains = new ArrayList();
      MappingChain outerMappingChain = (MappingChain)outerIterator.next();
      
      Iterator innerIterator = mappingChains.iterator();
      while (innerIterator.hasNext()) {
        MappingChain innerMappingChain = (MappingChain)innerIterator.next();

        if (outerMappingChain == innerMappingChain) {
          //only check up to the outer mapping chain
          break;

        } else if (outerMappingChain.getEndMapPoint(true, false).isMaxMapPoint(innerMappingChain.getEndMapPoint(true, true), true) == -1
        && outerMappingChain.getEndMapPoint(false, false).isMaxMapPoint(innerMappingChain.getEndMapPoint(false, true), false) == -1) {
          //if the max index of outerMappingChain is smaller than the min index of innerMappingChain,
          //there is no need to continue comparing because the mappingChains are sorted by min xAxisTick index
          break;

        } else if (outerMappingChain.getEndMapPoint(true, true).isMaxMapPoint(innerMappingChain.getEndMapPoint(true, false), true) == 1
		   && outerMappingChain.getEndMapPoint(false, true).isMaxMapPoint(innerMappingChain.getEndMapPoint(false, false), false) == 1) {
          //if the min index of outerMappingChain is greater than the max index of innerMappingChain,
          //move on to the next one because there is no overlapping part between there two chains
	  continue;

        } else {
	  // do the overlapping parts match?
          if (outerMappingChain.isConflict(innerMappingChain)) {
            conflictChains.add(innerMappingChain);

            List reverseConflictChains = null;
            if ( !conflictWithChains.containsKey(innerMappingChain) ) {
              reverseConflictChains = new ArrayList();
            } else {
              reverseConflictChains = (List)conflictWithChains.get(innerMappingChain);
            }

            reverseConflictChains.add(outerMappingChain);
            conflictWithChains.put(innerMappingChain, reverseConflictChains);
          }
	}
      }//end of inner while loop

      if (!conflictChains.isEmpty()) {
        conflictWithChains.put(outerMappingChain, conflictChains);
      }
    }//end of outer while loop
    

    Iterator nonConflictIterator = mappingChains.iterator();
    while (nonConflictIterator.hasNext()) {
      MappingChain mappingChain = (MappingChain)nonConflictIterator.next();
      if ( !conflictWithChains.containsKey(mappingChain) ) {
        nonConflictChains.add(mappingChain);
      }
    }

    //remove most conflictual chain one at a time
    while (!conflictWithChains.isEmpty()) {

      int maxConflictCounter = 0;
      MappingChain mostConflictualChain = null;
      Set keys = conflictWithChains.keySet();
      Iterator iterator = keys.iterator();
      while (iterator.hasNext()) {

        MappingChain keyChain = (MappingChain)iterator.next();	
        List conflictChains = (List)conflictWithChains.get(keyChain);


        if ((mostConflictualChain == null) || (conflictChains.size() > maxConflictCounter)
        || ((conflictChains.size() == maxConflictCounter) && (mostConflictualChain.getSumSquareError() < keyChain.getSumSquareError()))) {

          maxConflictCounter = conflictChains.size();
          mostConflictualChain = keyChain;
	}
        
      } //end of inner while loop
      
      List conflictChains = (List)conflictWithChains.get(mostConflictualChain);
      Iterator conflictIterator = conflictChains.iterator();
      
      // clean up the reverse mapping of the conflicts 
      while (conflictIterator.hasNext()) {
        MappingChain conflictChain = (MappingChain)conflictIterator.next();
        List reversedConflictChains = (List)conflictWithChains.get(conflictChain);
        reversedConflictChains.remove(mostConflictualChain);

        if (reversedConflictChains.isEmpty()) {
          conflictIterator.remove();
          nonConflictChains.add(conflictChain);
          conflictWithChains.remove(conflictChain);
        }
      }
      
      conflictWithChains.remove(mostConflictualChain);
    }

    return nonConflictChains;
    }

  /**
   * Sorts map points from the non-conflictual mapping chains and removes duplicate map points
   * @param bestChains                list of non-conflictual mapping chains
   * @return                          sorted set of map points
   */
  private SortedSet sortMapPoints(List bestChains) {
    SortedSet mapPoints = new TreeSet();
    Iterator iterator = bestChains.iterator();
    while (iterator.hasNext()) {
      MappingChain mappingChain = (MappingChain)iterator.next();
      for (int index = 0; index < mappingChain.getChainSize(); index++) {
        MapPoint mapPoint = mappingChain.getMapPoint(index);
	//System.err.println("mapPoint " + mapPoint);
        mapPoints.add(mapPoint);
      }
    }
    return mapPoints;
  }


  /**
   * Adds the intro and terminus in to the map
   * @param bestPoints            sorted set of map points of bitext correspondence
   */ 
  public String getTerminus(SortedSet bestPoints){
    InputFileHandler xInput = new InputFileHandler(properties.getProperty(X_AXIS_FILE));
    String xLastLine = "null";
    while (xInput.hasLine()) {
      String line = xInput.nextLine();
      xLastLine = line;
    }	
    StringTokenizer xTokenizer = new StringTokenizer(xLastLine);
    float xEndPosition = Float.parseFloat(xTokenizer.nextToken());

    InputFileHandler yInput = new InputFileHandler(properties.getProperty(Y_AXIS_FILE));
    String yLastLine = "null";
    while (yInput.hasLine()) {
      String line = yInput.nextLine();
      yLastLine = line;
    }	
    StringTokenizer yTokenizer = new StringTokenizer(yLastLine);
    float yEndPosition = Float.parseFloat(yTokenizer.nextToken());
    String terminus = (xEndPosition + " " + yEndPosition);
    // does this terminus already exist in bestPoints?
    //System.err.println("mapPoint terminus >" + terminus + "<");
    //System.err.println("mapPoint last >" + bestPoints.last() + "<");
    //String last = mp.getXAxisTick

    return terminus;
  }
  /**
   * Prints out map points of bitext correspondence.
   * @param bestPoints            sorted set of map points of bitext correspondence
   */
  public void printMapPoints(SortedSet bestPoints) {
    OutputFileHandler out = new OutputFileHandler(properties.getProperty(SIMR + "." + OUTPUT_FILE));
    Iterator iterator = bestPoints.iterator();
    out.write ("0.0 0.0\n");
    String lastLine = getTerminus(bestPoints);
    if (iterator.hasNext()) {
	String first = (((MapPoint)iterator.next()).toString());
	if (!(first.equals("0.0 0.0"))) {
	    out.write(first + "\n");
	}
	while (iterator.hasNext()) {
	    out.write(((MapPoint)iterator.next()).toString() + "\n");
	}
	MapPoint mp = (MapPoint)bestPoints.last();
	// they are equal
	if (!(lastLine.equals(mp.toString())) ) {
	    out.write(lastLine);
	    out.write("\n");
	}
    } else {
	out.write(lastLine + "\n");
    }
    out.close();
    // Now also write to stdout if no output file was specified
    if ((properties.getProperty(SIMR + "." + OUTPUT_FILE)) == DEF_OUT) {
	try {
	    BufferedReader out2screen = new BufferedReader(new InputStreamReader(new FileInputStream(DEF_OUT)));

	    String line = "";
	    while((line = out2screen.readLine()) != null) {
		System.out.println(line);
	    }
	    out2screen.close();
	} catch (IOException e) {
	    e.printStackTrace();
	    System.exit(1);
	}
    }
  }

  /**
   * Main method.
   * @param args                        command line arguments
   */
  public static void main (String[] args) {
    SIMR simr = new SIMR(args);
    long start = System.currentTimeMillis();
    SortedSet mapPoints = simr.generateBitextCorrespondence();
    simr.printMapPoints(mapPoints);
    long end = System.currentTimeMillis();
    long elapsed = end - start; //time in msecs
    //System.err.println("SIMR took " + DecimalFormat.getInstance().format(elapsed/1000.) + " seconds."); 
    System.exit(0);
  }
}