The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* -*- Mode: C; c-file-style: "stroustrup" -*- */

/* NATools - Package with parallel corpora tools
 * Copyright (C) 1998-2001  Djoerd Hiemstra
 * Copyright (C) 2002-2012  Alberto Simões
 *
 * This package is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */


/**
 * @file
 * @brief Post processing unit to output a binary dictionary
 */

#include <stdlib.h>
#include <stdio.h>
#include "standard.h"
#include <NATools/words.h>
#include "tempdict.h"
#include "dictionary.h"
#include "partials.h"

/**
 * @brief Lower threshold to add in the dictionary
 *
 * @todo Fix-me. I think I'm wrong.
 */
#define MINTOT 0.0f

static void setEntry(Dictionary* dic, nat_uint32_t word, 
		     Words *WL,
		     nat_uint32_t c[MAXENTRY], float *f, nat_uint32_t max, float total)
{
    nat_uint32_t j;
    float v;

    for(j = 0; j < max; j++) {
	v = f[j]/total;
	if (c[j] && v > 0.005f) {
	    dic = dictionary_set_id(dic, word, j, c[j]);
	    dic = dictionary_set_val(dic, word, j, v);
	}
    }
}

static void saveDicts(char *f1, char* f2, struct cMat2 *M, 
		      Words *WL1, Words *WL2,
		      PartialCounts *partials1, 
		      PartialCounts *partials2)
{
    float f[MAXENTRY], total;
    nat_uint32_t r, N, c[MAXENTRY];
    nat_uint32_t index;
    // unused -- WordLstNode *node;
    Dictionary* dic;
    
    N = words_size(WL1);

    dic = dictionary_new(N);

    index = 2;
    for (r = 2; r <= partials1->last; r++) {   
	total = tempdict_getrowmax2(M, index, c, f, MAXENTRY);
	if (total > (float) MINTOT) setEntry(dic, index, WL2, c, f, MAXENTRY, total);

	/* TO CHANGE */
	/* node = words_get_full_by_id(W1, index); */
	dic = dictionary_set_occ(dic, index, partials1->buffer[index]);

	index++;
    }
    total = tempdict_getrowmax2(M, 1, c, f, MAXENTRY);
    setEntry(dic, 1, WL2, c, f, MAXENTRY, total); /*  */

    if (!dictionary_save(dic, f1))
	report_error("Cannot save dictionary 2");

    dictionary_free(dic);


    /*  */

    /* SECOND DICTIONARY */

    N = words_size(WL2);

    dic = dictionary_new(N);

    index = 2;
    for(r = 2; r <= partials2->last; r++) {   
	total = tempdict_getcolumnmax2(M, index, c, f, MAXENTRY);
	if (total > (float) MINTOT) setEntry(dic, index, WL1, c, f, MAXENTRY, total);

	/* TO CHANGE */
	/* node = words_get_full_by_id(W2, index); */
	dic = dictionary_set_occ(dic, index, partials2->buffer[index]);
	index++;
    }   
    total = tempdict_getcolumnmax2(M, 1, c, f, MAXENTRY);   
    setEntry(dic, 1, WL1, c, f, MAXENTRY, total); /*  */

    if (!dictionary_save(dic, f2))
	report_error("Cannot save dictionary 2");

    dictionary_free(dic);
}

/**
 * @brief Main program
 *
 * @todo Document this
 */
int main(int argc, char **argv)
{
    struct cMat2 Dictionary;
    Words *Words1, *Words2;
    PartialCounts *partials1 = NULL, *partials2 = NULL;

    if (argc != 8)
	report_error("Usage: post dictfile partials1 partials2 lexiconfile1 lexiconfile2 out1 out2");
    
    printf("\n");
    printf("Loading %s...\n", argv[1]);
    if (tempdict_loadmatrix2(&Dictionary, argv[1])) 
	report_error("load dictionary");

    printf("Loading %s...\n", argv[2]);
    partials1 = PartialCountsLoad(argv[2]);
    if (!partials1) report_error("load partials 1");

    printf("Loading %s...\n", argv[3]);
    partials2 = PartialCountsLoad(argv[3]);
    if (!partials2) report_error("load partials 2");

    printf("Loading %s...\n", argv[4]);
    if (!(Words1 = words_load(argv[4]))) report_error("load strings 1");

    printf("Loading %s...\n", argv[5]);
    if (!(Words2 = words_load(argv[5]))) report_error("load strings 2");

    printf("Writing dictionaries %s,%s...\n\n", argv[6], argv[7]);
    saveDicts(argv[6], argv[7], &Dictionary, 
	      Words1, Words2, 
	      partials1, partials2);

    printf("Freeing data structures\n");
    tempdict_freematrix2(&Dictionary);
    words_free(Words1);
    words_free(Words2);
    
    return 0;
}