The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
#-*-perl-*-
#
# OPTIONS
#
#  -c align-file ..... aligned treebank in Stockholm TreeAligner format
#                      (default = Alignments_SMULTRON_Sophies_World_SV_EN.xml)
#  -a nrAlign ........ max number of test sentences (default = 100)
#  -s strategy ....... alignment search strategy (default = greedy)
#  -m learner ........ classifier model to be used (default = megam)
#  -C ................ enable linked-children feature
#  -S ................ enable linked-subtree-nodes feature
#  -P ................ enable linked-parent feature
#  -D ................ enable link distance feature (parent-current)
#  -x threshold ...... score threshold for aligning
#  -M dir ............ Moses data dir (giza align + lexfiles)
#                      (default = moses-sophie)
#  -o model-file ..... name of the model file (default = treealign.<learner>)
#
#  -f arg, -k ........ accepted by the option parser, but not used by this script
#
#

use strict;
use FindBin;
use lib $FindBin::Bin.'/../lib';

# Getopt::Std stores each switch in a package variable named $opt_<letter>.
# Declare them with 'our' (the replacement for the obsolete 'use vars'
# pragma) so they are legal under 'use strict'.
our ($opt_a, $opt_s, $opt_c, $opt_m, $opt_o,
     $opt_C, $opt_S, $opt_P, $opt_D, $opt_x, $opt_M);
use Getopt::Std;

# Switches followed by ':' take an argument; the others are boolean flags.
# -f <arg> and -k are part of the accepted set but are not read anywhere in
# the code shown here.
# getopts() returns false when it meets an unknown switch (which it also
# warns about) or a switch whose argument is missing; stop in that case
# instead of continuing with a half-parsed command line.
getopts('f:a:s:c:m:SCPDkx:M:o:')
    or die "$0: invalid command-line options (see OPTIONS at the top of this script)\n";


use Lingua::Align::Trees;

# Resolve every setting from its command-line switch.  A switch that is
# unset (or holds a false value such as 0 or '') falls back to the built-in
# default on the right-hand side.
my $nrAlign   = $opt_a ? $opt_a : 100;                  # -a
my $search    = $opt_s ? $opt_s : 'greedy';             # -s
my $model     = $opt_m ? $opt_m : 'megam';              # -m
my $modelfile = $opt_o ? $opt_o : "treealign.$model";   # -o, named after the learner

my $MosesDir = $opt_M ? $opt_M : 'moses-sophie';        # -M
my $algfile  = $opt_c ? $opt_c                          # -c
                      : 'Alignments_SMULTRON_Sophies_World_SV_EN.xml';


# Construct the tree aligner from the settings resolved above.
# The constructor is called as Class->new(...): the indirect object form
# ("new Class(...)") is parsed heuristically by perl and is discouraged.
my $treealigner = Lingua::Align::Trees->new(

    -classifier => $model,                # classifier used

    -same_types_only => 1,                # link only T&T and nonT&nonT
#    -nonterminals_only => 1,              # link non-terminals only
#    -terminals_only => 1,                 # link terminals only
    -skip_unary => 1,                     # skip nodes with unary productions

    ## optional features, switched on by -C / -S / -P / -D
    -linked_children => $opt_C,           # add first-order dependency
                                          # (proportion of linked children)
    -linked_subtree => $opt_S,            # linked-subtree-nodes feature
    -linked_parent => $opt_P,             # linked-parent feature
    -linked_parent_distance => $opt_D,    # link distance (parent-current)

    ## lexicon files from the Moses data dir (-M)
    -lexe2f => $MosesDir.'/model/lex.0-0.e2f',
    -lexf2e => $MosesDir.'/model/lex.0-0.f2e',

    ## for the GIZA++ word alignment features
    -gizaA3_e2f => $MosesDir.'/giza.src-trg/src-trg.A3.final.gz',
    -gizaA3_f2e => $MosesDir.'/giza.trg-src/trg-src.A3.final.gz',

    ## for the Moses word alignment features
    -moses_align => $MosesDir.'/model/aligned.intersect',

    -lex_lower => 1,                      # always convert to lower case!

#    -output_format => 'dublin',          # Dublin format (default = sta)
    -min_score => $opt_x,                 # classification score threshold (-x);
                                          # undef when -x is not given
    -verbose => 1,

    );


# corpus to be used for training (and testing)

# Corpus description handed to the aligner: the alignment file selected
# with -c (or its default) together with its type tag.
my $corpus_spec = {
    -alignfile => $algfile,
    -type      => 'STA',
};

#-------------------------------------------------------------------
# Invoke align() with the corpus, the model file name, the search
# strategy and the sentence limit resolved earlier in the script.
$treealigner->align($corpus_spec, $modelfile, $search, $nrAlign);