#!/usr/bin/env perl
# -*-perl-*-
#
# wordalign.pl:
#
#---------------------------------------------------------------------------
# Copyright (C) 2004 Jörg Tiedemann <joerg@stp.ling.uu.se>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#---------------------------------------------------------------------------
#
# $Id$
#
#
# information from the configfile will override other parameters
# (e.g. parameters [-in ...] and [-out ...] are discarded
# if input/output files are given in the configfile)
# default parameters are given in the &GetDefaultIni subfunction
# at the end of the script!
#
use strict;
BEGIN{
use FindBin qw($Bin);
use lib "$Bin/../lib";
if (not defined $ENV{UPLUGHOME}){
$ENV{UPLUGHOME}=$Bin.'/..';
}
}
# use Time::HiRes qw(time);
use Uplug::Data::Align;
use Uplug::IO::Any;
use Uplug::Config;
use Uplug::Align::Word;
use Uplug::Align::Word::Clue;
use Uplug::Align::Word::UWA;
use Cwd;
## global variable for socket handles
use vars qw/*SOCKET/;
## server mode: listen on port 1201 (localhost) for requests
##   - every connection sends ONE line holding the wordalign arguments
##   - STDOUT and STDERR are redirected to the connection, so that the
##     alignment result and all log output are sent back to the client
##   - the connection is closed when the alignment is done

if (grep($_ eq '-server',@ARGV)){
    use IO::Socket;
    my $request_sock = IO::Socket::INET->new(
        LocalHost => 'localhost',
        LocalPort => '1201',
        Proto     => 'tcp',
        Listen    => 1,
        Reuse     => 1,
    );
    die "Could not create socket: $!\n" unless $request_sock;

    while (*SOCKET = $request_sock->accept()){
        my $request = <SOCKET>;

        # a client that disconnects without sending a line yields undef;
        # do not start an alignment run with an empty argument list then
        if (defined $request){
            *STDERR=*SOCKET;
            *STDOUT=*SOCKET;

            # split(' ',...) ignores leading whitespace, whereas
            # split(/\s+/,...) would return an empty first argument
            my @argv = split(' ',$request);
            &wordalign(@argv);
        }
        close(*SOCKET);
    }
    close($request_sock);
}

## standard mode: just run the word alignment with given parameters

else{
    &wordalign(@ARGV);
}
#---------------------------------------------------------------------------
# wordalign(@argv)
#
# Run one complete word alignment.
#
#   @argv - command-line style arguments; they are merged into the default
#           configuration (GetDefaultIni) by CheckParameter
#
# Steps:
#   1. build the configuration and (optionally) change to the runtime dir
#   2. open the bitext, all enabled clue streams and the output stream
#   3. collect the link parameters for the aligner
#   4. create the aligner that matches the 'search' setting
#   5. read the bitext segment by segment, align it and write the links
#      in the format/style requested for the output stream
#   6. close all streams, print some statistics to STDERR and change back
#      to the directory the function was called from
#
# Dies if the bitext cannot be opened.
#---------------------------------------------------------------------------

sub wordalign{
    my @argv = @_;

    #-----------------------------------------------------------------------
    # configuration

    my %IniData=&GetDefaultIni();
    my $IniFile='wordalign.ini';
    &CheckParameter(\%IniData,\@argv,$IniFile);

    my $PrintProgr=$IniData{'parameter'}{'runtime'}{'print progress'};
    my $PrintHtml=$IniData{'parameter'}{'runtime'}{'print html'};
    my $PrintHtmlOnly=$IniData{'parameter'}{'runtime'}{'print html only'};

    # remember where we are: the directory is restored at the very end
    my $CurrentDir=getcwd();
    if (defined $IniData{parameter}{'runtime dir'}){
        chdir($IniData{parameter}{'runtime dir'});
    }

    #-----------------------------------------------------------------------
    # input and output data streams

    my $CorpusStream=&GetCorpusStream(\%IniData);
    my ($OutputStreamName,$OutputStream)=each %{$IniData{'output'}};

    my $corpus=Uplug::IO::Any->new($CorpusStream);
    $corpus->open('read',$CorpusStream) ||
        die "# wordalign.pl: failed to open the bitext!\n";

    my $links={};                 # clue name -> opened clue stream
    my $Param={};                 # clue name -> parameters of that clue
    &OpenLinkStreams(\%IniData,$links,$Param);

    my $output=Uplug::IO::Any->new($OutputStream);
    my $header={};
    $header->{SkipDataHeader}=0;
    $header->{SkipDataTail}=0;
    $header->{SkipSrcFile}=1;
    $header->{SkipTrgFile}=1;
    $output->addheader($header);
    if (not $PrintHtmlOnly){
        $output->open('write',$OutputStream);
    }

    #-----------------------------------------------------------------------
    # some additional parameters

    my $PWASTYLE=0;
    if ($OutputStream->{style}=~/pwa/i){$PWASTYLE=1;}

    my $verboseAlign=$IniData{'parameter'}{'alignment'}{verbose};
    my $alignIndex=$IniData{'parameter'}{'alignment'}{'index'};
    my $nrAlign=$IniData{parameter}{runtime}{'number of segments'};

    #-----------------------------------------------------------------
    # get general parameters for getting N-gram pairs from the bitext
    # (take the settings of one of the input streams)

    if ((defined $IniData{'parameter'}{'general input parameter'}) and
        (ref($IniData{'parameter'}{'general input parameter'}) eq 'HASH')){
        %{$Param->{general}}=%{$IniData{'parameter'}{'general input parameter'}};
    }
    elsif(defined $IniData{'parameter'}{'alignment'}{'general stream'}){
        my $st=$IniData{'parameter'}{'alignment'}{'general stream'};
        if (ref($Param->{$st}) eq 'HASH'){
            %{$Param->{general}}=%{$Param->{$st}};
        }
    }
    if (not defined $Param->{general}){
        # fall back to the parameters of an arbitrary clue stream
        my ($LinkStr)=each %{$links};
        if (ref($Param->{$LinkStr}) eq 'HASH'){
            %{$Param->{general}}=%{$Param->{$LinkStr}};
        }
    }

    #-----------------------------------------------------------------
    # original N-grams: original wordforms from the text

    $Param->{original}->{'language (source)'}=
        $Param->{general}->{'language (source)'};
    $Param->{original}->{'language (target)'}=
        $Param->{general}->{'language (target)'};
    $Param->{original}->{'token label'}=$Param->{general}->{'token label'};
    $Param->{original}->{'delimiter'}=$Param->{general}->{'delimiter'};

    #-----------------------------------------------------------------
    # some more (clue) parameters from the config-file:
    # settings from the 'parameter' section override the stream settings

    my @ParSpec;
    if (ref($IniData{parameter}{alignment}{clues}) eq 'ARRAY'){
        @ParSpec=@{$IniData{parameter}{alignment}{clues}};
    }
    elsif (ref($IniData{parameter}{alignment}{clues}) eq 'HASH'){
        @ParSpec=grep ($IniData{parameter}{alignment}{clues}{$_},
                       keys %{$IniData{parameter}{alignment}{clues}});
    }
    else{@ParSpec=keys %{$links};}
    push (@ParSpec,'general','original','string pairs');

    foreach my $l (@ParSpec){
        if (ref($IniData{parameter}{$l}) eq 'HASH'){
            foreach (keys %{$IniData{parameter}{$l}}){
                $Param->{$l}->{$_}=$IniData{parameter}{$l}{$_};
            }
        }
    }

    #-----------------------------------------------------------------

    my %lang;
    $lang{source}=$Param->{general}->{'language (source)'};
    $lang{target}=$Param->{general}->{'language (target)'};

    my $MinScore=$IniData{'parameter'}{'alignment'}{'minimal score'};
    $$Param{general}{'minimal score'}=$MinScore;
    my $SearchMode=$IniData{'parameter'}{'alignment'}{'search'};

    #-----------------------------------------------------------------------
    # and here we go!
    # ... the main part - alignment starts

    my $StartTime=time;

    if ($PrintProgr){
        print STDERR "read alignments\n";
    }

    my $AlignCount=0;             # number of segments read so far
    my $NrLinks=0;                # number of linked source tokens
    my $NrToken=0;                # NOTE: never updated (see statistics below)
    my @idx=();                   # ids of segments with a clue html file

    #------------------------------------------------------------------------
    # select the aligner class according to the search mode

    my $align;
    if ($SearchMode=~/best\-?first/){
        $align=Uplug::Align::Word->new($corpus,%{$IniData{parameter}{alignment}});
    }
    elsif ($SearchMode=~/uwa/){
        $align=Uplug::Align::Word::UWA->new($corpus,%{$IniData{parameter}{alignment}});
    }
    else{
        $align=Uplug::Align::Word::Clue->new($corpus,%{$IniData{parameter}{alignment}});
    }

    $align->setLinkParams($Param);
    $align->setLanguages($lang{source},$lang{target});
    $align->setLinkStreams($links);

    #------------------------------------------------------------------------

    my $time=time();
    while ($align->read($alignIndex)){

        $AlignCount++;
        my $id=$align->dataId();

        #------------------------------------------------
        # print some information to show progress

        if ($PrintProgr){
            if ($SearchMode ne 'tree'){
                if (not ($AlignCount % 10)){
                    $|=1;print STDERR '.';$|=0;
                }
            }
            if (not ($AlignCount % 100)){
                $|=1;
                print STDERR time()-$time;
                print STDERR " sec: $AlignCount bitext segments\n";
                $|=0;
            }
        }

        # stop after the requested number of segments
        # (this has to work even if no progress information is printed)
        if ($nrAlign and ($AlignCount>$nrAlign)){last;}

        my $LinkPt=$align->align();
        my %Links=();
        if (ref($LinkPt) eq 'HASH'){
            %Links=%{$LinkPt};
        }

        #-------------------------------------------------------------------------
        # html output

        if ($PrintHtml and ($PrintHtml>$#idx)){
            push (@idx,$id);
            &PrintHtmlClue(\@idx,$#idx,$align,$PrintHtml);
        }
        if ($PrintHtmlOnly){                   # just print html-output
            &PrintHtml(\@idx,$#idx,$align);    # for one alignment segment!!
            last;
        }

        #-------------------------------------------------------------------------

        my $data=$align->data();

        if ($verboseAlign){
            print STDERR "\n===================================================\n";
            print STDERR "word alignments";
            print STDERR "\n===================================================\n";
        }

        # moses format: links are written as pairs of token positions,
        # i.e. token ids have to be mapped to positions in the segment
        my @LinkedWords = ();
        my %SrcId2Pos = ();
        my %TrgId2Pos = ();
        if ($OutputStream->{format}=~/moses/i){
            my @SrcIDs = $data->getSrcTokens({'use attribute' => 'id'});
            my @TrgIDs =$data->getTrgTokens({'use attribute' => 'id'});
            foreach (0..$#SrcIDs){ $SrcId2Pos{$SrcIDs[$_]}=$_; }
            foreach (0..$#TrgIDs){ $TrgId2Pos{$TrgIDs[$_]}=$_; }
            next unless (@SrcIDs and @TrgIDs);
        }

        foreach my $s (keys %Links){

            # source token ids of a link are separated by ':'
            $NrLinks+=split(/\:/,$Links{$s}{source});
            if ($verboseAlign){
                $align->printBitextLink($id,$Links{$s});
            }

            if ($OutputStream->{format}=~/moses/i){
                my @SrcLinked=split(/\:/,$Links{$s}{source});
                my @TrgLinked=split(/\:/,$Links{$s}{target});
                foreach my $srcId (@SrcLinked){
                    foreach my $trgId (@TrgLinked){
                        push(@LinkedWords,$SrcId2Pos{$srcId}.'-'.$TrgId2Pos{$trgId});
                    }
                }
            }
            elsif ($PWASTYLE){
                # one output record per link
                my $link=Uplug::Data::Align->new;
                my ($src,$trg)=split(/;/,$Links{$s}{link});
                $link->setAttribute('source_id',$Links{$s}{source});
                $link->setAttribute('target_id',$Links{$s}{target});
                $link->setAttribute('id',$id);
                $link->setAttribute('source',$src);
                $link->setAttribute('target',$trg);
                $link->setAttribute('score',$Links{$s}{score});
                $link->setAttribute('align step',1);
                if ($Links{$s}{src}){
                    $link->setAttribute('source_span',$Links{$s}{src});
                }
                if ($Links{$s}{trg}){
                    $link->setAttribute('target_span',$Links{$s}{trg});
                }
                $output->write($link);
            }
            elsif (($OutputStream->{format}=~/(align|koma|xces|xml)/i) or
                   ($OutputStream->{style}=~/(liu|koma|xces|xml)/i)){
                # links are collected in the segment and written below
                $data->addWordLink($Links{$s});
            }
            else{
                # one output record per link
                my $link=Uplug::Data::Align->new;
                $link->setAttribute('source',$Links{$s}{source});
                $link->setAttribute('target',$Links{$s}{target});
                $link->setAttribute('id',$id);
                $link->setAttribute('link',$Links{$s}{link});
                $link->setAttribute('score',$Links{$s}{score});
                $output->write($link);
            }
        }

        # formats with one output record per segment
        if ($OutputStream->{format}=~/moses/i){
            $output->write(\@LinkedWords);
        }
        elsif (($OutputStream->{format}=~/(koma|align|xces|xml)/i) or
               ($OutputStream->{style}=~/(liu|koma|xces|xml)/i)){
            my $OutData=$data->{link};
            $output->write($OutData);
        }

        if ($alignIndex){last;}      # align only one sentence alignment!
    }

    #--------------------------------------------------------------------------
    # ready!!!
    # close all streams and print some information

    $corpus->close;
    if (not $PrintHtmlOnly){$output->close;}
    foreach (keys %{$links}){
        $links->{$_}->close;
    }

    my $TotalTime=time-$StartTime;
    print STDERR "overall time for this module : $TotalTime\n";
    # NOTE(review): $NrToken is never counted, i.e. the second number
    # is always 0 and no percentage is printed
    print STDERR "linked source tokens: $NrLinks/$NrToken = ";
    print STDERR "\% \n";

    chdir($CurrentDir);
}
#--------------------------------------------------------------------------
#--------------------------------------------------------------------------
#--------------------------------------------------------------------------
# PrintHtmlClue($idx,$id,$align,$max)
#
# Write an HTML page for the current alignment segment to
# 'clue-html-files/$id.html' (the directory is created if necessary).
#
#   $idx   - reference to the list of segment ids printed so far
#   $id    - position of the current segment in that list
#            (used as file name and for the previous/next links)
#   $align - aligner object; provides the clue matrix, the links
#            and the segment data
#   $max   - highest position for which a "next" link is printed
#
# A warning is printed and nothing is written if the file cannot be opened.

sub PrintHtmlClue{
    my ($idx,$id,$align,$max)=@_;

    my $dir='clue-html-files';
    if (not -d $dir){
        mkdir $dir,0755;
    }
    my $file="$dir/$id.html";

    # 3-argument open with a lexical handle: the file name is never
    # interpreted as an open mode and failures do not pass unnoticed
    my $fh;
    if (not open($fh,'>',$file)){
        warn "# wordalign.pl: cannot write $file: $!\n";
        return;
    }

    &PrintHtmlHeader($fh);
    &PrintPrevNextLinks($fh,$idx,$id,$max);
    print $fh $align->clueMatrixToHtml();
    print $fh $align->linksToHtml();
    print $fh "<hr>\n";
    my $data=$align->data();
    print $fh $data->toHTML();
    &PrintHtmlFooter($fh);
    close $fh;
    return;
}
# PrintHtml($idx,$id,$align)
#
# Print the HTML fragment for the current alignment segment (clue matrix,
# links, a horizontal rule and the segment itself) to the currently
# selected output handle. No HTML header or footer is added.
# The first two arguments are accepted but not used.

sub PrintHtml{
    my $align=$_[2];
    my $segment=$align->data();

    print $align->clueMatrixToHtml();
    print $align->linksToHtml();
    print "<hr>\n";
    print $segment->toHTML();
    return;
}
# PrintHtmlHeader($fh)
#
# Print the beginning of an HTML page to the given file handle:
# doctype, <head> (UTF-8 content type, title, style sheet) and the
# opening <body> tag.

sub PrintHtmlHeader{
    my ($fh)=@_;
    print {$fh} (
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/transitional.dtd">',
        "\n<html>\n<head>\n",
        '<meta http-equiv="content-type" content="text/html; charset=UTF-8">',
        "\n<title>clue alignment result</title>\n",
        "\n<style>\n",
        "<!--th { font-size:x-small; }-->\n",
        "<!--td { font-size:x-small;text-align:center }-->\n",
        "</style>\n</head><body>\n",
    );
}
# PrintHtmlFooter($fh)
#
# Print the closing </body> and </html> tags to the given file handle.

sub PrintHtmlFooter{
    my ($fh)=@_;
    print {$fh} "</body></html>";
}
# PrintPrevNextLinks($fh,$idx,$pos,$max)
#
# Print a small navigation table to the given file handle:
#   - a link to the previous page ($pos-1) unless $pos is 0
#   - the id stored at position $pos of the list $idx refers to
#   - a link to the next page ($pos+1) if $pos is smaller than $max
# Pages are addressed by their position ("<position>.html").

sub PrintPrevNextLinks{
    my ($fh,$idx,$pos,$max)=@_;

    print {$fh} "<table width=\"60%\"><tr><td>";
    if ($pos>0){
        my $before=$pos-1;
        print {$fh} "<a href=\"$before.html\">\<\<</a>";
    }
    print {$fh} "</td><td>$idx->[$pos]</td>";
    if ($pos<$max){
        my $after=$pos+1;
        print {$fh} "<td><a href=\"$after.html\">\>\></a></td>";
    }
    print {$fh} "</tr></table><hr>";
}
# GetCorpusStream($IniData)
#
# Return the specification of the corpus stream, i.e. the first entry
# in the 'input' section of the configuration with a name that
# contains 'text' (for example 'bitext').

sub GetCorpusStream{
    my ($IniData)=@_;
    foreach my $name (keys %{$IniData->{input}}){
        return $IniData->{input}{$name} if $name=~/text/;
    }
}
# OpenLinkStreams($IniData,$links,$Param)
#
# Open the clue streams that are listed in the 'input' section of the
# configuration (all entries without 'text' in their name).
#
#   $IniData - configuration (hash reference)
#   $links   - hash reference; filled with clue name -> opened stream
#   $Param   - hash reference; filled with clue name -> stream header
#
# If parameter:alignment:clues is an array, only the clues listed there
# are opened; if it is a hash, only the clues with a true value. Without
# such a list all clue streams are opened. Streams with neither a 'format'
# nor a 'stream name' and streams that cannot be created or opened are
# skipped.

sub OpenLinkStreams{
    my ($IniData,$links,$Param)=@_;

    # all input streams except the corpus itself are clue streams
    my %ClueSpec=map { $_ => $IniData->{input}{$_} }
                 grep { $_!~/text/ } keys %{$IniData->{input}};

    #-----------------
    # check if there's a defined list of clues
    # (open only defined clues)

    my $selection=$IniData->{parameter}{alignment}{clues};
    my @enabled=keys %ClueSpec;
    if (ref($selection) eq 'ARRAY'){
        @enabled=@{$selection};
    }
    elsif (ref($selection) eq 'HASH'){
        @enabled=grep { $selection->{$_} } keys %{$selection};
    }
    my %IsEnabled=map { $_ => 1 } @enabled;

    foreach my $clue (keys %ClueSpec){
        next unless $IsEnabled{$clue};

        my $spec=$ClueSpec{$clue};
        next if ((not defined $spec->{format}) and
                 (not defined $spec->{'stream name'}));

        $links->{$clue}=Uplug::IO::Any->new($spec);
        if (not ref($links->{$clue})){
            warn " something wrong with $clue!\n";
            delete $links->{$clue};
            next;
        }
        if (not $links->{$clue}->open('read',$spec)){
            delete $links->{$clue};
            next;
        }
        $Param->{$clue}=$links->{$clue}->header;
    }
}
# GetDefaultIni()
#
# Return the default configuration of this module as a hash (list of
# key/value pairs) with the following sections:
#
#   module      - general information about the module
#   description - description of the module (HTML)
#   input       - the bitext and all known clue streams
#   output      - the output stream
#   parameter   - default parameters of the clues, of the alignment
#                 (including the set of enabled clues) and of the runtime
#   arguments   - shortcuts: command-line flag -> path in the configuration
#   widgets     - widget types for parameters
#
# Every call builds a new data structure, i.e. callers may modify
# the returned configuration.

sub GetDefaultIni{

    my $DefaultIni =
    {
        'module' => {
            'program' => 'wordalign.pl',
            'location' => '$UplugBin',
            'name' => 'The clue aligner - linking words',
            'stdin' => 'bitext',
            'stdout' => 'bitext',
        },
        'description' => 'This module links words and phrases using the
clues that are available and which have been enabled for the
alignment. Note: If you enable additional clues make sure that they
exist, i.e. that they have been produced before. Non-existing clues
are simply ignored.<p>
The search parameter sets the link strategy:
The default search
strategy is a constrained best-first search (=best first). Other
available strategies are
<ul><li>a refined bi-directional alignment
(=refined)
<li>the intersection of directional alignments (source to
target and target to source) (=intersection)
<li>the union of
directional alignments (=union)
<li>a competitive linking approach (=competitive)
<li>and two directional alignment strategies
(directional_src and directional_trg).</ul>',
        'input' => {
            'bitext' => {
                'format' => 'xces align',
            },
            'string similarities' => {
                'stream name' => 'string similarities',
            },
            'dice' => {
                'stream name' => 'dice',
            },
            'mutual information' => {
                'stream name' => 'mutual information',
            },
            't-score' => {
                'stream name' => 't-score',
            },
            'pos dice' => {
                'stream name' => 'pos dice',
            },
            'giza dictionary' => {
                'stream name' => 'giza dictionary',
            },
            'giza inverse' => {
                'stream name' => 'giza inverse',
            },
            'dynamic POS clue' => {
                'stream name' => 'POS clue',
            },
            'dynamic POS clue (coarse)' => {
                'stream name' => 'POS clue (coarse)',
            },
            'dynamic chunk clue' => {
                'stream name' => 'chunk clue',
            },
            'dynamic position clue' => {
                'stream name' => 'position clue',
            },
            'dynamic lex clue' => {
                'stream name' => 'lex clue',
            },
            'dynamic lex/POS clue' => {
                'stream name' => 'lexpos clue',
            },
            'dynamic left POS-bigram clue' => {
                'stream name' => 'posleft clue',
            },
            'dynamic right POS-bigram clue' => {
                'stream name' => 'posright clue',
            },
            'dynamic POS-trigram clue' => {
                'stream name' => 'postrigram clue',
            },
            'dynamic chunk/POS clue' => {
                'stream name' => 'chunkpos clue',
            },
            'dynamic chunk/POS-trigram clue' => {
                'stream name' => 'chunkpostrigram clue',
            },
            'posposi clue' => {
                'stream name' => 'posposi clue',
            },
            'pos2posi clue' => {
                'stream name' => 'pos2posi clue',
            },
            'postri clue' => {
                'stream name' => 'postri clue',
            },
            'postriposi clue' => {
                'stream name' => 'postriposi clue',
            },
            'postri2posi clue' => {
                'stream name' => 'postri2posi clue',
            },
            'postri2 clue' => {
                'stream name' => 'postri2 clue',
            },
            'chunktripos clue' => {
                'stream name' => 'chunktripos clue',
            },
            'chunktriposi clue' => {
                'stream name' => 'chunktriposi clue',
            },
            'chunktri clue' => {
                'stream name' => 'chunktri clue',
            },
        },
        'output' => {
            'bitext' => {
                'format' => 'xces align',
                'status' => 'word',
            },
        },
        'parameter' => {
            'string similarities' => {
                'minimal score' => 0.3,
                'score weight' => 0.05,
            },
            'dice' => {
                'minimal score' => 0.2,
                'score weight' => 0.05,
            },
            'mutual information' => {
                'minimal score' => 2,
                'score weight' => 0.005,
            },
            't-score' => {
                'minimal score' => 0.8,
                'score weight' => 0.01,
            },
            'length clue' => {
                'score weight' => 0.0001,
                'string length difference' => 1,
            },
            'pos dice' => {
                'minimal score' => 0.2,
                'score weight' => 0.01,
            },
            'giza dictionary' => {
                'score weight' => '0.1',
            },
            'giza inverse' => {
                'score weight' => '0.1',
            },
            'dynamic POS clue' => {
#               'minimal score' => 0.2,
                'score weight' => 0.05,
            },
            'dynamic POS clue (coarse)' => {
#               'minimal score' => 0.2,
                'score weight' => 0.05,
            },
            'dynamic position clue' => {
#               'minimal score' => 0.2,
                'score weight' => 0.01,
            },
            'dynamic chunk clue' => {
#               'minimal score' => 0.2,
                'score weight' => 0.01,
            },
            'general' => {
                'chunks (source)' => 'c.*',
                'chunks (target)' => 'c.*',
            },
            'alignment' => {
                'remove word links' => 0,
                # clues that are used for the alignment (1 = enabled)
                'clues' => {
                    'string similarities' => 1,
                    'dice' => 1,
                    'mutual information' => 0,
                    't-score' => 0,
                    'giza dictionary' => 1,
                    'giza inverse' => 1,
                    'dynamic POS clue' => 0,
                    'dynamic POS clue (coarse)' => 0,
                    'dynamic chunk clue' => 0,
                    'dynamic position clue' => 0,
                    'chunktriposi clue' => 1,
                    'postriposi clue' => 1,
                },
                'minimal score' => '0.00001',
                'search' => 'matrix',
                'verbose' => 0,          # don't print clue matrices!
#               'minimal score' => '70%',
#               'general stream' => 'dice',
#               'align 1:1' => '0.5',
#               'remove linked' => 1,
#               'align identical' => '0.08',
            },
            'runtime' => {
                'print progress' => 1,
                'print link matrix' => 1,
            },
        },
        'arguments' => {
            # command-line flag => path of the setting (separated by ':')
            'shortcuts' => {
                'sim' => 'parameter:alignment:clues:string similarities',
                'dice' => 'parameter:alignment:clues:dice',
                'mi' => 'parameter:alignment:clues:mutual information',
                'tscore' => 'parameter:alignment:clues:t-score',
                'giza' => 'parameter:alignment:clues:giza dictionary',
                'giza2' => 'parameter:alignment:clues:giza inverse',
                'dynpos' => 'parameter:alignment:clues:dynamic POS clue',
                'dynpos2' => 'parameter:alignment:clues:dynamic POS clue (coarse)',
                'dynchunk' => 'parameter:alignment:clues:dynamic chunk clue',
                'dynposi' => 'parameter:alignment:clues:dynamic position clue',
                'dynlex' => 'parameter:alignment:clues:dynamic lex clue',
                'dynlexpos' => 'parameter:alignment:clues:dynamic lex/POS clue',
                'dynposbigramleft' => 'parameter:alignment:clues:dynamic left POS-bigram clue',
                'dynposbigramright' => 'parameter:alignment:clues:dynamic right POS-bigram clue',
                'dynpostrigram' => 'parameter:alignment:clues:dynamic POS-trigram clue',
                'dynchunkpos' => 'parameter:alignment:clues:dynamic chunk/POS clue',
                'dynchunkpostrigram' => 'parameter:alignment:clues:dynamic chunk/POS-trigram clue',
                'posposi' => 'parameter:alignment:clues:posposi clue',
                'pos2posi' => 'parameter:alignment:clues:pos2posi clue',
                'postri' => 'parameter:alignment:clues:postri clue',
                'postri2' => 'parameter:alignment:clues:postri2 clue',
                'postriposi' => 'parameter:alignment:clues:postriposi clue',
                'postri2posi' => 'parameter:alignment:clues:postri2posi clue',
                'chunktri' => 'parameter:alignment:clues:chunktri clue',
                'chunktripos' => 'parameter:alignment:clues:chunktripos clue',
                'chunktriposi' => 'parameter:alignment:clues:chunktriposi clue',
                'simw' => 'parameter:string similarities:score weight',
                'dicew' => 'parameter:dice:score weight',
                'miw' => 'parameter:mutual information:score weight',
                'tscorew' => 'parameter:t-score:score weight',
                'gizaw' => 'parameter:giza dictionary:score weight',
                'dynposw' => 'parameter:dynamic POS clue:score weight',
                'dynpos2w' => 'parameter:dynamic POS clue (coarse):score weight',
                'dynchunkw' => 'parameter:dynamic chunk clue:score weight',
                'dynposiw' => 'parameter:dynamic position clue:score weight',
                'statposw' => 'parameter:static POS clue:score weight',
                'statpos2w' => 'parameter:static POS clue 2:score weight',
                'statchunkw' => 'parameter:static chunk clue:score weight',
                'dynlexw' => 'parameter:dynamic lex clue:score weight',
                'dynlexposw' => 'parameter:dynamic lex/POS clue:score weight',
                'dynposbigramleftw' => 'parameter:dynamic left POS-bigram clue:score weight',
                'dynposbigramrightw' => 'parameter:dynamic right POS-bigram clue:score weight',
                'dynpostrigramw' => 'parameter:dynamic POS-trigram clue:score weight',
                'dynchunkposw' => 'parameter:dynamic chunk/POS clue:score weight',
                'dynchunkpostrigramw' => 'parameter:dynamic chunk/POS-trigram clue:score weight',
                'new' => 'parameter:alignment:non-aligned only',
                'in' => 'input:bitext:file',
                'infile' => 'input:bitext:file',
                'informat' => 'input:bitext:format',
                'out' => 'output:bitext:file',
                'srclang' => 'parameter:general:language (source)',
                'trglang' => 'parameter:general:language (target)',
                'id' => 'parameter:alignment:index',
                'html' => 'parameter:runtime:print html only',
                'search' => 'parameter:alignment:search',
                'v' => 'parameter:alignment:verbose',
                'adj' => 'parameter:alignment:adjacent_only',
                'phr' => 'parameter:alignment:in_phrases_only',
                'min' => 'parameter:alignment:minimal score',
            }
        },
        'widgets' => {
            'parameter' => {
                'alignment' => {
                    'clues' => {
                        'string similarities' => 'checkbox',
                        'dice' => 'checkbox',
                        'giza dictionary' => 'checkbox',
                        'dynamic POS clue' => 'checkbox',
                        'dynamic POS clue (coarse)' => 'checkbox',
                        'dynamic chunk clue' => 'checkbox',
                        'dynamic position clue' => 'checkbox',
                        'dynamic lex clue' => 'checkbox',
                        'dynamic lex/POS clue' => 'checkbox',
                        'posposi clue' => 'checkbox',
                        'postri clue' => 'checkbox',
                        'postriposi clue' => 'checkbox',
                        'chunktri clue' => 'checkbox',
                        'chunktripos clue' => 'checkbox',
                        'chunktriposi clue' => 'checkbox',
                    },
                    'minimal score' => 'scale (0,1,0.00001,0.005)',
                    # the option names have to match the names of the search
                    # strategies listed in the description above
                    'search' => 'optionmenu (best first,refined,intersection,union,competitive,directional_src,directional_trg)',
                }
            }
        }
    };
    return %{$DefaultIni};
}