#-*-perl-*-
####################################################################
# Copyright (C) 2004 Jörg Tiedemann <joerg@stp.ling.uu.se>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
###########################################################################
# Uplug::Align::Word::Clue
#
#
#
###########################################################################
package Uplug::Align::Word::Clue;
use strict;
# use Time::HiRes qw(time);
use vars qw(@ISA $DEBUG);
use vars qw($INPHRASESONLY $ADJACENTONLY $ADJACENTSCORE $FILLPHRASES);
use vars qw($ALLOWMULTIOVERLAP $PRINTHTML);
# use utf8;
use Uplug::Data;
use Uplug::Align::Word;
use Data::Dumper;
$Data::Dumper::Indent=1;
$Data::Dumper::Terse=1;
$Data::Dumper::Purity=1;
@ISA = qw( Uplug::Align::Word );
$DEBUG = 0;
#---------------------------------
# parameters for add2LinkCluster
$INPHRASESONLY = 0; # if = 1 --> no links outside of chunks
$ADJACENTONLY = 0; # if = 1 --> allow only adjacent links
$ADJACENTSCORE = 0; # if > 0 --> $score >= $neighbor * $ADJACENTSCORE
# $ALLOWMULTIOVERLAP = 0; # allow overlap with more than 1 link cluster!
# $ADJACENTSCORE = 0.4;
$ADJACENTSCORE = 0;
$FILLPHRASES = 0; # ... doesn't work ....
sub new{
my $class=shift;
my $self=$class->SUPER::new(@_);
if (not defined $self->parameter('adjacent_only')){
$self->setParameter('adjacent_only',$ADJACENTONLY);
}
if (not defined $self->parameter('adjacent_score')){
$self->setParameter('adjacent_score',$ADJACENTSCORE);
}
if (not defined $self->parameter('in_phrases_only')){
$self->setParameter('in_phrases_only',$INPHRASESONLY);
}
if (not defined $self->parameter('fill_phrase')){
$self->setParameter('fill_phrases',$FILLPHRASES);
}
if (not defined $self->parameter('allow_multiple_overalps')){
$self->setParameter('allow_multiple_overlaps',$ALLOWMULTIOVERLAP);
}
if (not defined $self->parameter('verbose')){
$self->setParameter('verbose',$DEBUG);
}
else{$DEBUG=$self->parameter('verbose');}
return $self;
}
sub DESTROY {
my $self = shift;
}
#===========================================================================
#
# get all link scores and fill the clue matrix
#
#===========================================================================
sub getLinkScores{
my $self=shift;
$self->{matrix}=[];
$self->{links}={};
my $LinkProb=$self->{matrix};
my $links=$self->{linkStreams};
my $SrcTok=$self->{srcToken};
my $TrgTok=$self->{trgToken};
my $Param=$self->{param};
my $data=$self->{data};
## prepare clue param hash (reduce checks in the loop below)
my %ClueParam=%{$Param};
if (exists $ClueParam{general}){delete $ClueParam{general};}
if (exists $ClueParam{original}){delete $ClueParam{original};}
foreach (keys %ClueParam){
if (ref($ClueParam{$_}) ne 'HASH'){$ClueParam{$_}={};}
if (not defined $ClueParam{$_}{'score weight'}){
$ClueParam{$_}{'score weight'}=$self->defaultClueWeight();
}
}
## define some variables used in the loop
my $weight; # clue combination weight
my ($src,$trg); # source and target language tokens
my $score; # clue score found for the current pair
my $time; # time (for debugging)
my %search; # hash of patterns for searching clues
my $found=Uplug::Data->new; # clues found
my @SrcTok; # positions of the current source
my @TrgTok; # and target tokens in the sentence
my ($s,$t,$x,$y,$p); # variables for iteration
my $ScoreComb=$self->parameter('score combination');
if (not $ScoreComb){$ScoreComb='probabilistic';}
if ($self->parameter('verbose')){
print STDERR "\n=====================================================\n";
print STDERR "matching clue scores";
print STDERR "\n=====================================================\n";
}
## the following loop takes most of the time!
foreach $s (sort {$a <=> $b} keys %{$SrcTok}){
foreach $t (keys %{$TrgTok}){
$time=time();
($src,$trg)=($$SrcTok{$s}{general},$$TrgTok{$t}{general});
$self->alignIdentical($src,$trg,$s,$t,$LinkProb);
### DEBUG: store search time
$self->{identical_score_time}+=time()-$time if ($DEBUG);
foreach (keys %ClueParam){
$time=time();
$weight=$ClueParam{$_}{'score weight'};
if ($ClueParam{$_}{'relative position'}){
($src,$trg)=$self->makeRelPosFeature($$SrcTok{$s}{$_},
$$TrgTok{$t}{$_});
}
else{($src,$trg)=($$SrcTok{$s}{$_},$$TrgTok{$t}{$_});}
### DEBUG: store search time
$self->{before_score_time}+=time()-$time if ($DEBUG);
$score=0;
#---------------------------------------
# length difference as scores ...
#---------------------------------------
if ($ClueParam{$_}{'string length difference'}){
$score=$data->lengthQuotient($src,$trg);
}
#---------------------------------------
# otherwise: search scores in link-DB
#---------------------------------------
else{
if (not defined $links->{$_}){next;}
if (defined($src) and defined($trg)){
%search=('source' => $src,
'target' => $trg);
$time=time();
if ($links->{$_}->select($found,\%search)){
$score=$found->attribute('score');
}
### DEBUG: store search time
$self->{search_score_time}+=time()-$time if ($DEBUG);
}
}
$time=time();
#---------------------------------------
# set weighted score in score matrix
#---------------------------------------
if (not $score){next;}
if (not $data->checkPairParameter($src,$trg,$ClueParam{$_})){
### DEBUG: store search time
$self->{after_score_time}+=time()-$time if ($DEBUG);
next;
}
if (exists $ClueParam{$_}{'minimal score'}){
if ($score<$ClueParam{$_}{'minimal score'}){
### DEBUG: store search time
$self->{after_score_time}+=time()-$time if ($DEBUG);
next;
}
}
$score*=$weight;
# shouldn't be >1, but in case ...
#--------------------------------
if ($score>=1){$score=0.999999999999;}
#--------------------------------
if ($self->parameter('verbose')){
# printf STDERR "%20s [ %s %s ] %15s - %-15s %s\n",
printf STDERR "$_\t$s\t$t\t$src\t$trg\t$score\n";
}
@SrcTok=split(/:/,$s);
@TrgTok=split(/:/,$t);
foreach $x (@SrcTok){
foreach $y (@TrgTok){
# if ($self->parameter('verbose')){
# printf STDERR "%20s [%d %d] %15s - %-15s %s\n",
# $_,$x,$y,$src,$trg,$score;
# }
if ($ScoreComb eq 'addition'){
$$LinkProb[$x][$y]+=$score;
}
#
# log-linear and multiplication are useless!
# * there's not always a positive score for each possible pair!
# --> multiplications with one factor = 0 --> score = 0
# --> leaving out zero-factors -> implicit penalty for pairs with multiple
# clue scores
#
# elsif ($ScoreComb eq 'log-linear'){
# $$LinkProb[$x][$y]+=log($score);
# }
# elsif ($ScoreComb eq 'multiplication'){
# $$LinkProb[$x][$y]+=log($score);
# }
else{
$p=$$LinkProb[$x][$y];
$$LinkProb[$x][$y]=$p+$score-$p*$score;
}
}
}
### DEBUG: store search time
$self->{after_score_time}+=time()-$time if ($DEBUG);
}
}
}
$time=time();
$self->align1x($LinkProb);
# if ($ScoreComb eq 'log-linear'){ # special for log-linear:
# foreach $x (0..$#{$LinkProb}){ # reverse log (make positiv
# foreach $y (0..$#{$$LinkProb[$x]}){ # score values)
# $$LinkProb[$x][$y]=exp($$LinkProb[$x][$y]);
# }
# }
# }
if ($self->parameter('verbose')){
$self->printClueMatrix($self->{token}->{source},
$self->{token}->{target},
$self->{matrix});
$self->printBitextTokensWithID();
# $self->printBitextToken($self->{token}->{source},
# $self->{token}->{target});
}
### DEBUG: store search time
$self->{'1x_score_time'}+=time()-$time if ($DEBUG);
}
#===========================================================================
#
# search for the best alignment using the clue matrix scores
#
# topLinkSearch ........ iteratively add top links to link clusters
# nextBestSearch ....... score = distance to next best link (+topLinkSearch)
# oneOneFirstSearch .... non-overlapping first, overlapping then
# competitiveSearch .... competitive linking (1:1 links only!)
# bidirectionalRefineSearch intersection of directional links + overlapping
# directionalSrcSearch ..... best alignment source --> target
# directionalTrgSearch ..... best alignment target --> source
# bidirectionalUnion ....... union of directionalSrc & directionalTrg
# bidirectionalIntersection intersection of directionalSrc & directionalTrg
#
# parameter search: nextbest ........ nextBestSearch
# oneone....... ... oneOneFirstSearch
# competitive ..... competitiveSearch
# myrefined ....... bidirectionalRefinedSearch
# och ............. bidirectionalRefinedSearchOch
# src ............. directionalSrcSearch
# trg ............. directionalTrgSearch
# union ........... bidirectionalUnion
# intersection .... bidirectionalIntersection
# <default> ....... topLinkSearch
#
#===========================================================================
sub findAlignment{
my $self=shift;
$self->{links}={};
my $minScore=$self->scoreThreshold();
my $method=$self->parameter('search');
if ($method=~/nextbest/){
return $self->nextBestSearch($self->{links},$minScore);}
elsif ($method=~/competitive/){
return $self->competitiveSearch($self->{links},$minScore);}
elsif ($method=~/oneone/){
return $self->oneOneFirstSearch($self->{links},$minScore);}
elsif ($method=~/myrefined/){
return $self->bidirectionalRefinedSearch($self->{links},$minScore);}
elsif ($method=~/(och|refined)/){
return $self->bidirectionalRefinedSearchOch($self->{links},$minScore);}
elsif ($method=~/src/){
return $self->directionalSrcSearch($self->{links},$minScore);}
elsif ($method=~/trg/){
return $self->directionalTrgSearch($self->{links},$minScore);}
elsif ($method=~/union/){
return $self->bidirectionalUnion($self->{links},$minScore);}
elsif ($method=~/intersection/){
return $self->bidirectionalIntersection($self->{links},$minScore);}
else{
return $self->topLinkSearch($self->{links},$minScore);}
}
#===========================================================================
# add scores to the clue matrix for
# sentence alignments with only 1 word in either source or target
#===========================================================================
sub align1x{
my $self=shift;
my ($LinkProb)=@_;
my $Align11=$self->parameter('align 1:1');
my $Align1x=$self->parameter('align 1:x');
if ($Align11 and ($#{$LinkProb}==0)){
if ($#{$$LinkProb[0]}==0){
my $p=$$LinkProb[0][0];
$$LinkProb[0][0]=$p+$Align11-$p*$Align11;
return;
}
}
if ($Align1x and ($#{$LinkProb}==0)){
foreach (0..$#{$$LinkProb[0]}){
my $p=$$LinkProb[0][$_];
$$LinkProb[0][$_]=$p+$Align1x-$p*$Align1x;
}
return;
}
if ($Align1x){
my $ok=1;
foreach (0..$#{$LinkProb}){
if ($#{$$LinkProb[$_]}!=0){$ok=0;}
}
if ($ok){
foreach (0..$#{$LinkProb}){
my $p=$$LinkProb[$_][0];
$$LinkProb[$_][0]=$p+$Align1x-$p*$Align1x;
}
}
}
}
#===========================================================================
# add scores to the clue matrix for
# pairs of identical tokens with at least one non-alphabetical character
# (hard-coded as /[^A-Za-z]/ !!!!!!)
#===========================================================================
sub alignIdentical{
my $self=shift;
my $AlignIdentical=$self->parameter('align identical');
if (not $AlignIdentical){return;}
my ($src,$trg,$s,$t,$LinkProb)=@_;
if ($src=~/[^A-Za-z]/){
if ($src eq $trg){
my @SrcTok=split(/:/,$s);
my @TrgTok=split(/:/,$t);
foreach my $x (@SrcTok){
foreach my $y (@TrgTok){
my $p=$$LinkProb[$x][$y];
$$LinkProb[$x][$y]=$p+$AlignIdentical-$p*$AlignIdentical;
}
}
}
}
}
#===========================================================================
#
# topLinkSearch:
# 1) search best link in the matrix
# 2) add link to link clusters
# 3) continue with 1) until finished
#
#===========================================================================
sub topLinkSearch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $LinkProb=$self->{matrix};
my $Token=$self->{token};
my $TokenAttr=$self->{tokenAttr};
my @SrcLinks;
my @TrgLinks;
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @LinkMatrix;
my @LinkCluster;
my ($x,$y);
# ----------------------------
# print STDERR "---------new sentence-------$MinScore-------\n";
undef $self->{SORTEDLINKS};
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix); # clone the matrix
while (($x,$y)=$self->getTopLink(\@LinkMatrix,$MinScore)){
# print STDERR "$x:$y:$LinkMatrix[$x][$y]\n";
if ($MinScore=~/\%/){
$MinScore=$LinkMatrix[$x][$y]*$MinScore/100;
# print STDERR "## minscore == $MinScore\n";
}
if (not defined($LinkMatrix[$x][$y])){last;}
if ($LinkMatrix[$x][$y]<$MinScore){last;}
if ($self->add2LinkCluster($x,$y,\@LinkCluster)){
$LinkMatrix[$x][$y]=0;
}
}
# ----------------------------
# get the links from the set of link clusters
$self->getClusterLinks(\@LinkCluster,$Links); # get links
}
#===========================================================================
#
# nextBestSearch:
# 1) find score distance to "next best link" for each word pair
# 2) call topLinkSearch
#
#===========================================================================
sub nextBestSearch{
my $self=shift;
my $LinkProb=$self->{matrix};
$self->nextBestMatrix($LinkProb);
return $self->topLinkSearch(@_);
}
sub nextBestMatrix{
my $self=shift;
my ($LinkProb)=@_;
my @SortedColumns=();
my @SortedRows=();
my $sizeX=$#{$LinkProb};
my $sizeY=$#{$$LinkProb[0]};
foreach my $x (0..$sizeX){
@{$SortedColumns[$x]}=
sort {$$LinkProb[$x][$b] <=> $$LinkProb[$x][$a]} (0..$sizeY);
}
foreach my $y (0..$sizeY){
@{$SortedRows[$y]}=
sort {$$LinkProb[$b][$y] <=> $$LinkProb[$a][$y]} (0..$sizeX);
}
my @LinkMatrix=();
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
my $lowest=0;
foreach my $x (0..$sizeX){
foreach my $y (0..$sizeY){
my $NextBestY=$SortedColumns[$x][0];
if ($NextBestY==$y){$NextBestY=$SortedColumns[$x][1];}
my $NextBestX=$SortedRows[$y][0];
if ($NextBestX==$x){$NextBestX=$SortedRows[$y][1];}
my $NextBest=$LinkMatrix[$NextBestX][$y];
if ($LinkMatrix[$x][$NextBestY]>$NextBest){
$NextBest=$LinkMatrix[$x][$NextBestY];
}
$$LinkProb[$x][$y]-=$NextBest;
if ($$LinkProb[$x][$y]<$lowest){
$lowest=$$LinkProb[$x][$y];
}
}
}
foreach my $x (0..$sizeX){ # normalize!
foreach my $y (0..$sizeY){ # no negative values
$$LinkProb[$x][$y]-=$lowest; # in the matrix!
}
}
if ($self->parameter('verbose')){
$self->printClueMatrix($self->{token}->{source},
$self->{token}->{target},
$LinkProb);
}
}
#===========================================================================
#
# oneOneFirstSearch:
# 1) find all one-to-one word links first (non-overlapping links)
# 2) add iteratively overlapping links
#
#===========================================================================
sub oneOneFirstSearch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $LinkProb=$self->{matrix};
my $Token=$self->{token};
my $TokenAttr=$self->{tokenAttr};
my @SrcLinks;
my @TrgLinks;
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @LinkMatrix;
my @LinkCluster;
my ($x,$y);
# ----------------------------
# 1) get all word-to-word links without any overlaps
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix); # clone the matrix
while (($x,$y)=$self->getTopLink(\@LinkMatrix,$MinScore)){
if ($MinScore=~/\%/){
$MinScore=$LinkMatrix[$x][$y]*$MinScore/100;
print STDERR "## minscore == $MinScore\n";
}
if ($LinkMatrix[$x][$y]<$MinScore){last;}
my @overlap=$self->findClusterOverlap($x,$y,\@LinkCluster);
if (not @overlap){
$LinkCluster[$#LinkCluster+1]={};
$LinkCluster[-1]{src}{$x}=1;
$LinkCluster[-1]{trg}{$y}=1;
}
$LinkMatrix[$x][$y]=0;
}
# ----------------------------
# 2) do it again --> find overlapping links!
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix); # clone the matrix
while (($x,$y)=$self->getTopLink(\@LinkMatrix,$MinScore)){
if ($LinkMatrix[$x][$y]<$MinScore){last;}
$self->add2LinkCluster($x,$y,\@LinkCluster);
$LinkMatrix[$x][$y]=0;
}
# ----------------------------
# get the links from the set of link clusters
$self->getClusterLinks(\@LinkCluster,$Links); # get links
}
#===========================================================================
# ------------------ directional alignment (source to target) ----------------
#===========================================================================
sub directionalSrcSearch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $competitive=shift;
my @LinkCluster;
my ($x,$y);
my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
foreach (0..$#SrcLinks){
if ((defined $SrcLinks[$_]) and
($SrcLinks[$_] > 0)){
$self->add2LinkCluster($_,$SrcLinks[$_],\@LinkCluster);
}
}
$self->getClusterLinks(\@LinkCluster,$Links);
}
#===========================================================================
# ------------------ directional alignment (target to source ) ---------------
#===========================================================================
sub directionalTrgSearch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $competitive=shift;
my @LinkCluster;
my ($x,$y);
my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
foreach (0..$#TrgLinks){
if ((defined $TrgLinks[$_]) and
($TrgLinks[$_] > 0)){
$self->add2LinkCluster($TrgLinks[$_],$_,\@LinkCluster);
}
}
$self->getClusterLinks(\@LinkCluster,$Links);
}
#===========================================================================
# competitive linking
# 1) get best word-to-word link (s,t)
# 2) remove alternative links for (s) and for (t)
# 3) go to 1) until finished
#===========================================================================
sub competitiveSearch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
if (not defined $MinScore){
$MinScore=0.00000000000001;
}
my $Token=$self->{token};
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @WordLinks=();
if ($NrTrg>$NrSrc){
return $self->directionalTrgSearch($Links,$MinScore,1);
}
return $self->directionalSrcSearch($Links,$MinScore,1);
}
#===========================================================================
# refined symmetric link search a la Och&Ney
#
#===========================================================================
sub bidirectionalRefinedSearchOch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $competitive=shift;
if (not defined $MinScore){
$MinScore=0.00000000000001;
}
my $LinkProb=$self->{matrix};
my @LinkCluster;
my %WordLinks=();
my %InvWordLinks=();
my ($x,$y);
#-----------------------------------
# 1) get directional links
my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
#-----------------------------------
# 2) intersection of directional links
foreach (0..$#SrcLinks){
if ((defined $SrcLinks[$_]) and
($TrgLinks[$SrcLinks[$_]] eq $_)){
$WordLinks{$_}{$SrcLinks[$_]}=1;
$InvWordLinks{$SrcLinks[$_]}{$_}=1;
# print STDERR "$_ --> $SrcLinks[$_]\n";
}
}
#-----------------------------------
# 3) add overlapping links
# * sort all scores in the matrix
# * run through possible links starting with the highest score
# * repeat until no more links can be added
#
# links (s,t) are added if
# * there is no other link for both, s AND t
# * or ..the new link is adjacent to another link in source OR target
# and thew new link does not create links which have neighbors
# in both directions
my %scores=();
foreach my $s (0..$#{$LinkProb}){
foreach my $t (0..$#{$$LinkProb[$s]}){ # put all scores
$scores{"$s:$t"}=$$LinkProb[$s][$t]; # in a long list
}
}
my $added=0;
do{
$added=0;
foreach my $pair (sort {$scores{$b} <=> $scores{$a} } keys %scores){
if ($scores{$pair}<$MinScore){last;}
my ($s,$t)=split(/\:/,$pair);
if (((not defined $WordLinks{$s}) or # if no other links
(not keys %{$WordLinks{$s}})) and # are defined for both,
((not defined $InvWordLinks{$t}) or # source AND target
(not keys %{$InvWordLinks{$t}}))){ # word:
$added++;
$scores{$pair}=0; # add the link
$WordLinks{$s}{$t}=1;
$InvWordLinks{$t}{$s}=1;
# print STDERR "add $s --> $t (new)\n";
}
elsif ((($s>0) and
(defined $WordLinks{$s-1}{$t})) or # the link has a
(defined $WordLinks{$s+1}{$t}) or # vertical neighbor
(($t>0) and
(defined $WordLinks{$s}{$t-1})) or # or a
(defined $WordLinks{$s}{$t+1})){ # horizontal neighbor
$InvWordLinks{$t}{$s}=1;
$WordLinks{$s}{$t}=1; # if there are
if (&CheckWordLinks(\%WordLinks, # no links with
\%InvWordLinks)){ # neighbors in both
$added++; # dimensions! -->
$scores{$pair}=0; # add the new link
# print STDERR "add $s --> $t (adj)\n";
}
else{ # else:
delete $WordLinks{$s}{$t}; # delete the link
delete $InvWordLinks{$t}{$s};
# print STDERR "reject $s --> $t\n";
}
}
}
}
until (not $added); # repeat as long as links are added!
$self->setParameter('adjacent_only',0);
$self->setParameter('adjacent_score',0);
foreach my $s (keys %WordLinks){ # put word-to-word
foreach my $t (keys %{$WordLinks{$s}}){ # links together
$self->add2LinkCluster($s,$t,\@LinkCluster); # (link clusters)
}
}
#-----------------------------------
# 4) convert link cluster to word/phrase links
$self->getClusterLinks(\@LinkCluster,$Links);
}
#-------------------------------------------------------------------------
# check if there are alignments containing horicontal AND vertical links
# (---> return 0 if there are such links!)
sub CheckWordLinks{
my $srclinks=shift;
my $trglinks=shift;
foreach my $s (keys %{$srclinks}){
foreach my $t (keys %{$$srclinks{$s}}){
if (keys %{$$srclinks{$s}} > 1){
if (keys %{$$trglinks{$t}} > 1){
return 0;
}
}
}
}
return 1;
}
#===========================================================================
# symmetric alignment (bi-directional)
# 1) get links in both directions
# 2) get intersection of links
# 3) iteratively add new links to existing link clusters
#===========================================================================
sub bidirectionalRefinedSearch{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $competitive=shift;
if (not defined $MinScore){
$MinScore=0.00000000000001;
}
my $LinkProb=$self->{matrix};
my @LinkCluster;
my ($x,$y);
#-----------------------------------
# 1) get directional links
my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
#-----------------------------------
# 2) intersection of directional links
foreach (0..$#SrcLinks){
if ((defined $SrcLinks[$_]) and
($TrgLinks[$SrcLinks[$_]] eq $_)){
$self->add2LinkCluster($_,$SrcLinks[$_],
\@LinkCluster); # (link clusters)
}
}
#-----------------------------------
# 3) add overlapping links
# * sort all scores in the matrix
# * run through possible links starting with the highest score
# * repeat until no more links can be added
#
# links (s,t) are added if
# * there is no other link for both, s AND t
# * or ..the new link is adjacent to another link in source OR target
# and thew new link does not create links which have neighbors
# in both directions
my %scores=();
foreach my $s (0..$#{$LinkProb}){
foreach my $t (0..$#{$$LinkProb[$s]}){ # put all scores
$scores{"$s:$t"}=$$LinkProb[$s][$t]; # in a long list
}
}
my $added=0;
do{
$added=0;
foreach my $pair (sort {$scores{$b} <=> $scores{$a} } keys %scores){
if ($scores{$pair}<$MinScore){last;}
my ($s,$t)=split(/\:/,$pair);
if ($self->add2LinkCluster($s,$t,\@LinkCluster)){
$added++;
delete $scores{$pair};
}
}
}
until (not $added); # repeat as long as links are added!
#-----------------------------------
# 4) convert link cluster to word/phrase links
$self->getClusterLinks(\@LinkCluster,$Links);
}
# ------------------ bi-directional alignment (union) ------------------
#
# union of links in both diretions
#
sub bidirectionalUnion{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $competitive=shift;
my @LinkCluster;
my ($x,$y);
my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
foreach (0..$#SrcLinks){
if (defined $SrcLinks[$_]){
$self->add2LinkCluster($_,$SrcLinks[$_],\@LinkCluster);
}
}
my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
foreach (0..$#TrgLinks){
if (defined $TrgLinks[$_]){
$self->add2LinkCluster($TrgLinks[$_],$_,\@LinkCluster);
}
}
$self->getClusterLinks(\@LinkCluster,$Links);
}
# ------------------ bi-directional alignment (intersection) -------------
#
# intersection of links in both directions
#
sub bidirectionalIntersection{
my $self=shift;
my $Links=shift;
my $MinScore=shift;
my $competitive=shift;
my @LinkCluster;
my ($x,$y);
my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
foreach (0..$#SrcLinks){
if ((defined $SrcLinks[$_]) and
($TrgLinks[$SrcLinks[$_]] eq $_)){
$self->add2LinkCluster($_,$SrcLinks[$_],\@LinkCluster);
$SrcLinks[$_]=undef;
$TrgLinks[$SrcLinks[$_]]=undef;
}
}
$self->getClusterLinks(\@LinkCluster,$Links);
}
# ------------------------------------
# get best links from source to target words
sub bestSrcLinks{
my $self=shift;
my $MinScore=shift; # score threshold
my $competitive=shift; # enable/disable competive linking
if ($competitive){
return $self->competitiveSrcLinks($MinScore,@_);
}
my $LinkProb=$self->{matrix};
my $Token=$self->{token};
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @Links=();
# ----------------------------
my @LinkMatrix=();
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
# ----------------------------
foreach my $s (0..$NrSrc){
my $bestLink=0;
my $bestScore=$LinkMatrix[$s][$bestLink];
foreach my $t (1..$NrTrg){
if ($LinkMatrix[$s][$t]>$bestScore){
$bestLink=$t;
$bestScore=$LinkMatrix[$s][$bestLink];
}
}
if ($LinkMatrix[$s][$bestLink]<$MinScore){next;}
# if ($LinkMatrix[$s][$bestLink]<$MinScore){last;}
$Links[$s]=$bestLink;
}
return @Links;
}
# ------------------------------------
# get best links from target to source words
sub bestTrgLinks{
my $self=shift;
my $MinScore=shift; # score threshold
my $competitive=shift; # enable/disable competive linking
if ($competitive){
return $self->competitiveTrgLinks($MinScore,@_);
}
my $LinkProb=$self->{matrix};
my $Token=$self->{token};
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @Links=();
# ----------------------------
my @LinkMatrix=();
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
# ----------------------------
foreach my $t (0..$NrTrg){
my $bestLink=0;
my $bestScore=$LinkMatrix[$bestLink][$t];
foreach my $s (1..$NrSrc){
if ($LinkMatrix[$s][$t]>$bestScore){
$bestLink=$s;
$bestScore=$LinkMatrix[$bestLink][$t];
}
}
if ($LinkMatrix[$bestLink][$t]<$MinScore){next;}
# if ($LinkMatrix[$bestLink][$t]<$MinScore){last;}
$Links[$t]=$bestLink;
}
return @Links;
}
# ------------------------------------
# competitive linking from source to target
sub competitiveSrcLinks{
my $self=shift;
my $MinScore=shift; # score threshold
my $LinkProb=$self->{matrix};
my $Token=$self->{token};
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @Links=();
# ----------------------------
my @LinkMatrix=();
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
# ----------------------------
my ($s,$t);
while (($s,$t)=$self->getTopLink(\@LinkMatrix,$MinScore)){
if ($LinkMatrix[$s][$t]<$MinScore){next;}
$LinkMatrix[$s][$t]=0;
$Links[$s]=$t;
foreach my $x (0..$NrSrc){$LinkMatrix[$x][$t]=0;}
foreach my $x (0..$NrTrg){$LinkMatrix[$s][$x]=0;}
}
return @Links;
}
# ------------------------------------
# competitive linking from target to source
sub competitiveTrgLinks{
my $self=shift;
my $MinScore=shift; # score threshold
my $LinkProb=$self->{matrix};
my $Token=$self->{token};
my $NrSrc=$#{$$Token{source}};
my $NrTrg=$#{$$Token{target}};
my @Links=();
# ----------------------------
my @LinkMatrix=();
$self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
# ----------------------------
my ($s,$t);
while (($s,$t)=$self->getTopLink(\@LinkMatrix,$MinScore)){
if ($LinkMatrix[$s][$t]<$MinScore){next;}
$LinkMatrix[$s][$t]=0;
$Links[$t]=$s;
foreach my $x (0..$NrSrc){$LinkMatrix[$x][$t]=0;}
foreach my $x (0..$NrTrg){$LinkMatrix[$s][$x]=0;}
}
return @Links;
}
#==========================================================================
#
# get the word-to-word link with the highest score from the clue matrix
#
#==========================================================================
sub getTopLink{
my $self=shift;
my $LinkProb=shift;
my $MinScore=shift;
my $bestX=undef;
my $bestY=undef;
my $bestVal;
if (not ref($self->{SORTEDLINKS})){
$self->sortLinks($LinkProb,$MinScore);
}
my $top=shift @{$self->{SORTEDLINKS}};
if (not defined $top){
delete $self->{SORTEDLINKS};
}
my @link=split (':',$top);
return @link;
}
sub sortLinks{
my $self=shift;
my $LinkProb=shift;
my $MinScore=shift;
$self->{ALLLINKS}={};
foreach my $x (0..$#{$LinkProb}){
foreach my $y (0..$#{$$LinkProb[$x]}){
if ($$LinkProb[$x][$y]<$MinScore){next;}
if ($$LinkProb[$x][$y]<=0){next;}
$self->{ALLLINKS}->{"$x:$y"}=$$LinkProb[$x][$y];
}
}
@{$self->{SORTEDLINKS}}=
sort {$self->{ALLLINKS}->{$b} <=> $self->{ALLLINKS}->{$a}}
keys %{$self->{ALLLINKS}};
}
sub getTopLinkOld{
my $self=shift;
my $LinkProb=shift;
my $bestX=undef;
my $bestY=undef;
my $bestVal;
foreach my $x (0..$#{$LinkProb}){
my @sort = sort {$$LinkProb[$x][$b] <=> $$LinkProb[$x][$a]}
(0..$#{$$LinkProb[$x]});
if ($$LinkProb[$x][$sort[0]]>$bestVal){
$bestVal=$$LinkProb[$x][$sort[0]];
$bestX="$x";
$bestY="$sort[0]";
}
}
if ((defined $bestX) and (defined $bestY)){
return ($bestX,$bestY);
}
else{
return ();
}
}
#==========================================================================
#
# getClusterLinks:
# make word/phrase links out of link clusters
# (add all necessary information for storing links,
# e.g. token pairs, id's, byte spans)
#
#==========================================================================
sub getClusterLinks{
my $self=shift;
my $LinkCluster=shift;
my $Links=shift;
my $LinkProb=$self->{matrix};
my $TokenAttr=$self->{tokenAttr};
if (ref($Links) ne 'HASH'){$Links={};}
foreach (0..$#{$LinkCluster}){
if (keys %{$$LinkCluster[$_]{src}}){
if (keys %{$$LinkCluster[$_]{trg}}){
my $src=join ':',sort {$a<=>$b} keys %{$$LinkCluster[$_]{src}};
my $trg=join ':',sort {$a<=>$b} keys %{$$LinkCluster[$_]{trg}};
my $score=$self->getMatrixScore($LinkProb,
$$LinkCluster[$_]{src},
$$LinkCluster[$_]{trg});
my $link=$self->getLinkString($TokenAttr,$src,$trg);
$$Links{$src}{link}=$link;
$$Links{$src}{source}=
$self->ngramIDs($src,$TokenAttr,'source');
$$Links{$src}{target}=
$self->ngramIDs($trg,$TokenAttr,'target');
# my $span=$self->ngramSpans($src,$TokenAttr,'source');
# if ($span){$$Links{$src}{src}=$span;}
# $span=$self->ngramSpans($trg,$TokenAttr,'target');
# if ($span){$$Links{$src}{trg}=$span;}
$$Links{$src}{score}=$score;
}
}
}
return $Links;
}
sub getMatrixScore{
my $self=shift;
my ($matrix,$src,$trg)=@_;
my $score=0;
my $count;
foreach my $s (keys %{$src}){
foreach my $t (keys %{$trg}){
if ($$matrix[$s][$t]>0){
$score+=log($$matrix[$s][$t]);
$count++;
}
}
}
if ($count){
$score/=$count;
}
return exp($score);
}
#==========================================================================
#
# add links to link clusters
#
#==========================================================================
sub add2LinkCluster{
my $self=shift;
my ($x,$y,$cluster)=@_;
my @overlap=$self->findClusterOverlap($x,$y,$cluster);
if ((not $self->parameter('allow_multiple_overlaps')) and (@overlap>1)){
# print STDERR "disregard $x - $y (multi-overlap)!\n";
return 0;
}
elsif (@overlap){
if ($self->parameter('in_phrases_only')){
if ($self->parameter('fill_phrases')){
if (not $self->fillPhrases($x,$y,$cluster,$overlap[0])){
# print STDERR "disregard $x - $y (fill phrase)!\n";
return 0;
}
}
if (not $self->isInPhrase($x,$y,$$cluster[$overlap[0]])){
# print STDERR "disregard $x - $y (not in phrase)!\n";
return 0;
}
}
if ($self->parameter('adjacent_only')){
if (not $self->isAdjacent($x,$y,$$cluster[$overlap[0]])){
# print STDERR "disregard $x - $y (not adjacent)!\n";
return 0;
}
}
if ($self->parameter('adjacent_score')){
if (not $self->isAdjacentScore($x,$y,$$cluster[$overlap[0]],
$self->parameter('adjacent_score'))){
#s print STDERR "disregard $x - $y (score difference to adjacent too big)!\n";
return 0;
}
}
$$cluster[$overlap[0]]{src}{$x}=1;
$$cluster[$overlap[0]]{trg}{$y}=1;
if (@overlap>1){ # join all overlapping
foreach my $o (1..$#overlap){ # link clusters!
foreach (keys %{$$cluster[$overlap[$o]]{src}}){
delete $$cluster[$overlap[$o]]{src}{$_};
$$cluster[$overlap[0]]{src}{$_}=1;
}
foreach (keys %{$$cluster[$overlap[$o]]{trg}}){
delete $$cluster[$overlap[$o]]{trg}{$_};
$$cluster[$overlap[0]]{trg}{$_}=1;
}
}
}
}
else{
$$cluster[$#{$cluster}+1]={};
$$cluster[-1]{src}{$x}=1;
$$cluster[-1]{trg}{$y}=1;
}
return 1;
}
sub isInPhrase{
my $self=shift;
my ($newX,$newY,$cluster)=@_;
my @srcAccepted=keys %{$self->{srcToken}};
my @trgAccepted=keys %{$self->{trgToken}};
my %src=%{$cluster->{src}};
my %trg=%{$cluster->{trg}};
$src{$newX}=1;
$trg{$newY}=1;
# my $srcPhr=join ':',sort {$a <=> $b} keys %src;
# my $trgPhr=join ':',sort {$a <=> $b} keys %trg;
my $srcPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %src;
my $trgPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %trg;
if (grep(/$srcPhr/,@srcAccepted)){
if (grep(/$trgPhr/,@trgAccepted)){
# my @missing=$self->getMissingTokens(\%src,\%trg);
return 1;
}
}
return 0;
}
sub fillPhrases{
my $self=shift;
my ($newX,$newY,$cluster,$nr)=@_;
my %link=();
%{$link{src}}=%{$cluster->[$nr]->{src}};
%{$link{trg}}=%{$cluster->[$nr]->{trg}};
$link{src}{$newX}=1;
$link{trg}{$newY}=1;
my @missing=$self->getMissingTokens($link{src},$link{trg});
if (not @missing){
return 0;
}
my @missSrc=split(/:/,$missing[0]);
my @missTrg=split(/:/,$missing[1]);
my %overlap=();
foreach my $s (@missSrc){
$self->findSrcOverlap($s,$cluster,\%overlap);
$link{src}{$s}=1;
}
foreach my $t (@missTrg){
$self->findTrgOverlap($t,$cluster,\%overlap);
$link{trg}{$t}=1;
}
foreach (keys %overlap){
if (not $self->isIncluded($cluster->[$_],\%link)){
foreach (@missSrc){delete $link{src}{$_};}
foreach (@missTrg){delete $link{trg}{$_};}
return 0;
}
############# !!!!!!!!!!!!!! change this:
print STDERR "delete cluster $_!\n";
$cluster->[$_]->{src}=();
$cluster->[$_]->{trg}=();
############# !!!!!!!!!!!!!! change this:
}
if (@missSrc or @missTrg){ # ... just for information
print STDERR "fill cluster $nr with missing tokens!\n";
}
foreach (keys %{$link{src}}){
$cluster->[$nr]->{src}->{$_}=1;
}
foreach (keys %{$link{trg}}){
$cluster->[$nr]->{trg}->{$_}=1;
}
return 1;
}
#sub removeClusterInclusions{
# my $self=shift;
# my $cluster=shift;
# foreach my $c (@{$cluster}){
# my $src=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$$cluster[$c]{src}};
# my $trg=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$$cluster[$c]{trg}};
# }
#}
sub isIncluded{
my $self=shift;
my ($cluster1,$cluster2)=@_;
foreach (keys %{$cluster1->{src}}){
if (not defined $cluster2->{src}->{$_}){return 0;}
}
foreach (keys %{$cluster1->{trg}}){
if (not defined $cluster2->{trg}->{$_}){return 0;}
}
return 1;
}
sub findSrcOverlap{
my $self=shift;
return $self->findOverlap('src',@_);
}
sub findTrgOverlap{
my $self=shift;
return $self->findOverlap('trg',@_);
}
sub findOverlap{
my $self=shift;
my ($lang,$token,$cluster,$overlap)=@_;
my @c=grep (defined $$cluster[$_]{$lang}{$token},0..$#{$cluster});
foreach (@c){
$$overlap{$_}=1;
}
}
sub getMissingTokens{
my $self=shift;
my ($src,$trg)=@_;
my @srcAccepted=keys %{$self->{srcToken}};
my @trgAccepted=keys %{$self->{trgToken}};
my $srcPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$src};
my $trgPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$trg};
my $missingSrc=undef;
my $missingTrg=undef;
my @match;
if (@match=grep(/$srcPhr/,@srcAccepted)){
@match=sort {length($a) <=> length($b)} @match;
if ($match[0]=~/^(.*)$srcPhr(.*)$/){
$missingSrc="$1$2$3$4$5$6$7$8$9";
}
if (@match=grep(/$trgPhr/,@trgAccepted)){
@match=sort {length($a) <=> length($b)} @match;
if ($match[0]=~/^(.*)$trgPhr(.*)$/){
$missingTrg="$1$2$3$4$5$6$7$8$9";
}
$missingSrc=~s/^://;$missingSrc=~s/:$//;
$missingTrg=~s/^://;$missingTrg=~s/:$//;
return ($missingSrc,$missingTrg);
}
}
return ();
}
sub isAdjacent{
my $self=shift;
my ($x,$y,$cluster)=@_;
if ((defined $$cluster{src}{$x}) and
((defined $$cluster{trg}{$y-1}) or
((defined $$cluster{trg}{$y+1})))){
return 1;
}
if ((defined $$cluster{trg}{$y}) and
((defined $$cluster{src}{$x-1}) or
((defined $$cluster{src}{$x+1})))){
return 1;
}
return 0;
}
sub isAdjacentScore{
my $self=shift;
my ($x,$y,$cluster,$p)=@_;
if ((defined $$cluster{src}{$x}) and
(defined $$cluster{trg}{$y-1})){
if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x]->[$y-1]*$p){
return 1;
}
return 0;
}
if ((defined $$cluster{src}{$x}) and
(defined $$cluster{trg}{$y+1})){
if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x]->[$y+1]*$p){
return 1;
}
return 0;
}
if ((defined $$cluster{src}{$x-1}) and
(defined $$cluster{trg}{$y})){
if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x-1]->[$y]*$p){
return 1;
}
return 0;
}
if ((defined $$cluster{src}{$x+1}) and
(defined $$cluster{trg}{$y})){
if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x+1]->[$y]*$p){
return 1;
}
return 0;
}
return 0;
}
sub findClusterOverlap{
my $self=shift;
my ($x,$y,$cluster)=@_;
my @overlap=();
foreach (0..$#{$cluster}){
if (defined $$cluster[$_]{src}{$x}){
push(@overlap,$_);
}
elsif (defined $$cluster[$_]{trg}{$y}){
push(@overlap,$_);
}
}
return @overlap;
}
#========================================================================
sub cloneLinkMatrix{
my $self=shift;
my $matrix=shift;
my $clone=shift;
if (ref($matrix) ne 'ARRAY'){return ();}
if (ref($clone) ne 'ARRAY'){$clone=[];}
foreach my $x (0..$#{$matrix}){
foreach my $y (0..$#{$$matrix[$x]}){
$$clone[$x][$y]=$$matrix[$x][$y];
}
}
return $clone;
}
#==========================================================================
#
#
#
#==========================================================================
sub clueMatrixToHtml{
my $self=shift;
my $Matrix=$self->{matrix};
my $Token=$self->{token};
my $SrcTok=$$Token{source};
my $TrgTok=$$Token{target};
my $nrSrc=$#{$$Token{source}};
my $nrTrg=$#{$$Token{target}};
my $max;
foreach my $s (0..$nrSrc){
foreach my $t (0..$nrTrg){
if ($Matrix->[$s]->[$t]>$max){$max=$Matrix->[$s]->[$t];}
}
}
if (not $max){$max=1;}
my $html="<p>\n";
$html.="<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\">\n";
$html.="<tr><th></th>\n";
foreach my $t (0..$nrTrg){
my $str=$TrgTok->[$t];
$html.="<th>$str</th>\n";
}
foreach my $s (0..$nrSrc){
$html.="</tr><tr>\n";
my $str=$SrcTok->[$s];
$html.="<th>$str</th>\n";
foreach my $t (0..$nrTrg){
my $score=0;
if ($Matrix->[$s]){
if ($Matrix->[$s]->[$t]){
$score=$Matrix->[$s]->[$t];
}
}
my $color=255-$score*256/$max;
if ($color==-1){$color=0;}
my $hex=sprintf("%X",$color);
if (length($hex)<2){$hex="0$hex";}
my $val=int(100*$score);
if ($color<128){
$html.="<td bgcolor=\"#$hex$hex$hex\">";
$html.='<font color="#ffffff">';
$html.="$val</font></td>\n";
}
else{
$html.="<td bgcolor=\"#$hex$hex$hex\">";
$html.="$val</td>\n";
}
}
}
$html.="</tr></table><hr>\n";
return $html;
}
sub printHtmlClueMatrix{
my $self=shift;
print STDERR $self->clueMatrixToHtml();
}
sub printClueMatrix{
my $self=shift;
my ($SrcTok,$TrgTok,$Matrix)=@_;
my $nrSrc=$#{$SrcTok};
my $nrTrg=$#{$TrgTok};
print STDERR "\n=====================================================\n";
print STDERR "final clue matrix scores";
print STDERR "\n=====================================================\n";
foreach my $s (0..$nrSrc){
foreach my $t (0..$nrTrg){
my $score=$Matrix->[$s]->[$t];
if ($score>0){
# printf STDERR "[%2d-%-2d] %15s - %-15s: %s\n",
printf STDERR "[%d %d] %20s - %-20s %s\n",
$s,$t,$$SrcTok[$s],$$TrgTok[$t],$score;
}
}
}
print STDERR "\n=====================================================\n";
print STDERR "clue matrix $nrSrc x $nrTrg";
print STDERR "\n=====================================================\n";
my @char=();
&MakeCharArr($TrgTok,\@char);
foreach my $c (0..$#char){
printf STDERR "\n%10s",' ';
foreach (@{$char[$c]}){
printf STDERR "%4s",$_;
}
}
print STDERR "\n";
foreach my $s (0..$nrSrc){
my $str=substr($SrcTok->[$s],0,10);
$str=&Uplug::Encoding::convert($str,'utf-8','iso-8859-1');
printf STDERR "%10s",$str;
foreach my $t (0..$nrTrg){
my $score=0;
if ($Matrix->[$s]){
if ($Matrix->[$s]->[$t]){
$score=$Matrix->[$s]->[$t];
}
}
printf STDERR " %3d",$score*100;
}
print STDERR "\n";
}
}
sub MakeCharArr{
my ($tok,$char)=@_;
my @lat1=@{$tok};
# my @lat1=();
# foreach (0..$#{$tok}){
# $lat1[$_]=&Uplug::Data::encode($tok->[$_],'utf-8','iso-8859-1');
# }
map ($lat1[$_]=&Uplug::Encoding::convert($lat1[$_],'utf-8','iso-8859-1'),
(0..$#lat1));
my $max=&MaxLength(\@lat1);
foreach my $t (0..$#{$tok}){
my @c=split(//,$lat1[$t]);
foreach (1..$max){
if (@c){
$char->[$max-$_]->[$t]=pop(@c);
# $char->[$max-$_]->[$t]=shift(@c);
}
else{$char->[$max-$_]->[$t]=' ';}
}
}
}
sub MaxLength{
my ($tok)=@_;
my $max=0;
foreach (@{$tok}){
if (length($_)>$max){$max=length($_);}
}
return $max;
}
######### return a true value
1;