The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#-*-perl-*-
####################################################################
# Copyright (C) 2004 Jörg Tiedemann  <joerg@stp.ling.uu.se>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
###########################################################################
# Uplug::Align::Word::Clue
#
#
#
###########################################################################


package Uplug::Align::Word::Clue;

use strict;
# use Time::HiRes qw(time);


use vars qw(@ISA $DEBUG);
use vars qw($INPHRASESONLY $ADJACENTONLY $ADJACENTSCORE $FILLPHRASES);
use vars qw($ALLOWMULTIOVERLAP $PRINTHTML);
# use utf8;
use Uplug::Data;
use Uplug::Align::Word;
use Data::Dumper;
$Data::Dumper::Indent=1;
$Data::Dumper::Terse=1;
$Data::Dumper::Purity=1;

@ISA = qw( Uplug::Align::Word );

$DEBUG = 0;

#---------------------------------
# parameters for add2LinkCluster

$INPHRASESONLY = 0;          # if = 1 --> no links outside of chunks
$ADJACENTONLY = 0;           # if = 1 --> allow only adjacent links
$ADJACENTSCORE = 0;          # if > 0 --> $score >= $neighbor * $ADJACENTSCORE
# $ALLOWMULTIOVERLAP = 0;      # allow overlap with more than 1 link cluster!
# $ADJACENTSCORE = 0.4;
$ADJACENTSCORE = 0;

$FILLPHRASES = 0;            #  ... doesn't work ....



sub new{
    my $class=shift;
    my $self=$class->SUPER::new(@_);
    if (not defined $self->parameter('adjacent_only')){
	$self->setParameter('adjacent_only',$ADJACENTONLY);
    }
    if (not defined $self->parameter('adjacent_score')){
	$self->setParameter('adjacent_score',$ADJACENTSCORE);
    }
    if (not defined $self->parameter('in_phrases_only')){
	$self->setParameter('in_phrases_only',$INPHRASESONLY);
    }
    if (not defined $self->parameter('fill_phrase')){
	$self->setParameter('fill_phrases',$FILLPHRASES);
    }
    if (not defined $self->parameter('allow_multiple_overalps')){
	$self->setParameter('allow_multiple_overlaps',$ALLOWMULTIOVERLAP);
    }
    if (not defined $self->parameter('verbose')){
	$self->setParameter('verbose',$DEBUG);
    }
    else{$DEBUG=$self->parameter('verbose');}

    return $self;
}

sub DESTROY {
    my $self = shift;
}



#===========================================================================
#
# get all link scores and fill the clue matrix
#
#===========================================================================

sub getLinkScores{
    my $self=shift;

    $self->{matrix}=[];
    $self->{links}={};
    my $LinkProb=$self->{matrix};
    my $links=$self->{linkStreams};
    my $SrcTok=$self->{srcToken};
    my $TrgTok=$self->{trgToken};
    my $Param=$self->{param};
    my $data=$self->{data};

    ## prepare clue param hash (reduce checks in the loop below)
    my %ClueParam=%{$Param};
    if (exists $ClueParam{general}){delete $ClueParam{general};}
    if (exists $ClueParam{original}){delete $ClueParam{original};}
    foreach (keys %ClueParam){
	if (ref($ClueParam{$_}) ne 'HASH'){$ClueParam{$_}={};}
	if (not defined $ClueParam{$_}{'score weight'}){
	    $ClueParam{$_}{'score weight'}=$self->defaultClueWeight();
	}
    }

    ## define some variables used in the loop
    my $weight;           # clue combination weight
    my ($src,$trg);       # source and target language tokens
    my $score;            # clue score found for the current pair
    my $time;             # time (for debugging)
    my %search;           # hash of patterns for searching clues
    my $found=Uplug::Data->new;  # clues found
    my @SrcTok;           # positions of the current source
    my @TrgTok;           # and target tokens in the sentence
    my ($s,$t,$x,$y,$p);  # variables for iteration
    my $ScoreComb=$self->parameter('score combination');
    if (not $ScoreComb){$ScoreComb='probabilistic';}

    if ($self->parameter('verbose')){
      print STDERR "\n=====================================================\n";
      print STDERR "matching clue scores";
      print STDERR "\n=====================================================\n";
    }


    ## the following loop takes most of the time!

    foreach $s (sort {$a <=> $b} keys %{$SrcTok}){
	foreach $t (keys %{$TrgTok}){

	    $time=time();

	    ($src,$trg)=($$SrcTok{$s}{general},$$TrgTok{$t}{general});
	    $self->alignIdentical($src,$trg,$s,$t,$LinkProb);

	    ### DEBUG: store search time
	    $self->{identical_score_time}+=time()-$time if ($DEBUG);

	    foreach (keys %ClueParam){

		$time=time();

		$weight=$ClueParam{$_}{'score weight'};
		if ($ClueParam{$_}{'relative position'}){
		    ($src,$trg)=$self->makeRelPosFeature($$SrcTok{$s}{$_},
							 $$TrgTok{$t}{$_});
		}
		else{($src,$trg)=($$SrcTok{$s}{$_},$$TrgTok{$t}{$_});}

		### DEBUG: store search time
		$self->{before_score_time}+=time()-$time if ($DEBUG);

		$score=0;

		#---------------------------------------
		# length difference as  scores ...
		#---------------------------------------

		if ($ClueParam{$_}{'string length difference'}){
		    $score=$data->lengthQuotient($src,$trg);
		}

		#---------------------------------------
		# otherwise: search scores in link-DB
		#---------------------------------------

		else{
		    if (not defined $links->{$_}){next;}
		    if (defined($src) and defined($trg)){
			%search=('source' => $src,
				 'target' => $trg);
			$time=time();
			if ($links->{$_}->select($found,\%search)){
			    $score=$found->attribute('score');
			}
			### DEBUG: store search time
			$self->{search_score_time}+=time()-$time if ($DEBUG);
		    }
		}

		$time=time();

		#---------------------------------------
		# set weighted score in score matrix
		#---------------------------------------

		if (not $score){next;}
		if (not $data->checkPairParameter($src,$trg,$ClueParam{$_})){
		    ### DEBUG: store search time
		    $self->{after_score_time}+=time()-$time if ($DEBUG);
		    next;
		}

		if (exists $ClueParam{$_}{'minimal score'}){
		    if ($score<$ClueParam{$_}{'minimal score'}){
			### DEBUG: store search time
			$self->{after_score_time}+=time()-$time if ($DEBUG);
			next;
		    }
		}

		$score*=$weight;

		# shouldn't be >1, but in case ...
		#--------------------------------
		if ($score>=1){$score=0.999999999999;}
		#--------------------------------

		if ($self->parameter('verbose')){
#		    printf STDERR "%20s [ %s %s ] %15s - %-15s %s\n",
		    printf STDERR "$_\t$s\t$t\t$src\t$trg\t$score\n";
		}

		@SrcTok=split(/:/,$s);
		@TrgTok=split(/:/,$t);

		foreach $x (@SrcTok){
		    foreach $y (@TrgTok){

#			if ($self->parameter('verbose')){
#			    printf STDERR "%20s [%d %d] %15s - %-15s %s\n",
#			    $_,$x,$y,$src,$trg,$score;
#			}

			if ($ScoreComb eq 'addition'){
			    $$LinkProb[$x][$y]+=$score;
			}
#
# log-linear and multiplication are useless!
# * there's not always a positive score for each possible pair! 
#   --> multiplications with one factor = 0 --> score = 0
#   --> leaving out zero-factors -> implicit penalty for pairs with multiple
#                                   clue scores
#
#			elsif ($ScoreComb eq 'log-linear'){
#			    $$LinkProb[$x][$y]+=log($score);
#			}
#			elsif ($ScoreComb eq 'multiplication'){
#			    $$LinkProb[$x][$y]+=log($score);
#			}
			else{
			    $p=$$LinkProb[$x][$y];
			    $$LinkProb[$x][$y]=$p+$score-$p*$score;
			}
		    }
		}

		### DEBUG: store search time
		$self->{after_score_time}+=time()-$time if ($DEBUG);

	    }
	}
    }

    $time=time();
    $self->align1x($LinkProb);

#    if ($ScoreComb eq 'log-linear'){              # special for log-linear:
#	foreach $x (0..$#{$LinkProb}){            # reverse log (make positiv
#	    foreach $y (0..$#{$$LinkProb[$x]}){   # score values)
#		$$LinkProb[$x][$y]=exp($$LinkProb[$x][$y]);
#	    }
#	}
#    }

    if ($self->parameter('verbose')){
	$self->printClueMatrix($self->{token}->{source},
			       $self->{token}->{target},
			       $self->{matrix});

	$self->printBitextTokensWithID();

#	$self->printBitextToken($self->{token}->{source},
#				$self->{token}->{target});

    }
    ### DEBUG: store search time
    $self->{'1x_score_time'}+=time()-$time if ($DEBUG);
}


#===========================================================================
#
# search for the best alignment using the clue matrix scores
#
#   topLinkSearch ........ iteratively add top links to link clusters
#   nextBestSearch ....... score = distance to next best link (+topLinkSearch)
#   oneOneFirstSearch .... non-overlapping first, overlapping then
#   competitiveSearch .... competitive linking (1:1 links only!)
#   bidirectionalRefineSearch  intersection of directional links + overlapping
#   directionalSrcSearch ..... best alignment source --> target
#   directionalTrgSearch ..... best alignment target --> source
#   bidirectionalUnion ....... union of directionalSrc & directionalTrg
#   bidirectionalIntersection  intersection of directionalSrc & directionalTrg
#
# parameter search: nextbest ........ nextBestSearch
#                   oneone....... ... oneOneFirstSearch
#                   competitive ..... competitiveSearch
#                   myrefined ....... bidirectionalRefinedSearch
#                   och ............. bidirectionalRefinedSearchOch
#                   src ............. directionalSrcSearch
#                   trg ............. directionalTrgSearch
#                   union ........... bidirectionalUnion
#                   intersection .... bidirectionalIntersection
#                   <default> ....... topLinkSearch
#
#===========================================================================


sub findAlignment{
    my $self=shift;
    $self->{links}={};
    my $minScore=$self->scoreThreshold();
    my $method=$self->parameter('search');

    if ($method=~/nextbest/){
	return $self->nextBestSearch($self->{links},$minScore);}
    elsif ($method=~/competitive/){
	return $self->competitiveSearch($self->{links},$minScore);}
    elsif ($method=~/oneone/){
	return $self->oneOneFirstSearch($self->{links},$minScore);}
    elsif ($method=~/myrefined/){
	return $self->bidirectionalRefinedSearch($self->{links},$minScore);}
    elsif ($method=~/(och|refined)/){
	return $self->bidirectionalRefinedSearchOch($self->{links},$minScore);}
    elsif ($method=~/src/){
	return $self->directionalSrcSearch($self->{links},$minScore);}
    elsif ($method=~/trg/){
	return $self->directionalTrgSearch($self->{links},$minScore);}
    elsif ($method=~/union/){
	return $self->bidirectionalUnion($self->{links},$minScore);}
    elsif ($method=~/intersection/){
	return $self->bidirectionalIntersection($self->{links},$minScore);}
    else{
	return $self->topLinkSearch($self->{links},$minScore);}
}





#===========================================================================
# add scores to the clue matrix for 
# sentence alignments with only 1 word in either source or target
#===========================================================================


sub align1x{
    my $self=shift;
    my ($LinkProb)=@_;
    my $Align11=$self->parameter('align 1:1');
    my $Align1x=$self->parameter('align 1:x');
    if ($Align11 and ($#{$LinkProb}==0)){
	if ($#{$$LinkProb[0]}==0){
	    my $p=$$LinkProb[0][0];
	    $$LinkProb[0][0]=$p+$Align11-$p*$Align11;
	    return;
	}
    }
    if ($Align1x and ($#{$LinkProb}==0)){
	foreach (0..$#{$$LinkProb[0]}){
	    my $p=$$LinkProb[0][$_];
	    $$LinkProb[0][$_]=$p+$Align1x-$p*$Align1x;
	}
	return;
    }
    if ($Align1x){
	my $ok=1;
	foreach (0..$#{$LinkProb}){
	    if ($#{$$LinkProb[$_]}!=0){$ok=0;}
	}
	if ($ok){
	    foreach (0..$#{$LinkProb}){
		my $p=$$LinkProb[$_][0];
		$$LinkProb[$_][0]=$p+$Align1x-$p*$Align1x;
	    }
	}
    }
}

#===========================================================================
# add scores to the clue matrix for
# pairs of identical tokens with at least one non-alphabetical character
# (hard-coded as /[^A-Za-z]/ !!!!!!)
#===========================================================================

sub alignIdentical{
    my $self=shift;
    my $AlignIdentical=$self->parameter('align identical');
    if (not $AlignIdentical){return;}
    my ($src,$trg,$s,$t,$LinkProb)=@_;
    if ($src=~/[^A-Za-z]/){
	if ($src eq $trg){
	    my @SrcTok=split(/:/,$s);
	    my @TrgTok=split(/:/,$t);
	    foreach my $x (@SrcTok){
		foreach my $y (@TrgTok){
		    my $p=$$LinkProb[$x][$y];
		    $$LinkProb[$x][$y]=$p+$AlignIdentical-$p*$AlignIdentical;
		}
	    }
	}
    }
}




#===========================================================================
#
# topLinkSearch:
#   1) search best link in the matrix
#   2) add link to link clusters
#   3) continue with 1) until finished
#
#===========================================================================

sub topLinkSearch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;

    my $LinkProb=$self->{matrix};
    my $Token=$self->{token};
    my $TokenAttr=$self->{tokenAttr};

    my @SrcLinks;
    my @TrgLinks;
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};

    my @LinkMatrix;
    my @LinkCluster;
    my ($x,$y);


    # ----------------------------

#    print STDERR "---------new sentence-------$MinScore-------\n";
    undef $self->{SORTEDLINKS};

    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);   # clone the matrix

    while (($x,$y)=$self->getTopLink(\@LinkMatrix,$MinScore)){
#	print STDERR "$x:$y:$LinkMatrix[$x][$y]\n";
	if ($MinScore=~/\%/){
	    $MinScore=$LinkMatrix[$x][$y]*$MinScore/100;
#	    print STDERR "## minscore == $MinScore\n";
	}
	if (not defined($LinkMatrix[$x][$y])){last;}
	if ($LinkMatrix[$x][$y]<$MinScore){last;}

	if ($self->add2LinkCluster($x,$y,\@LinkCluster)){
	    $LinkMatrix[$x][$y]=0;
	}
    }


    # ----------------------------
    # get the links from the set of link clusters

    $self->getClusterLinks(\@LinkCluster,$Links);     # get links
}




#===========================================================================
#
# nextBestSearch:
#    1) find score distance to "next best link" for each word pair
#    2) call topLinkSearch
#
#===========================================================================



sub nextBestSearch{
    my $self=shift;
    my $LinkProb=$self->{matrix};
    $self->nextBestMatrix($LinkProb);
    return $self->topLinkSearch(@_);
}


sub nextBestMatrix{
    my $self=shift;
    my ($LinkProb)=@_;

    my @SortedColumns=();
    my @SortedRows=();

    my $sizeX=$#{$LinkProb};
    my $sizeY=$#{$$LinkProb[0]};

    foreach my $x (0..$sizeX){
	@{$SortedColumns[$x]}=
	    sort {$$LinkProb[$x][$b] <=> $$LinkProb[$x][$a]} (0..$sizeY);
    }
    foreach my $y (0..$sizeY){
	@{$SortedRows[$y]}=
	    sort {$$LinkProb[$b][$y] <=> $$LinkProb[$a][$y]} (0..$sizeX);
    }

    my @LinkMatrix=();
    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);

    my $lowest=0;

    foreach my $x (0..$sizeX){
	foreach my $y (0..$sizeY){
	    my $NextBestY=$SortedColumns[$x][0];
	    if ($NextBestY==$y){$NextBestY=$SortedColumns[$x][1];}
	    my $NextBestX=$SortedRows[$y][0];
	    if ($NextBestX==$x){$NextBestX=$SortedRows[$y][1];}
	    my $NextBest=$LinkMatrix[$NextBestX][$y];
	    if ($LinkMatrix[$x][$NextBestY]>$NextBest){
		$NextBest=$LinkMatrix[$x][$NextBestY];
	    }
	    $$LinkProb[$x][$y]-=$NextBest;
	    if ($$LinkProb[$x][$y]<$lowest){
		$lowest=$$LinkProb[$x][$y];
	    }
	}
    }
    foreach my $x (0..$sizeX){               # normalize!
	foreach my $y (0..$sizeY){           # no negative values
	    $$LinkProb[$x][$y]-=$lowest;     # in the matrix!
	}
    }
    if ($self->parameter('verbose')){
	$self->printClueMatrix($self->{token}->{source},
			       $self->{token}->{target},
			       $LinkProb);
    }
}



#===========================================================================
#
# oneOneFirstSearch:
#    1) find all one-to-one word links first (non-overlapping links)
#    2) add iteratively overlapping links
#
#===========================================================================

sub oneOneFirstSearch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;

    my $LinkProb=$self->{matrix};
    my $Token=$self->{token};
    my $TokenAttr=$self->{tokenAttr};

    my @SrcLinks;
    my @TrgLinks;
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};

    my @LinkMatrix;
    my @LinkCluster;
    my ($x,$y);


    # ----------------------------
    # 1) get all word-to-word links without any overlaps

    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);   # clone the matrix

    while (($x,$y)=$self->getTopLink(\@LinkMatrix,$MinScore)){
	if ($MinScore=~/\%/){
	    $MinScore=$LinkMatrix[$x][$y]*$MinScore/100;
	    print STDERR "## minscore == $MinScore\n";
	}
	if ($LinkMatrix[$x][$y]<$MinScore){last;}
	my @overlap=$self->findClusterOverlap($x,$y,\@LinkCluster);

	if (not @overlap){
	    $LinkCluster[$#LinkCluster+1]={};
	    $LinkCluster[-1]{src}{$x}=1;
	    $LinkCluster[-1]{trg}{$y}=1;
	}
	$LinkMatrix[$x][$y]=0;
    }

    # ----------------------------
    # 2) do it again --> find overlapping links!

    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);   # clone the matrix

    while (($x,$y)=$self->getTopLink(\@LinkMatrix,$MinScore)){
	if ($LinkMatrix[$x][$y]<$MinScore){last;}
	$self->add2LinkCluster($x,$y,\@LinkCluster);
	$LinkMatrix[$x][$y]=0;
    }


    # ----------------------------
    # get the links from the set of link clusters

    $self->getClusterLinks(\@LinkCluster,$Links);     # get links
}




#===========================================================================
# ------------------  directional alignment (source to target) ----------------
#===========================================================================

sub directionalSrcSearch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    my $competitive=shift;

    my @LinkCluster;
    my ($x,$y);

    my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);

    foreach (0..$#SrcLinks){
	if ((defined $SrcLinks[$_]) and 
	    ($SrcLinks[$_] > 0)){
	    $self->add2LinkCluster($_,$SrcLinks[$_],\@LinkCluster);
	}
    }

    $self->getClusterLinks(\@LinkCluster,$Links);
}

#===========================================================================
# ------------------  directional alignment (target to source ) ---------------
#===========================================================================

sub directionalTrgSearch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    my $competitive=shift;

    my @LinkCluster;
    my ($x,$y);

    my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
    foreach (0..$#TrgLinks){
	if ((defined $TrgLinks[$_]) and 
	    ($TrgLinks[$_] > 0)){
	    $self->add2LinkCluster($TrgLinks[$_],$_,\@LinkCluster);
	}
    }

    $self->getClusterLinks(\@LinkCluster,$Links);
}


#===========================================================================
# competitive linking
#   1) get best word-to-word link (s,t)
#   2) remove alternative links for (s) and for (t)
#   3) go to 1) until finished
#===========================================================================

sub competitiveSearch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    if (not defined $MinScore){
	$MinScore=0.00000000000001;
    }

    my $Token=$self->{token};
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};

    my @WordLinks=();
    if ($NrTrg>$NrSrc){
	return $self->directionalTrgSearch($Links,$MinScore,1);
    }
    return $self->directionalSrcSearch($Links,$MinScore,1);
}

#===========================================================================
# refined symmetric link search a la Och&Ney
#
#===========================================================================

sub bidirectionalRefinedSearchOch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    my $competitive=shift;
    if (not defined $MinScore){
	$MinScore=0.00000000000001;
    }

    my $LinkProb=$self->{matrix};
    my @LinkCluster;
    my %WordLinks=();
    my %InvWordLinks=();
    my ($x,$y);

    #-----------------------------------
    # 1) get directional links

    my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
    my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);

    #-----------------------------------
    # 2) intersection of directional links

    foreach (0..$#SrcLinks){
	if ((defined $SrcLinks[$_]) and
	    ($TrgLinks[$SrcLinks[$_]] eq $_)){
	    $WordLinks{$_}{$SrcLinks[$_]}=1;
	    $InvWordLinks{$SrcLinks[$_]}{$_}=1;
#	    print STDERR "$_ --> $SrcLinks[$_]\n";
	}
    }


    #-----------------------------------
    # 3) add overlapping links
    #    * sort all scores in the matrix
    #    * run through possible links starting with the highest score
    #    * repeat until no more links can be added
    #
    # links (s,t) are added if
    #    * there is no other link for both, s AND t
    #    * or ..the new link is adjacent to another link in source OR target
    #           and thew new link does not create links which have neighbors
    #           in both directions

    my %scores=();
    foreach my $s (0..$#{$LinkProb}){
	foreach my $t (0..$#{$$LinkProb[$s]}){   # put all scores
	    $scores{"$s:$t"}=$$LinkProb[$s][$t]; # in a long list
	}
    }

    my $added=0;
    do{
	$added=0;
	foreach my $pair (sort {$scores{$b} <=> $scores{$a} } keys %scores){
	    if ($scores{$pair}<$MinScore){last;}
	    my ($s,$t)=split(/\:/,$pair);

	    if (((not defined $WordLinks{$s}) or      # if no other links
		 (not keys %{$WordLinks{$s}})) and    # are defined for both,
		((not defined $InvWordLinks{$t}) or   # source AND target
		 (not keys %{$InvWordLinks{$t}}))){   # word:
		$added++;
		$scores{$pair}=0;                     # add the link
		$WordLinks{$s}{$t}=1;
		$InvWordLinks{$t}{$s}=1;
#		print STDERR "add $s --> $t (new)\n";
	    }
	    elsif ((($s>0) and 
		    (defined $WordLinks{$s-1}{$t})) or    # the link has a
		   (defined $WordLinks{$s+1}{$t}) or      # vertical neighbor
		   (($t>0) and 
		    (defined $WordLinks{$s}{$t-1})) or    # or a
		   (defined $WordLinks{$s}{$t+1})){       # horizontal neighbor

		$InvWordLinks{$t}{$s}=1;
		$WordLinks{$s}{$t}=1;                     # if there are
		if (&CheckWordLinks(\%WordLinks,          # no links with
				    \%InvWordLinks)){     # neighbors in both
		    $added++;                             # dimensions! -->
		    $scores{$pair}=0;                     # add the new link
#		    print STDERR "add $s --> $t (adj)\n";
		}
		else{                                     # else:
		    delete $WordLinks{$s}{$t};            # delete the link
		    delete $InvWordLinks{$t}{$s};
#		    print STDERR "reject $s --> $t\n";
		}
	    }
	}
    }
    until (not $added);      # repeat as long as links are added!

    $self->setParameter('adjacent_only',0);
    $self->setParameter('adjacent_score',0);

    foreach my $s (keys %WordLinks){                      # put word-to-word
	foreach my $t (keys %{$WordLinks{$s}}){           # links together
	    $self->add2LinkCluster($s,$t,\@LinkCluster);  # (link clusters)
	}
    }


    #-----------------------------------
    # 4) convert link cluster to word/phrase links

    $self->getClusterLinks(\@LinkCluster,$Links);
}

#-------------------------------------------------------------------------
# check if there are alignments containing horicontal AND vertical links
# (---> return 0 if there are such links!)

sub CheckWordLinks{
    my $srclinks=shift;
    my $trglinks=shift;

    foreach my $s (keys %{$srclinks}){
	foreach my $t (keys %{$$srclinks{$s}}){
	    if (keys %{$$srclinks{$s}} > 1){
		if (keys %{$$trglinks{$t}} > 1){
		    return 0;
		}
	    }
	}
    }
    return 1;
}


#===========================================================================
# symmetric alignment (bi-directional)
#   1) get links in both directions
#   2) get intersection of links
#   3) iteratively add new links to existing link clusters
#===========================================================================


sub bidirectionalRefinedSearch{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    my $competitive=shift;
    if (not defined $MinScore){
	$MinScore=0.00000000000001;
    }

    my $LinkProb=$self->{matrix};
    my @LinkCluster;
    my ($x,$y);

    #-----------------------------------
    # 1) get directional links

    my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
    my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);

    #-----------------------------------
    # 2) intersection of directional links

    foreach (0..$#SrcLinks){
	if ((defined $SrcLinks[$_]) and
	    ($TrgLinks[$SrcLinks[$_]] eq $_)){
	    $self->add2LinkCluster($_,$SrcLinks[$_],
				   \@LinkCluster);  # (link clusters)
	}
    }


    #-----------------------------------
    # 3) add overlapping links
    #    * sort all scores in the matrix
    #    * run through possible links starting with the highest score
    #    * repeat until no more links can be added
    #
    # links (s,t) are added if
    #    * there is no other link for both, s AND t
    #    * or ..the new link is adjacent to another link in source OR target
    #           and thew new link does not create links which have neighbors
    #           in both directions

    my %scores=();
    foreach my $s (0..$#{$LinkProb}){
	foreach my $t (0..$#{$$LinkProb[$s]}){   # put all scores
	    $scores{"$s:$t"}=$$LinkProb[$s][$t]; # in a long list
	}
    }

    my $added=0;
    do{
	$added=0;
	foreach my $pair (sort {$scores{$b} <=> $scores{$a} } keys %scores){
	    if ($scores{$pair}<$MinScore){last;}
	    my ($s,$t)=split(/\:/,$pair);

	    if ($self->add2LinkCluster($s,$t,\@LinkCluster)){
		$added++;
		delete $scores{$pair};
	    }
	}
    }
    until (not $added);      # repeat as long as links are added!

    #-----------------------------------
    # 4) convert link cluster to word/phrase links

    $self->getClusterLinks(\@LinkCluster,$Links);
}




# ------------------  bi-directional alignment (union) ------------------
#
# union of links in both diretions
#

sub bidirectionalUnion{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    my $competitive=shift;

    my @LinkCluster;
    my ($x,$y);

    my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
    foreach (0..$#SrcLinks){
	if (defined $SrcLinks[$_]){
	    $self->add2LinkCluster($_,$SrcLinks[$_],\@LinkCluster);
	}
    }
    my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);
    foreach (0..$#TrgLinks){
	if (defined $TrgLinks[$_]){
	    $self->add2LinkCluster($TrgLinks[$_],$_,\@LinkCluster);
	}
    }

    $self->getClusterLinks(\@LinkCluster,$Links);
}


# ------------------  bi-directional alignment (intersection) -------------
#
# intersection of links in both directions
#

sub bidirectionalIntersection{
    my $self=shift;
    my $Links=shift;
    my $MinScore=shift;
    my $competitive=shift;

    my @LinkCluster;
    my ($x,$y);

    my @SrcLinks=$self->bestSrcLinks($MinScore,$competitive);
    my @TrgLinks=$self->bestTrgLinks($MinScore,$competitive);

    foreach (0..$#SrcLinks){
	if ((defined $SrcLinks[$_]) and
	    ($TrgLinks[$SrcLinks[$_]] eq $_)){
	    $self->add2LinkCluster($_,$SrcLinks[$_],\@LinkCluster);
	    $SrcLinks[$_]=undef;
	    $TrgLinks[$SrcLinks[$_]]=undef;
	}
    }

    $self->getClusterLinks(\@LinkCluster,$Links);
}











# ------------------------------------
# get best links from source to target words

sub bestSrcLinks{
    my $self=shift;
    my $MinScore=shift;                 # score threshold
    my $competitive=shift;              # enable/disable competive linking

    if ($competitive){
	return $self->competitiveSrcLinks($MinScore,@_);
    }

    my $LinkProb=$self->{matrix};
    my $Token=$self->{token};
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};

    my @Links=();

    # ----------------------------
    my @LinkMatrix=();
    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
    # ----------------------------

    foreach my $s (0..$NrSrc){
	my $bestLink=0;
	my $bestScore=$LinkMatrix[$s][$bestLink];
	foreach my $t (1..$NrTrg){
	    if ($LinkMatrix[$s][$t]>$bestScore){
		$bestLink=$t;
		$bestScore=$LinkMatrix[$s][$bestLink];
	    }
	}
	if ($LinkMatrix[$s][$bestLink]<$MinScore){next;}
#	if ($LinkMatrix[$s][$bestLink]<$MinScore){last;}
	$Links[$s]=$bestLink;
    }

    return @Links;
}

# ------------------------------------
# get best links from target to source words

sub bestTrgLinks{
    my $self=shift;
    my $MinScore=shift;                 # score threshold
    my $competitive=shift;              # enable/disable competive linking

    if ($competitive){
	return $self->competitiveTrgLinks($MinScore,@_);
    }

    my $LinkProb=$self->{matrix};
    my $Token=$self->{token};
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};

    my @Links=();

    # ----------------------------
    my @LinkMatrix=();
    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
    # ----------------------------

    foreach my $t (0..$NrTrg){
	my $bestLink=0;
	my $bestScore=$LinkMatrix[$bestLink][$t];
	foreach my $s (1..$NrSrc){
	    if ($LinkMatrix[$s][$t]>$bestScore){
		$bestLink=$s;
		$bestScore=$LinkMatrix[$bestLink][$t];
	    }
	}
	if ($LinkMatrix[$bestLink][$t]<$MinScore){next;}
#	if ($LinkMatrix[$bestLink][$t]<$MinScore){last;}
	$Links[$t]=$bestLink;
    }
    return @Links;
}


# ------------------------------------
# competitive linking from source to target


sub competitiveSrcLinks{
    my $self=shift;
    my $MinScore=shift;                 # score threshold

    my $LinkProb=$self->{matrix};
    my $Token=$self->{token};
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};
    my @Links=();

    # ----------------------------
    my @LinkMatrix=();
    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
    # ----------------------------

    my ($s,$t);
    while (($s,$t)=$self->getTopLink(\@LinkMatrix,$MinScore)){
	if ($LinkMatrix[$s][$t]<$MinScore){next;}
	$LinkMatrix[$s][$t]=0;

	$Links[$s]=$t;

	foreach my $x (0..$NrSrc){$LinkMatrix[$x][$t]=0;}
	foreach my $x (0..$NrTrg){$LinkMatrix[$s][$x]=0;}
    }
    return @Links;
}

# ------------------------------------
# competitive linking from target to source

sub competitiveTrgLinks{
    my $self=shift;
    my $MinScore=shift;                 # score threshold

    my $LinkProb=$self->{matrix};
    my $Token=$self->{token};
    my $NrSrc=$#{$$Token{source}};
    my $NrTrg=$#{$$Token{target}};
    my @Links=();

    # ----------------------------
    my @LinkMatrix=();
    $self->cloneLinkMatrix($LinkProb,\@LinkMatrix);
    # ----------------------------

    my ($s,$t);
    while (($s,$t)=$self->getTopLink(\@LinkMatrix,$MinScore)){
	if ($LinkMatrix[$s][$t]<$MinScore){next;}
	$LinkMatrix[$s][$t]=0;

	$Links[$t]=$s;

	foreach my $x (0..$NrSrc){$LinkMatrix[$x][$t]=0;}
	foreach my $x (0..$NrTrg){$LinkMatrix[$s][$x]=0;}
    }
    return @Links;
}


#==========================================================================
#
# get the word-to-word link with the highest score from the clue matrix
#
#==========================================================================

sub getTopLink{
    my $self=shift;
    my $LinkProb=shift;
    my $MinScore=shift;

    my $bestX=undef;
    my $bestY=undef;
    my $bestVal;

    if (not ref($self->{SORTEDLINKS})){
	$self->sortLinks($LinkProb,$MinScore);
    }
    my $top=shift @{$self->{SORTEDLINKS}};
    if (not defined $top){
	delete $self->{SORTEDLINKS};
    }
    my @link=split (':',$top);
    return @link;
}

sub sortLinks{
    my $self=shift;
    my $LinkProb=shift;
    my $MinScore=shift;
    $self->{ALLLINKS}={};
    foreach my $x (0..$#{$LinkProb}){
	foreach my $y (0..$#{$$LinkProb[$x]}){
	    if ($$LinkProb[$x][$y]<$MinScore){next;}
	    if ($$LinkProb[$x][$y]<=0){next;}
	    $self->{ALLLINKS}->{"$x:$y"}=$$LinkProb[$x][$y];
	}
    }
    @{$self->{SORTEDLINKS}}=
	sort {$self->{ALLLINKS}->{$b} <=> $self->{ALLLINKS}->{$a}} 
    keys %{$self->{ALLLINKS}};
}

sub getTopLinkOld{
    my $self=shift;
    my $LinkProb=shift;

    my $bestX=undef;
    my $bestY=undef;
    my $bestVal;

    foreach my $x (0..$#{$LinkProb}){
	my @sort = sort {$$LinkProb[$x][$b] <=> $$LinkProb[$x][$a]} 
	                (0..$#{$$LinkProb[$x]});
	if ($$LinkProb[$x][$sort[0]]>$bestVal){
	    $bestVal=$$LinkProb[$x][$sort[0]];
	    $bestX="$x";
	    $bestY="$sort[0]";
	}
    }
    if ((defined $bestX) and (defined $bestY)){
	return ($bestX,$bestY);
    }
    else{
	return ();
    }
}

#==========================================================================
#
# getClusterLinks:
#    make word/phrase links out of link clusters
#    (add all necessary information for storing links, 
#     e.g. token pairs, id's, byte spans)
#
#==========================================================================


sub getClusterLinks{
    my $self=shift;
    my $LinkCluster=shift;
    my $Links=shift;

    my $LinkProb=$self->{matrix};
    my $TokenAttr=$self->{tokenAttr};

    if (ref($Links) ne 'HASH'){$Links={};}

    foreach (0..$#{$LinkCluster}){
	if (keys %{$$LinkCluster[$_]{src}}){
	    if (keys %{$$LinkCluster[$_]{trg}}){
		my $src=join ':',sort {$a<=>$b} keys %{$$LinkCluster[$_]{src}};
		my $trg=join ':',sort {$a<=>$b} keys %{$$LinkCluster[$_]{trg}};
		my $score=$self->getMatrixScore($LinkProb,
						$$LinkCluster[$_]{src},
						$$LinkCluster[$_]{trg});
		my $link=$self->getLinkString($TokenAttr,$src,$trg);

		$$Links{$src}{link}=$link;
		$$Links{$src}{source}=
		    $self->ngramIDs($src,$TokenAttr,'source');
		$$Links{$src}{target}=
		    $self->ngramIDs($trg,$TokenAttr,'target');
#		my $span=$self->ngramSpans($src,$TokenAttr,'source');
#		if ($span){$$Links{$src}{src}=$span;}
#		$span=$self->ngramSpans($trg,$TokenAttr,'target');
#		if ($span){$$Links{$src}{trg}=$span;}
		$$Links{$src}{score}=$score;
	    }
	}
    }
    return $Links;
}


sub getMatrixScore{
    my $self=shift;
    my ($matrix,$src,$trg)=@_;
    my $score=0;
    my $count;
    foreach my $s (keys %{$src}){
	foreach my $t (keys %{$trg}){
	    if ($$matrix[$s][$t]>0){
		$score+=log($$matrix[$s][$t]);
		$count++;
	    }
	}
    }
    if ($count){
	$score/=$count;
    }
    return exp($score);
}


#==========================================================================
#
# add links to link clusters
#
#==========================================================================

sub add2LinkCluster{
    my $self=shift;
    my ($x,$y,$cluster)=@_;
    my @overlap=$self->findClusterOverlap($x,$y,$cluster);
    if ((not $self->parameter('allow_multiple_overlaps')) and (@overlap>1)){
#	print STDERR "disregard $x - $y (multi-overlap)!\n";
	return 0;
    }
    elsif (@overlap){
	if ($self->parameter('in_phrases_only')){
	    if ($self->parameter('fill_phrases')){
		if (not $self->fillPhrases($x,$y,$cluster,$overlap[0])){
#		    print STDERR "disregard $x - $y (fill phrase)!\n";
		    return 0;
		}
	    }
	    if (not $self->isInPhrase($x,$y,$$cluster[$overlap[0]])){
#		print STDERR "disregard $x - $y (not in phrase)!\n";
		return 0;
	    }
	}
	if ($self->parameter('adjacent_only')){
	    if (not $self->isAdjacent($x,$y,$$cluster[$overlap[0]])){
#		print STDERR "disregard $x - $y (not adjacent)!\n";
		return 0;
	    }
	}
	if ($self->parameter('adjacent_score')){
	    if (not $self->isAdjacentScore($x,$y,$$cluster[$overlap[0]],
				      $self->parameter('adjacent_score'))){
#s		print STDERR "disregard $x - $y (score difference to adjacent too big)!\n";
		return 0;
	    }
	}
	$$cluster[$overlap[0]]{src}{$x}=1;
	$$cluster[$overlap[0]]{trg}{$y}=1;
	if (@overlap>1){                              # join all overlapping
	    foreach my $o (1..$#overlap){             # link clusters!
		foreach (keys %{$$cluster[$overlap[$o]]{src}}){
		    delete $$cluster[$overlap[$o]]{src}{$_};
		    $$cluster[$overlap[0]]{src}{$_}=1;
		}
		foreach (keys %{$$cluster[$overlap[$o]]{trg}}){
		    delete $$cluster[$overlap[$o]]{trg}{$_};
		    $$cluster[$overlap[0]]{trg}{$_}=1;
		}
	    }
	}
    }
    else{
	$$cluster[$#{$cluster}+1]={};
	$$cluster[-1]{src}{$x}=1;
	$$cluster[-1]{trg}{$y}=1;
    }
    return 1;
}


sub isInPhrase{
    my $self=shift;
    my ($newX,$newY,$cluster)=@_;
    my @srcAccepted=keys %{$self->{srcToken}};
    my @trgAccepted=keys %{$self->{trgToken}};

    my %src=%{$cluster->{src}};
    my %trg=%{$cluster->{trg}};
    $src{$newX}=1;
    $trg{$newY}=1;

#    my $srcPhr=join ':',sort {$a <=> $b} keys %src;
#    my $trgPhr=join ':',sort {$a <=> $b} keys %trg;

    my $srcPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %src;
    my $trgPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %trg;

    if (grep(/$srcPhr/,@srcAccepted)){
	if (grep(/$trgPhr/,@trgAccepted)){
#	    my @missing=$self->getMissingTokens(\%src,\%trg);
	    return 1;
	}
    }
    return 0;
}

sub fillPhrases{
    my $self=shift;
    my ($newX,$newY,$cluster,$nr)=@_;

    my %link=();
    %{$link{src}}=%{$cluster->[$nr]->{src}};
    %{$link{trg}}=%{$cluster->[$nr]->{trg}};
    $link{src}{$newX}=1;
    $link{trg}{$newY}=1;

    my @missing=$self->getMissingTokens($link{src},$link{trg});
    if (not @missing){
	return 0;
    }
    my @missSrc=split(/:/,$missing[0]);
    my @missTrg=split(/:/,$missing[1]);
    my %overlap=();
    foreach my $s (@missSrc){
	$self->findSrcOverlap($s,$cluster,\%overlap);
	$link{src}{$s}=1;
    }
    foreach my $t (@missTrg){
	$self->findTrgOverlap($t,$cluster,\%overlap);
	$link{trg}{$t}=1;
    }
    foreach (keys %overlap){
	if (not $self->isIncluded($cluster->[$_],\%link)){
	    foreach (@missSrc){delete $link{src}{$_};}
	    foreach (@missTrg){delete $link{trg}{$_};}
	    return 0;
	}

#############  !!!!!!!!!!!!!! change this:
	print STDERR "delete cluster $_!\n";
	$cluster->[$_]->{src}=();   
	$cluster->[$_]->{trg}=();
#############  !!!!!!!!!!!!!! change this:

    }

    if (@missSrc or @missTrg){                  # ... just for information
	print STDERR "fill cluster $nr with missing tokens!\n";
    }

    foreach (keys %{$link{src}}){
	$cluster->[$nr]->{src}->{$_}=1;
    }
    foreach (keys %{$link{trg}}){
	$cluster->[$nr]->{trg}->{$_}=1;
    }
    return 1;
}


#sub removeClusterInclusions{
#    my $self=shift;
#    my $cluster=shift;
#    foreach my $c (@{$cluster}){
#	my $src=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$$cluster[$c]{src}};
#	my $trg=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$$cluster[$c]{trg}};
#    }
#}


sub isIncluded{
    my $self=shift;
    my ($cluster1,$cluster2)=@_;
    foreach (keys %{$cluster1->{src}}){
	if (not defined $cluster2->{src}->{$_}){return 0;}
    }
    foreach (keys %{$cluster1->{trg}}){
	if (not defined $cluster2->{trg}->{$_}){return 0;}
    }
    return 1;
}

sub findSrcOverlap{
    my $self=shift;
    return $self->findOverlap('src',@_);
}
sub findTrgOverlap{
    my $self=shift;
    return $self->findOverlap('trg',@_);
}

sub findOverlap{
    my $self=shift;
    my ($lang,$token,$cluster,$overlap)=@_;
    my @c=grep (defined $$cluster[$_]{$lang}{$token},0..$#{$cluster});
    foreach (@c){
	$$overlap{$_}=1;
    }
}

sub getMissingTokens{
    my $self=shift;
    my ($src,$trg)=@_;
    my @srcAccepted=keys %{$self->{srcToken}};
    my @trgAccepted=keys %{$self->{trgToken}};

    my $srcPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$src};
    my $trgPhr=join '(:[0-9]+)?:',sort {$a <=> $b} keys %{$trg};

    my $missingSrc=undef;
    my $missingTrg=undef;

    my @match;

    if (@match=grep(/$srcPhr/,@srcAccepted)){

	@match=sort {length($a) <=> length($b)} @match;
	if ($match[0]=~/^(.*)$srcPhr(.*)$/){
	    $missingSrc="$1$2$3$4$5$6$7$8$9";
	}

	if (@match=grep(/$trgPhr/,@trgAccepted)){

	    @match=sort {length($a) <=> length($b)} @match;
	    if ($match[0]=~/^(.*)$trgPhr(.*)$/){
		$missingTrg="$1$2$3$4$5$6$7$8$9";
	    }
	    $missingSrc=~s/^://;$missingSrc=~s/:$//;
	    $missingTrg=~s/^://;$missingTrg=~s/:$//;
	    return ($missingSrc,$missingTrg);
	}
    }

    return ();
}


sub isAdjacent{
    my $self=shift;
    my ($x,$y,$cluster)=@_;
    if ((defined $$cluster{src}{$x}) and
	((defined $$cluster{trg}{$y-1}) or
	 ((defined $$cluster{trg}{$y+1})))){
	return 1;
    }
    if ((defined $$cluster{trg}{$y}) and
	((defined $$cluster{src}{$x-1}) or
	 ((defined $$cluster{src}{$x+1})))){
	return 1;
    }
    return 0;
}

sub isAdjacentScore{
    my $self=shift;
    my ($x,$y,$cluster,$p)=@_;

    if ((defined $$cluster{src}{$x}) and
	(defined $$cluster{trg}{$y-1})){
	if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x]->[$y-1]*$p){
	    return 1;
	}
	return 0;
    }
    if ((defined $$cluster{src}{$x}) and
	(defined $$cluster{trg}{$y+1})){
	if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x]->[$y+1]*$p){
	    return 1;
	}
	return 0;
    }
    if ((defined $$cluster{src}{$x-1}) and
	(defined $$cluster{trg}{$y})){
	if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x-1]->[$y]*$p){
	    return 1;
	}
	return 0;
    }
    if ((defined $$cluster{src}{$x+1}) and
	(defined $$cluster{trg}{$y})){
	if ($self->{matrix}->[$x]->[$y]>=$self->{matrix}->[$x+1]->[$y]*$p){
	    return 1;
	}
	return 0;
    }
    return 0;
}


sub findClusterOverlap{
    my $self=shift;
    my ($x,$y,$cluster)=@_;
    my @overlap=();
    foreach (0..$#{$cluster}){
	if (defined $$cluster[$_]{src}{$x}){
	    push(@overlap,$_);
	}
	elsif (defined $$cluster[$_]{trg}{$y}){
	    push(@overlap,$_);
	}
    }
    return @overlap;
}



#========================================================================

sub cloneLinkMatrix{
    my $self=shift;
    my $matrix=shift;
    my $clone=shift;

    if (ref($matrix) ne 'ARRAY'){return ();}
    if (ref($clone) ne 'ARRAY'){$clone=[];}

    foreach my $x (0..$#{$matrix}){
	foreach my $y (0..$#{$$matrix[$x]}){
	    $$clone[$x][$y]=$$matrix[$x][$y];
	}
    }
    return $clone;
}




#==========================================================================
#
#
#
#==========================================================================



sub clueMatrixToHtml{
    my $self=shift;

    my $Matrix=$self->{matrix};
    my $Token=$self->{token};
    my $SrcTok=$$Token{source};
    my $TrgTok=$$Token{target};
    my $nrSrc=$#{$$Token{source}};
    my $nrTrg=$#{$$Token{target}};

    my $max;
    foreach my $s (0..$nrSrc){
        foreach my $t (0..$nrTrg){
            if ($Matrix->[$s]->[$t]>$max){$max=$Matrix->[$s]->[$t];}
        }
    }
    if (not $max){$max=1;}

    my $html="<p>\n";
    $html.="<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\">\n";
    $html.="<tr><th></th>\n";

    foreach my $t (0..$nrTrg){
        my $str=$TrgTok->[$t];
        $html.="<th>$str</th>\n";
    }

    foreach my $s (0..$nrSrc){
        $html.="</tr><tr>\n";
        my $str=$SrcTok->[$s];
        $html.="<th>$str</th>\n";
        foreach my $t (0..$nrTrg){
            my $score=0;
            if ($Matrix->[$s]){
                if ($Matrix->[$s]->[$t]){
                    $score=$Matrix->[$s]->[$t];
                }
            }
            my $color=255-$score*256/$max;
            if ($color==-1){$color=0;}
	    my $hex=sprintf("%X",$color);
	    if (length($hex)<2){$hex="0$hex";}
	    my $val=int(100*$score);
            if ($color<128){
		$html.="<td bgcolor=\"#$hex$hex$hex\">";
		$html.='<font color="#ffffff">';
		$html.="$val</font></td>\n";
	    }
	    else{
		$html.="<td bgcolor=\"#$hex$hex$hex\">";
                $html.="$val</td>\n";
	    }
        }
    }
    $html.="</tr></table><hr>\n";
    return $html;
}



sub printHtmlClueMatrix{
    my $self=shift;
    print STDERR $self->clueMatrixToHtml();
}



sub printClueMatrix{
    my $self=shift;

    my ($SrcTok,$TrgTok,$Matrix)=@_;

    my $nrSrc=$#{$SrcTok};
    my $nrTrg=$#{$TrgTok};


    print STDERR "\n=====================================================\n";
    print STDERR "final clue matrix scores";
    print STDERR "\n=====================================================\n";

    foreach my $s (0..$nrSrc){
	foreach my $t (0..$nrTrg){
	    my $score=$Matrix->[$s]->[$t];
	    if ($score>0){
#		printf STDERR "[%2d-%-2d] %15s - %-15s: %s\n",
		printf STDERR "[%d %d] %20s - %-20s %s\n",
		$s,$t,$$SrcTok[$s],$$TrgTok[$t],$score;
	    }
	}
    }
    print STDERR "\n=====================================================\n";
    print STDERR "clue matrix $nrSrc x $nrTrg";
    print STDERR "\n=====================================================\n";

    my @char=();
    &MakeCharArr($TrgTok,\@char);
    foreach my $c (0..$#char){
	printf STDERR "\n%10s",' '; 
	foreach (@{$char[$c]}){
	    printf STDERR "%4s",$_;
	}
    }

    print STDERR "\n";

    foreach my $s (0..$nrSrc){
	my $str=substr($SrcTok->[$s],0,10);
	$str=&Uplug::Encoding::convert($str,'utf-8','iso-8859-1');

	printf STDERR "%10s",$str; 
	foreach my $t (0..$nrTrg){
	    my $score=0;
	    if ($Matrix->[$s]){
		if ($Matrix->[$s]->[$t]){
		    $score=$Matrix->[$s]->[$t];
		}
	    }
	    printf STDERR " %3d",$score*100;
	}
	print STDERR "\n";
    }
}


sub MakeCharArr{
    my ($tok,$char)=@_;

    my @lat1=@{$tok};

#    my @lat1=();
#    foreach (0..$#{$tok}){
#	$lat1[$_]=&Uplug::Data::encode($tok->[$_],'utf-8','iso-8859-1');
#    }

    map ($lat1[$_]=&Uplug::Encoding::convert($lat1[$_],'utf-8','iso-8859-1'),
	 (0..$#lat1));

    my $max=&MaxLength(\@lat1);
    foreach my $t (0..$#{$tok}){
	my @c=split(//,$lat1[$t]);
	foreach (1..$max){
	    if (@c){
		$char->[$max-$_]->[$t]=pop(@c);
#		$char->[$max-$_]->[$t]=shift(@c);
	    }
	    else{$char->[$max-$_]->[$t]=' ';}
	}
    }
}

sub MaxLength{
    my ($tok)=@_;
    my $max=0;
    foreach (@{$tok}){
	if (length($_)>$max){$max=length($_);}
    }
    return $max;
}





######### return a true value

1;