The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package String::Alignment;

use warnings;
use strict;
use List::Util qw(max min);

=head1 NAME

String::Alignment - Pair Sentence Alignment

=head1 VERSION

Version 0.01

=cut

our $VERSION = '0.01';

=head1 SYNOPSIS

This module process string alignment.
Now it provide two kind of alignment method, Global and Local Alignment.

    use String::Alignment;

    use String::Alignment qw(do_alignment);

    # local alignment
    my $result = do_alignment($s1,$s2,1); 

    # global alignment
    my $result = do_alignment($s1,$s2); 

=head1 EXPORT

=cut 

require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(do_alignment);

=head1 BUILD-IN VARIABLES

=cut 

my ($s1,$s2);	    # string1, string2
my (@sa1, @sa2);    # string array 1, string array 2

my ($len_s1, $len_s2) = (0,0); # length of s1/s2
my $is_local = 1; # 0 for global alignment

my %table;	    # Dynamic Programming Table

my $max_len;	    # for global
my %best;	    # Best path, for local

=head1 FUNCTIONS

=cut 

sub new {
#    print STDERR "I'm loaded\n";
}

sub do_alignment {
    $s1 = shift;
    $s2 = shift;
    $is_local = shift;
    $is_local = 0 unless defined($is_local);
    give_string_pair($s1,$s2);
    calculate_matrix();
#    similarity_print();
    return get_align_result();
}
=head2 give_string_pair

=cut 

sub give_string_pair {
    $s1 = shift;
    $s2 = shift;
    @sa1 = split //,$s1;
    @sa2 = split //,$s2;
    %table = ();
    %best = ();
    $best{MAX} = 0;
    $table{0}{0} = 0;
    ($len_s1, $len_s2) = (0,0);
}

=head2 cululate_matrix

=cut

sub calculate_matrix {
    if ($is_local) {
	$max_len = 0;
    } else {
	$max_len = scalar(@sa1) > scalar(@sa2) ? scalar(@sa1): scalar(@sa2); # for global
    }
#    print STDERR "max_len is ".$max_len."\n";
    while ($len_s1 <= (scalar @sa1)) {
	while ($len_s2 <= scalar @sa2) {
	    my ($candidate1, $candidate2, $candidate3) = ($max_len,$max_len,$max_len);
	    if ($len_s1 > 0 and $len_s2 > 0) {
		# if match, we add 1 for local, 0 for global
		# else (not matched), we add -1 for local, 1 for global
		$candidate1 = int($table{$len_s1-1}{$len_s2-1}) + 
		    (   $is_local ? 1: -1) *
		    ( ( $sa1[$len_s1-1] eq $sa2[$len_s2-1] )? 1+(-1+$is_local) : -1 )
		;
	    }
	    if ($len_s1 > 0) {
		$candidate2 = int($table{$len_s1-1}{$len_s2}) + 
		    ( $is_local ? (-1) : 1);
	    }
	    if ($len_s2 > 0) {
		$candidate3 = int($table{$len_s1}{$len_s2 - 1})  + 
		    ( $is_local ? (-1) : 1);
	    }
#	    print STDERR "setting ($len_s1,$len_s2)...";
#	    print STDERR "(".$candidate1."\t".$candidate2."\t".$candidate3.")\n";
	    if ($is_local) {
		$table{$len_s1}{$len_s2} = max (
		    $candidate1, $candidate2, $candidate3, 0
		) if ($len_s1 > 0 or $len_s2 > 0);
		$best{X} = $len_s1 if $best{MAX} <= $table{$len_s1}{$len_s2};
		$best{Y} = $len_s2 if $best{MAX} <= $table{$len_s1}{$len_s2};
		$best{MAX} = $table{$len_s1}{$len_s2} if $best{MAX} <= $table{$len_s1}{$len_s2};
	    } else { # global
		$table{$len_s1}{$len_s2} = min (
		    $candidate1, $candidate2, $candidate3
		) if ($len_s1 > 0 or $len_s2 > 0);
	    }
	    $len_s2 +=1;
	}
	$len_s2 = 0;
	$len_s1 +=1;
    }
}

=head2 similarity_print

=cut

sub similarity_print {
    print STDERR "\n \t \t".join("\t",@sa2)."\n";
    for my $key (sort {int($a) <=> int($b)}(keys %table)) {
	print STDERR $sa1[$key-1]."\t" if $key > 0;
	print STDERR " \t" unless $key > 0;
	for my $subkey (sort {int($a) <=> int($b)} (keys %{$table{$key}})) {
	    print STDERR $table{$key}{$subkey}."\t";
	}
	print STDERR "\n";
    }
};

=head2 get_align_result

=cut

sub get_align_result {
    my ($i, $j) = (0, 0);
    my (@as1, @as2);
    my $baseline = 0;
    if ($is_local) {
	$i = $best{X};
	$j = $best{Y};
    } else {
	$i = scalar @sa1;
	$j = scalar @sa2;
    }
    while ( $table{$i}{$j} > 0) {
	if ($is_local) { 
	    $baseline = max($table{$i-1}{$j-1},$table{$i-1}{$j},$table{$i}{$j-1});
	} else {
	    $baseline = min($table{$i-1}{$j-1},$table{$i-1}{$j},$table{$i}{$j-1});
	}
	if ($table{$i-1}{$j-1} == $baseline) {
	    push @as1, $sa1[$i-1];
	    push @as2, $sa2[$j-1];
	    $i--;
	    $j--;
	} elsif ($table{$i}{$j-1} == $baseline) {
	    push @as1, "-"; # gap
	    push @as2, $sa2[$j-1];
	    $j--;
	} elsif ($table{$i-1}{$j} == $baseline) {
	    push @as1, $sa1[$i-1];
	    push @as2, "-"; # gap
	    $i--;
	} else {
	    die $!;
	}
    }
    return ( join ("",reverse @as2)."\t".join ("",reverse @as1) );
}

=head1 AUTHOR

Cheng-Lung Sung, C<< <clsung@cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to
C<bug-string-alignment@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=String-Alignment>.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.

=head1 ACKNOWLEDGEMENTS

=head1 COPYRIGHT & LICENSE

Copyright 2006 Cheng-Lung Sung, All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of String::Alignment