The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

#
# GENERATED WITH PDL::PP! Don't modify!
#
package PDL::EditDistance;

@EXPORT_OK  = qw(   edit_costs  _edit_costs  edit_costs_static  edit_distance_full  _edit_distance_full PDL::PP _edit_distance_full  edit_align_full  _edit_align_full PDL::PP _edit_align_full  edit_distance_static  _edit_distance_static PDL::PP _edit_distance_static  edit_align_static  _edit_align_static PDL::PP _edit_align_static  align_op_insert1 PDL::PP align_op_insert1  align_op_insert2 PDL::PP align_op_insert2  align_op_match PDL::PP align_op_match  align_op_substitute PDL::PP align_op_substitute  align_op_insert  align_op_delete  align_ops  edit_bestpath  _edit_bestpath PDL::PP _edit_bestpath  edit_pathtrace  _edit_pathtrace PDL::PP _edit_pathtrace  edit_lcs  _edit_lcs PDL::PP _edit_lcs  lcs_backtrace  _lcs_backtrace PDL::PP _lcs_backtrace );
%EXPORT_TAGS = (Func=>[@EXPORT_OK]);

use PDL::Core;
use PDL::Exporter;
use DynaLoader;



   $PDL::EditDistance::VERSION = 0.06;
   @ISA    = ( 'PDL::Exporter','DynaLoader' );
   push @PDL::Core::PP, __PACKAGE__;
   bootstrap PDL::EditDistance $VERSION;




use strict;

=pod

=head1 NAME

PDL::EditDistance - Wagner-Fischer edit distance and alignment for PDLs.

=head1 SYNOPSIS

 use PDL;
 use PDL::EditDistance;

 ##-- input PDLs
 $a = pdl([map { ord($_) } qw(G U M B O)]);
 $b = pdl([map { ord($_) } qw(G A M B O L)]);

 $a1 = pdl([0, map { ord($_) } qw(G U M B O)]);
 $b1 = pdl([0, map { ord($_) } qw(G A M B O L)]);

 ##-------------------------------------------------------------
 ## Levenshtein distance
 $dist          = edit_distance_static($a,$b, 0,1,1,1);
 ($dist,$align) = edit_align_static($a,$b, 0,1,1,1);

 ##-------------------------------------------------------------
 ## Wagner-Fischer distance
 @costs         = ($costMatch=0,$costInsert=1,$costDelete=1,$costSubstitute=2);
 $dist          = edit_distance_static($a,$b, @costs);
 ($dist,$align) = edit_align_static($a,$b, @costs);

 ##-------------------------------------------------------------
 ## General edit distance
 $costsMatch = random($a->nelem+1, $b->nelem+1);
 $costsIns   = random($a->nelem+1, $b->nelem+1);
 $costsDel   = random($a->nelem+1, $b->nelem+1);
 $costsSubst = random($a->nelem+1, $b->nelem+1);
 @costs         = ($costsMatch,$costsIns,$costDel,$costsSubst);
 $dist          = edit_distance_full($a,$b,@costs);
 ($dist,$align) = edit_align_full($a,$b,@costs);

 ##-------------------------------------------------------------
 ## Alignment
 $op_match = align_op_match();      ##-- constant
 $op_del   = align_op_insert1();    ##-- constant
 $op_ins   = align_op_insert2();    ##-- constant
 $op_subst = align_op_substitute(); ##-- constant

 ($apath,$bpath,$pathlen) = edit_bestpath($align);
 ($ai,$bi,$ops,$pathlen)  = edit_pathtrace($align);

 ##-------------------------------------------------------------
 ## Longest Common Subsequence
 $lcs = edit_lcs($a,$b);
 ($ai,$bi,$lcslen) = lcs_backtrace($a,$b,$lcs);

=cut







=head1 FUNCTIONS



=cut






=pod

=head2 _edit_pdl

=for sig

  Signature: (a(N); [o]apdl(N+1))

Convenience method.
Returns a pdl $apdl() suitable for representing $a(),
which can be specified as a string, arrays of numbers, or as a PDL.
$apdl(0) is always set to zero.

=cut

sub _edit_pdl {
  return $_[0]->flat->reshape($_[0]->nelem+1)->rotate(1) if (UNIVERSAL::isa($_[0],'PDL'));
  return pdl(byte,[0, map { ord($_) } split(//,$_[0])]) if (!ref($_[0]));
  return pdl([0,@{$_[0]}]);
}





=pod

=head2 edit_costs

=for sig

  Signature: (PDL::Type type; int N; int M;
              [o]costsMatch(N+1,M+1); [o]costsIns(N+1,M+1); [o]costsDel(N+1,M+1); [o]costsSubst(N+1,M+1))

Convenience method.
Ensures existence and proper dimensionality of cost matrices for inputs
of length N and M.

=cut

sub edit_costs {
  return _edit_costs($_[0],$_[1]+1,$_[2]+1,@_[3..$#_]);
}





=pod

=head2 _edit_costs

=for sig

  Signature: (PDL::Type type; int N1; int M1;
              [o]costsMatch(N1,M1); [o]costsIns(N1,M1); [o]costsDel(N1,M1); [o]costsSubst(N1,M1))

Low-level method.
Ensures existence and proper dimensionality of cost matrices for inputs
of length N1-1 and M1-1.

=cut

sub _edit_costs {
  #my ($type,$n1,$m1,$costsMatch,$costsIns,$costsDel,$costsSubst) = @_;
  return (_edit_matrix(@_[0..2],$_[3]),
          _edit_matrix(@_[0..2],$_[4]),
          _edit_matrix(@_[0..2],$_[5]),
          _edit_matrix(@_[0..2],$_[6]));
}

##-- $matrix = _edit_matrix($type,$dim0,$dim1,$mat)
sub _edit_matrix {
  return zeroes(@_[0..2]) if (!defined($_[3]));
  $_[3]->reshape(@_[1,2]) if ($_[3]->ndims != 2 || $_[3]->dim(0) != $_[1] || $_[3]->dim(1) != $_[2]);
  return $_[3]->type == $_[0] ? $_[3] : $_[3]->convert($_[0]);
}




=pod

=head2 edit_costs_static

=for sig

  Signature: (PDL::Type type; int N; int M;
              staticCostMatch(); staticCostIns(); staticCostSubst();
              [o]costsMatch(N+1,M+1); [o]costsIns(N+1,M+1); [o]costsDel(N+1,M+1); [o]costsSubst(N+1,M+1))

Convenience method.

=cut

sub edit_costs_static {
  #my ($type,$n,$m, $cMatch,$cIns,$cDel,$cSubst, $costsMatch,$costsIns,$costsDel,$costsSubst) = @_;
  my @costs = edit_costs(@_[0..2],@_[7..$#_]);
  $costs[$_] .= $_[$_+3] foreach (0..3);
  return @costs;
}




=pod

=head2 edit_distance_full

=for sig

  Signature: (a(N); b(M);
              costsMatch(N+1,M+1); costsIns(N+1,M+1); costsDel(N+1,M+1); costsSubst(N+1,M+1);
              [o]dist(N+1,M+1); [o]align(N+1,M+1))

Convenience method.
Compute the edit distance matrix for inputs $a() and $b(), and
cost matrices $costsMatch(), $costsIns(), $costsDel(), and $costsSubst().
$a() and $b() may be specified as PDLs, arrays of numbers, or as strings.

=cut

sub edit_distance_full {
  return _edit_distance_full(_edit_pdl($_[0]), _edit_pdl($_[1]), @_[2..$#_]);
}





=head2 _edit_distance_full

=for sig

  Signature: (a1(N1); b1(M1); costsMatch(N1,M1); costsIns(N1,M1); costsDel(N1,M1); costsSubst(N1,M1); [o]dist(N1,M1))


Low-level method.
Compute the edit distance matrix for input PDLs $a1() and $b1() and
cost matrices $costsMatch(), $costsIns(), $costsDel(), and $costsSubst().

The first elements of $a1() and $b1() are ignored.


=for bad

_edit_distance_full does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_distance_full = \&PDL::_edit_distance_full;




=pod

=head2 edit_align_full

=for sig

  Signature: (a(N); b(M);
              costsMatch(N+1,M+1); costsIns(N+1,M+1); costsDel(N+1,N+1); costsSubst(N+1,M+1);
              [o]dist(N+1,M+1); [o]align(N+1,M+1))

Convenience method.
Compute the edit distance and alignment matrices for inputs $a() and $b(), and
cost matrices $costsMatch(), $costsIns(), $costsDel(), and $costsSubst().
$a() and $b() may be specified as PDLs, arrays of numbers, or as strings.

=cut

sub edit_align_full {
  return _edit_align_full(_edit_pdl($_[0]), _edit_pdl($_[1]), @_[2..$#_]);
}





=head2 _edit_align_full

=for sig

  Signature: (a1(N1); b1(M1); costsMatch(N1,M1); costsIns(N1,M1); costsDel(N1,M1); costsSubst(N1,M1); [o]dist(N1,M1); byte [o]align(N1,M1))


Low-level method.
Compute the edit distance and alignment matrix for input PDLs $a1() and $b1() and
cost matrices $costsMatch(), $costsIns(), $costsDel(), and $costsSubst().

The first elements of $a1() and $b1() are ignored.


=for bad

_edit_align_full does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_align_full = \&PDL::_edit_align_full;




=pod

=head2 edit_distance_static

=for sig

  Signature: (a(N); b(M);
              staticCostMatch(); staticCostIns(); staticCostDel(); staticCostSubst();
              [o]dist(N+1,M+1))

Convenience method.
Compute the edit distance matrix for inputs $a() and $b() given
a static cost schema @costs = ($staticCostMatch(), $staticCostIns(), $staticCostDel(), and $staticCostSubst()).
$a() and $b() may be specified as PDLs, arrays of numbers, or as strings.
Functionally equivalent to edit_distance_full($matches,@costs,$dist),
but slightly faster.

=cut

sub edit_distance_static {
  return _edit_distance_static(_edit_pdl($_[0]), _edit_pdl($_[1]), @_[2..$#_]);
}





=head2 _edit_distance_static

=for sig

  Signature: (a1(N1); b1(M1); costMatch(); costIns(); costDel(); costSubst(); [o]dist(N1,M1))


Low-level method.
Compute the edit distance matrix for input PDLs $a1() and $b1() given a
static cost schema @costs = ($costMatch(), $costIns(), $costDel(), $costSubst()).
Functionally identitical to _edit_distance_matrix_full($matches,@costs,$dist),
but slightly faster.

The first elements of $a1() and $b1() are ignored.


=for bad

_edit_distance_static does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_distance_static = \&PDL::_edit_distance_static;




=pod

=head2 edit_align_static

=for sig

  Signature: (a(N); b(M);
              staticCostMatch(); staticCostIns(); staticCostDel(); staticCostSubst();
              [o]dist(N+1,M+1); [o]align(N+1,M+1))

Convenience method.
Compute the edit distance and alignment matrices for inputs $a() and $b() given
a static cost schema @costs = ($staticCostMatch(), $staticCostIns(), $staticCostDel(), and $staticCostSubst()).
$a() and $b() may be specified as PDLs, arrays of numbers, or as strings.
Functionally equivalent to edit_align_full($matches,@costs,$dist),
but slightly faster.

=cut

sub edit_align_static {
  return _edit_align_static(_edit_pdl($_[0]), _edit_pdl($_[1]), @_[2..$#_]);
}





=head2 _edit_align_static

=for sig

  Signature: (a1(N1); b1(M1); costMatch(); costIns(); costDel(); costSubst(); [o]dist(N1,M1); byte [o]align(N1,M1))


Low-level method.
Compute the edit distance and alignment matrices for input PDLs $a1() and $b1() given a
static cost schema @costs = ($costMatch(), $costIns(), $costDel(), $costSubst()).
Functionally identitical to _edit_distance_matrix_full($matches,@costs,$dist),
but slightly faster.

The first elements of $a1() and $b1() are ignored.


=for bad

_edit_align_static does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_align_static = \&PDL::_edit_align_static;





=head2 align_op_insert1

=for sig

  Signature: ([o]a())

=for ref

Alignment matrix value constant for insertion operations on $a() string.

=for bad

align_op_insert1 does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*align_op_insert1 = \&PDL::align_op_insert1;





=head2 align_op_insert2

=for sig

  Signature: ([o]a())

=for ref

Alignment matrix value constant for insertion operations on $a() string.

=for bad

align_op_insert2 does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*align_op_insert2 = \&PDL::align_op_insert2;





=head2 align_op_match

=for sig

  Signature: ([o]a())

=for ref

Alignment matrix value constant for matches.

=for bad

align_op_match does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*align_op_match = \&PDL::align_op_match;





=head2 align_op_substitute

=for sig

  Signature: ([o]a())

=for ref

Alignment matrix value constant for substitution operations.

=for bad

align_op_substitute does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*align_op_substitute = \&PDL::align_op_substitute;




=pod

=head2 align_op_delete

Alias for align_op_insert1()

=head2 align_op_insert

Alias for align_op_insert2()

=cut

*align_op_delete = \&align_op_insert1;
*align_op_insert = \&align_op_insert2;



=pod

=head2 align_ops

=for sig

  Signature: ([o]ops(4))

Alignment matrix value constants 4-element pdl (match,insert1,insert2,substitute).a

=cut

sub align_ops { return PDL->sequence(PDL::byte(),4); }




=pod

=head2 edit_bestpath

=for sig

  Signature: (align(N+1,M+1); [o]apath(N+M+2); [o]bpath(N+M+2); [o]pathlen())

Convenience method.
Compute best path through alignment matrix $align().
Stores paths for original input strings $a() and $b() in $apath() and $bpath()
respectively.
Negative values in $apath() and $bpath() indicate insertion/deletion operations.
On completion, $pathlen() holds the actual length of the paths.

=cut

sub edit_bestpath {
  my ($align,$apath,$bpath,$len) = @_;
  $len=pdl(long,$align->dim(0)+$align->dim(1)) if (!defined($len));
  if (!defined($apath)) { $apath=zeroes(long,$len); }
  else { $apath->reshape($len) if ($apath->nelem < $len); }
  if (!defined($bpath)) { $bpath = zeroes(long,$len); }
  else { $bpath->reshape($len) if ($bpath->nelem < $len); }
  _edit_bestpath($align, $apath, $bpath, $len, $align->dim(0)-1, $align->dim(1)-1);
  return ($apath,$bpath,$len);
}





=head2 _edit_bestpath

=for sig

  Signature: (align(N1,M1); int [o]apath(L); int [o]bpath(L); int [o]len(); int ifinal; int jfinal)


Low-level method.
Compute best path through alignment matrix $align() from final index ($ifinal,$jfinal).
Stores paths for (original) input strings $a() and $b() in $apath() and $bpath()
respectively.
Negative values in $apath() and $bpath() indicate insertion/deletion operations.
On completion, $pathlen() holds the actual length of the paths.


=for bad

_edit_bestpath does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_bestpath = \&PDL::_edit_bestpath;




=pod

=head2 edit_pathtrace

=for sig

  Signature: ( align(N+1,M+1); [o]ai(L); [o]bi(L); [o]ops(L); [o]$pathlen() )

Convenience method.
Compute alignment path backtrace through alignment matrix $align() from final index ($ifinal,$jfinal).
Stores raw paths for (original) input strings $a() and $b() in $ai() and $bi()
respectively.
Unlike edit_bestpath(), null-moves for $ai() and $bi() are not stored here as negative values.
Returned pdls ($ai,$bi,$ops) are trimmed to the appropriate path length.

=cut

sub edit_pathtrace {
  my ($align,$ai,$bi,$ops,$len) = @_;
  $len=pdl(long,$align->dim(0)+$align->dim(1)) if (!defined($len));
  if (!defined($ai)) { $ai=zeroes(long,$len); }
  else { $ai->reshape($len) if ($ai->nelem < $len); }
  if (!defined($bi)) { $bi = zeroes(long,$len); }
  else { $bi->reshape($len) if ($bi->nelem < $len); }
  if (!defined($ops)) { $ops = zeroes(long,$len); }
  else { $ops->reshape($len) if ($ops->nelem < $len); }
  _edit_pathtrace($align, $ai,$bi,$ops,$len, $align->dim(0)-1,$align->dim(1)-1);
  my $lens = ($len->sclr-1);
  return ((map { $_->slice("0:$lens") } ($ai,$bi,$ops)), $len);
}





=head2 _edit_pathtrace

=for sig

  Signature: (align(N1,M1); int [o]ai(L); int [o]bi(L); int [o]ops(L); int [o]len(); int ifinal; int jfinal)


Low-level method.
Compute alignment path backtrace through alignment matrix $align() from final index ($ifinal,$jfinal).
Stores raw paths for (original) input strings $a() and $b() in $ai() and $bi()
respectively.
Unlike edit_bestpath(), null-moves for $ai() and $bi() are not stored here as negative values.
Returned pdls ($ai,$bi,$ops) are trimmed to the appropriate path length.


=for bad

_edit_pathtrace does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_pathtrace = \&PDL::_edit_pathtrace;




=pod

=head2 edit_lcs

=for sig

  Signature: (a(N); b(M); int [o]lcs(N+1,M+1);)

Convenience method.
Compute the longest common subsequence (LCS) matrix for input PDLs $a1() and $b1().
The output matrix $lcs() contains at cell ($i+1,$j+1) the length of the LCS
between $a1(0..$i) and $b1(0..$j); thus $lcs($N,$M) contains the
length of the LCS between $a() and $b().

=cut

sub edit_lcs {
  return _edit_lcs(_edit_pdl($_[0]), _edit_pdl($_[1]), @_[2..$#_]);
}





=head2 _edit_lcs

=for sig

  Signature: (a1(N1); b1(M1); int [o]lcs(N1,M1))


Low-level method.
Compute the longest common subsequence (LCS) matrix for input PDLs $a1() and $b1().
The initial (zeroth) elements of $a1() and $b1() are ignored.
The output matrix $lcs() contains at cell ($i,$j) the length of the LCS
between $a1(1..$i) and $b1(1..$j); thus $lcs($N1-1,$M1-1) contains the
length of the LCS between $a1() and $b1().


=for bad

_edit_lcs does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_edit_lcs = \&PDL::_edit_lcs;




=pod

=head2 lcs_backtrace

=for sig

  Signature: (a(N); b(M); int lcs(N+1,M+1); int ifinal(); int jfinal(); int [o]ai(L); int [o]bi(L); int [o]len())

Convenience method.
Compute longest-common-subsequence backtrace through LCS matrix $lcs()
for original input strings ($a(),$b()) from final index ($ifinal,$jfinal).
Stores raw paths for (original) input strings $a() and $b() in $ai() and $bi()
respectively.

=cut

sub lcs_backtrace {
  my ($a,$b,$lcs,$ifinal,$jfinal,$ai,$bi,$len) = @_;
  $len=pdl(long, pdl(long,$lcs->dims)->min) if (!defined($len));
  if (!defined($ai)) { $ai=zeroes(long,$len); }
  else { $ai->reshape($len) if ($ai->nelem < $len); }
  if (!defined($bi)) { $bi = zeroes(long,$len); }
  else { $bi->reshape($len) if ($bi->nelem < $len); }
  if (!defined($ifinal)) { $ifinal = $lcs->dim(0)-1; }
  if (!defined($jfinal)) { $jfinal = $lcs->dim(1)-1; }
  _lcs_backtrace(_edit_pdl($a),_edit_pdl($b), $lcs,$ifinal,$jfinal, $ai,$bi,$len);
  my $lens = ($len->sclr-1);
  return ($ai->slice("0:$lens"),$bi->slice("0:$lens"), $len);
}





=head2 _lcs_backtrace

=for sig

  Signature: (a1(N1); b1(M1); int lcs(N1,M1); int ifinal(); int jfinal(); [o]ai(L); [o]bi(L); int [o]len())


Low-level method.
Compute longest-common-subsequence backtrace through LCS matrix $lcs()
for initial-padded strings ($a1(),$b1()) from final index ($ifinal,$jfinal).
Stores raw paths for (original) input strings $a() and $b() in $ai() and $bi()
respectively.


=for bad

_lcs_backtrace does not process bad values.
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.


=cut






*_lcs_backtrace = \&PDL::_lcs_backtrace;




##---------------------------------------------------------------------
=pod

=head1 ACKNOWLEDGEMENTS

Perl by Larry Wall.

PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others.

=cut

##----------------------------------------------------------------------
=pod

=head1 KNOWN BUGS

Probably many.

=cut


##---------------------------------------------------------------------
=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=head2 Copyright Policy

Copyright (C) 2006-2014, Bryan Jurish. All rights reserved.

This package is free software, and entirely without warranty.
You may redistribute it and/or modify it under the same terms
as Perl itself, either Perl 5.14.2, or at your option any later
version of Perl 5.

=head1 SEE ALSO

perl(1), PDL(3perl).

=cut



;



# Exit with OK status

1;