The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
our $VERSION = '0.08';
pp_setversion(qq{'$VERSION'});
$VERSION = eval $VERSION;

use PDL::Exporter;

if ($^O =~ /MSWin/) {
pp_addhdr('
#include <float.h>
');
}

pp_addhdr('
#include <math.h>

#if defined(PDL_CORE_VERSION) && PDL_CORE_VERSION < 10
typedef PDL_Long PDL_Indx;
#endif

typedef PDL_Long logical;
typedef PDL_Long integer;
typedef PDL_Long ftnlen;
typedef struct { double r, i; } dcomplex;

#ifndef abs
#define abs(x) ((x) >= 0 ? (x) : -(x))
#endif
#ifndef max
#define max(a,b) ((a) >= (b) ? (a) : (b))
#endif


#if __GLIBC__ > 1 && (defined __USE_MISC || defined __USE_XOPEN || defined __USE_ISOC9X)
# define CABS(r,i) hypot (r, i)
#else
static double CABS (double r, double i)
{
    double t;

    if (r < 0) r = - r;
    if (i < 0) i = - i;

    if (i > r)
      {
        t = r; r = i; i = t;
      }

    if (r + i == r){
      return r;
    }

    t = i / r;
    return r * sqrt (1 + t*t);
}
#endif

#define CSQRT(ar,ai,cr,ci) 		\
{					\
        double mag = CABS ((ar), (ai));		\
        double t;					\
                                                \
        if (mag == 0)				\
          (cr) = (ci) = 0;			\
        else if ((ar) > 0)			\
          {					\
            t = sqrt (0.5 * (mag + (ar)));	\
            (cr) = t;				\
            (ci) = 0.5 * (ai) / t;		\
          }					\
        else					\
          {					\
            t = sqrt (0.5 * (mag - (ar)));	\
                                                \
            if ((ai) < 0)			\
              t = -t;				\
                                                \
            (cr) = 0.5 * (ai) / t;		\
            (ci) = t;				\
          }					\
}

#define CMULT(ar,ai,br,bi,or,oi) 		\
        (or) = (ar)*(br) - (ai)*(bi);		\
        (oi) = (ar)*(bi) + (ai)*(br);


double z_abs(dcomplex *z)
{
	return( CABS( z->r, z->i ) );
}

');



pp_addpm({At=>'Top'},<<'EOD');
use PDL::Func;
use PDL::Core;
use PDL::Slices;
use PDL::Ops qw//;
use PDL::Math qw/floor/;
use PDL::Complex;
use PDL::NiceSlice;
use PDL::LinearAlgebra;
use PDL::LinearAlgebra::Real qw //;
use PDL::LinearAlgebra::Complex qw //;
use strict;

=encoding Latin-1
=head1 NAME

PDL::LinearAlgebra::Trans - Linear Algebra based transcendental functions for PDL

=head1 SYNOPSIS

 use PDL::LinearAlgebra::Trans;

 $a = random (100,100);
 $sqrt = msqrt($a);

=head1 DESCRIPTION

This module provides some transcendental functions for matrices.
Moreover it provides sec, asec, sech, asech, cot, acot, acoth, coth, csc,
acsc, csch, acsch. Beware, importing this module will overwrite the hidden
PDL routine sec. If you need to call it specify its origin module : PDL::Basic::sec(args)


EOD

pp_add_exported('', ' mexp mexpts mlog msqrt mpow 
			mcos msin mtan	msec mcsc mcot
			mcosh  msinh  mtanh  msech  mcsch  mcoth
			macos masin matan masec macsc macot 
			macosh masinh matanh masech macsch macoth
			sec asec sech asech 
			cot acot acoth coth mfun
			csc acsc csch acsch toreal pi');


pp_def('geexp',
          Pars => '[io,phys]A(n,n);int deg();scale();[io]trace();int [o]ns();int [o]info()',
       	  GenericTypes => [D],
          Code => '
/* ----------------------------------------------------------------------| 
 int dgpadm_(integer *ideg, integer *m, double *t, 
	double *h__, integer *ldh, double *wsp, integer *lwsp, 
	integer *ipiv, integer *iexph, integer *ns, integer *iflag) 

 -----Purpose----------------------------------------------------------| 

     Computes exp(t*H), the matrix exponential of a general matrix in 
     full, using the irreducible rational Pade approximation to the 
     exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ), 
     combined with scaling-and-squaring. 

 -----Arguments--------------------------------------------------------| 

     ideg|deg : (input) the degre of the diagonal Pade to be used. 
                 a value of 6 is generally satisfactory. 

     m         : (input) order of H. 

     H|A(ldh,m): (input) argument matrix. 

     t|scale        : (input) time-scale (can be < 0). 

     wsp|exp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1. 
      ipiv(m)   : (workspace) 

 >>>> iexph|index     : (output) number such that wsp(iexph) points to exp(tH) 
                 i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1) 
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
                 NOTE: if the routine was called with wsp(iptr), 
                       then exp(tH) will start at wsp(iptr+iexph-1). 

     ns        : (output) number of scaling-squaring used. 

     iflag|info     : (output) exit flag.
                      0 - no problem 
                     > 0 - Singularity in LU factorization when solving
                     Pade approximation
 ----------------------------------------------------------------------| 
	Roger B. Sidje (rbs@maths.uq.edu.au) 
		EXPOKIT: Software Package for Computing Matrix Exponentials. 
		ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
	Gregory Vanuxem (vanuxemg@yahoo.fr)
		Minor modifications (Fortran to C with help of f2c, memory allocation,
					PDL adaptation, null entry matrix, error return,
					exp(tH) in entry matrix, trace normalization).
 ----------------------------------------------------------------------| 
*/

	extern int dscal_(integer *, double *, double *, 
		integer *);
	extern int dgemm_(char *, char *, integer *, integer *, integer *, 
				double *, double *, integer *, double *, 
				integer *, double *, double *, integer *, 
				ftnlen, ftnlen);
	extern int dgesv_(integer *, integer *, double *, 
		integer *, integer *, double *, integer *, integer *);
	extern int daxpy_(integer *, double *, double *, 
		integer *, double *, integer *);
	extern double dlange_(char *norm, integer *m, integer *n, double *a, integer 
		*lda, double *work);

	integer i__1, i__2, i__, j, k;
	integer ip, mm, iq, ih2, ifree,  iused, iodd, iget, iput;
	integer *ipiv;

	double cp, cq, scale, scale2, hnorm, tracen;
	double *coef;
	double *wsp;

	integer c__1 = 1;
	integer c__2 = 2;
	double c_b7 = 0.;
	double c_b11 = 1.;
	double c_b19 = -1.;
	double c_b23 = 2.;
	double *h__ = $P(A);



	$info() = 0;

/////////////    
	mm = $PRIV(__n_size) * $PRIV(__n_size);
	ipiv = (integer *) malloc ( $PRIV(__n_size) * sizeof (integer));
	coef = (double *) malloc ( (($deg() + 1) + $PRIV(__n_size) * $PRIV(__n_size)) * sizeof (double));
	wsp = (double *) malloc ( 3 * mm * sizeof (double));    
//////////

	// ---  initialise pointers ... 
	ih2 = $deg() + 1;
	ip = 0;
	iq = mm;
	ifree = iq + mm;
	tracen = 0;

	if ($trace())
	{
		for (i__ = 0; i__ < $PRIV(__n_size); i__++)
			tracen += h__[i__ + i__ * $PRIV(__n_size)];
	
	
		tracen /= $PRIV(__n_size); 
		if (tracen > 0)
		{
			for (i__ = 0; i__ < $PRIV(__n_size); i__++) 
				h__[i__ + i__ * $PRIV(__n_size)] -= tracen;
	
		}
	}


	// ---  scaling: seek ns such that ||t*H/2^ns|| < 1/2; 
	//     and set scale = t/2^ns ... 

	// Compute Infinity norm
	hnorm = dlange_("I", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, $P(A), 
			&(integer){$PRIV(__n_size)}, wsp);
	
	hnorm = abs($scale() * hnorm);
	if (hnorm == 0.)
	{
		free(ipiv);
		free(wsp);
		free(coef);
		coef = $P(A);
		$ns() = 0;
		for(i__ = 0 ; i__ < $PRIV(__n_size) ; i__++ ){
			for(i__1 = 0 ; i__1 < $PRIV(__n_size) ; i__1++ ){
				coef[i__1 +  i__ *  $PRIV(__n_size) ] = (i__ == i__1) ? 1 : 0;
			}
		}
	}
	else
	{
	

		i__2 = (integer ) (log(hnorm) / log(2.0) + 2);
		$ns() = max(0, i__2);
		scale = $scale() / pow(2.0,  (double )$ns());
		scale2 = scale * scale;
		
		// ---  compute Pade coefficients ...
		i__ = $deg() + 1;
		j = ($deg() << 1) + 1;
		coef[0] = 1.;
		i__1 = $deg();
		for (k = 1; k <= i__1; ++k) {
			coef[k] = coef[k - 1] * (double) (i__ - k) / (double) (k * (j - k));
		}
		
		// ---  H2 = scale2*H*H ... 
		
		dgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &scale2, h__, &(integer){$PRIV(__n_size)}, h__, 
		    &(integer){$PRIV(__n_size)}, &c_b7, &coef[ih2], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
		
		// ---  initialize p (numerator) and q (denominator) ...
		cp = coef[$deg() - 1];
		cq = coef[$deg()];
		for (j = 0; j < $PRIV(__n_size); j++) {
			for (i__ = 1; i__ <= $PRIV(__n_size); ++i__) {
		    		wsp[ip + j  * $PRIV(__n_size) + i__ - 1] = 0.;
		    		wsp[iq + j  * $PRIV(__n_size) + i__ - 1] = 0.;
			}
			wsp[ip + j * ($PRIV(__n_size) + 1)] = cp;
			wsp[iq + j * ($PRIV(__n_size) + 1)] = cq;
		}
		
		// ---  Apply Horner rule ... 
		iodd = 1;
		k = $deg() - 2;
		do{
			iused = iodd * iq + (1 - iodd) * ip;
			dgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &c_b11, &wsp[iused], &(integer){$PRIV(__n_size)}, &coef[ih2], &(integer){$PRIV(__n_size)}, &c_b7, &
			    wsp[ifree], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
			for (j = 0; j < $PRIV(__n_size); j++) {
				wsp[ifree + j  * ($PRIV(__n_size) + 1)] += coef[k];
			}
			ip = (1 - iodd) * ifree + iodd * ip;
			iq = iodd * ifree + (1 - iodd) * iq;
			ifree = iused;
			iodd = 1 - iodd;
			--k;
		}
		while (k >= 0);
	
	
		// ---  Obtain (+/-)(I + 2*(p\q)) ... 
		
		if (iodd == 1)
		{
			dgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &scale, &wsp[iq], &(integer){$PRIV(__n_size)}, h__, &(integer){$PRIV(__n_size)}, &
				c_b7, &wsp[ifree], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
			iq = ifree;
		}
		else
		{
			dgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &scale, &wsp[ip], &(integer){$PRIV(__n_size)}, h__, &(integer){$PRIV(__n_size)}, &
				c_b7, &wsp[ifree], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
			ip = ifree;
		}
		daxpy_(&mm, &c_b19, &wsp[ip], &c__1, &wsp[iq], &c__1);
		dgesv_(&(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &wsp[iq], &(integer){$PRIV(__n_size)}, ipiv, &wsp[ip], &(integer){$PRIV(__n_size)}, $P(info));
	
		if ($info() != 0)
		{
			free(ipiv);
			free(coef);
			free(wsp);
		}
		else
		{
			dscal_(&mm, &c_b23, &wsp[ip], &c__1);
			for (j = 0; j < $PRIV(__n_size); j++) {
				wsp[ip + j  * ($PRIV(__n_size) + 1)] += 1.;
			}
			iput = ip;
			if ($ns() == 0 && iodd == 1)
				dscal_(&mm, &c_b19, &wsp[ip], &c__1);
			else{
				// --   squaring : exp(t*H) = (exp(t*H))^(2^ns) ... 
				iodd = 1;
				i__1 = $ns();
				for (k = 0; k < i__1; k++) {
					iget = iodd * ip + (1 - iodd) * iq;
					iput = (1 - iodd) * ip + iodd * iq;
					dgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &c_b11, &wsp[iget], &(integer){$PRIV(__n_size)}, &wsp[iget], &(integer){$PRIV(__n_size)}, &c_b7,
						&wsp[iput], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
					iodd = 1 - iodd;
				}
			}
			
			free(coef);
			coef = $P(A);
			i__2 = iput + mm;
			i__ = 0;
			$trace() = tracen;
			if (tracen > 0)
			{
				scale = exp(tracen);
				for (i__1 = iput; i__1 < i__2 ; i__1++)
					coef[i__++] = scale * wsp[i__1];

			}
			else
			{
				for (i__1 = iput; i__1 < i__2 ; i__1++)
					coef[i__++] = wsp[i__1];
			}
			free(wsp);
			free(ipiv);
		}
	}
',
	Doc => <<EOT
=for ref

Computes exp(t*A), the matrix exponential of a general matrix,
using the irreducible rational Pade approximation to the 
exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ), 
combined with scaling-and-squaring and optionaly normalization of the trace.
The algorithm is described in Roger B. Sidje (rbs@maths.uq.edu.au)
"EXPOKIT: Software Package for Computing Matrix Exponentials".
ACM - Transactions On Mathematical Software, 24(1):130-156, 1998


     A:		On input argument matrix. On output exp(t*A).
     		Use Fortran storage type.

     deg:	the degre of the diagonal Pade to be used. 
                a value of 6 is generally satisfactory. 

     scale:	time-scale (can be < 0). 

     trace:	on input, boolean value indicating whether or not perform
     		a trace normalization. On output value used.

     ns:	on output number of scaling-squaring used. 

     info:	exit flag.
                      0 - no problem 
                     > 0 - Singularity in LU factorization when solving 
		     Pade approximation

=for example

 $a = random(5,5);
 $trace = pdl(1);
 $a->xchg(0,1)->geexp(6,1,$trace, ($ns = null), ($info = null));

=cut

EOT
);

pp_def('cgeexp',
        Pars => '[io,phys]A(2,n,n);int deg();scale();int trace();int [o]ns();int [o]info()',
	Doc => '

=for ref

Complex version of geexp. The value used for trace normalization is not returned.
The algorithm is described in Roger B. Sidje (rbs@maths.uq.edu.au)
"EXPOKIT: Software Package for Computing Matrix Exponentials".
ACM - Transactions On Mathematical Software, 24(1):130-156, 1998


',
       	  GenericTypes => [D],
          Code => '
/* ----------------------------------------------------------------------| 
 int dgpadm_(integer *ideg, integer *m, double *t, 
	dcomplex *h__, integer *ldh, dcomplex *wsp, integer *lwsp, 
	integer *ipiv, integer *iexph, integer *ns, integer *iflag) 

 -----Purpose----------------------------------------------------------| 

     Computes exp(t*H), the matrix exponential of a general matrix in 
     full, using the irreducible rational Pade approximation to the 
     exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ), 
     combined with scaling-and-squaring. 

 -----Arguments--------------------------------------------------------| 

     ideg|deg : (input) the degre of the diagonal Pade to be used. 
                 a value of 6 is generally satisfactory. 

     m         : (input) order of H. 

     H|A(ldh,m): (input) argument matrix. 

     t|scale        : (input) time-scale (can be < 0). 

     wsp|exp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1. 
      ipiv(m)   : (workspace) 

 >>>> iexph     : (output) number such that wsp(iexph) points to exp(tH) 
                 i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1) 
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
                 NOTE: if the routine was called with wsp(iptr), 
                       then exp(tH) will start at wsp(iptr+iexph-1). 

     ns        : (output) number of scaling-squaring used. 

     iflag|info     : (output) exit flag.
                      0 - no problem 
                     > 0 - Singularity in LU factorization when solving
                     Pade approximation
 ----------------------------------------------------------------------| 
	Roger B. Sidje (rbs@maths.uq.edu.au) 
		EXPOKIT: Software Package for Computing Matrix Exponentials. 
		ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
	Gregory Vanuxem (vanuxemg@yahoo.fr)
		Minor modifications (Fortran to C with help of f2c, memory allocation,
					PDL adaptation, null entry matrix, error return,
					exp(tH) in entry matrix), trace normalization.
 ----------------------------------------------------------------------| 
*/
	static dcomplex c_b1 = {0.,0.};
	static dcomplex c_b2 = {1.,0.};
	static integer c__2 = 2;
	static integer c__1 = 1;
	static double c_b19 = 2.;
	static double c_b21 = -1.;
	integer *ipiv;


	integer i__1, i__2, i__3, i__, j, k;
	integer ip, mm, iq, ih2, iodd, iget, iput, ifree, iused;

	double hnorm,d__1, d__2;
	dcomplex scale, tracen, cp, cq, z__1;	
	dcomplex scale2;
	dcomplex *wsp, *h__;


	extern int zgemm_(char *, char *, integer *, integer *, 
		integer *, dcomplex *, dcomplex *, integer *, 
		dcomplex *, integer *, dcomplex *, dcomplex *, 
		integer *, ftnlen, ftnlen);
	extern int zgesv_(integer *, integer *, dcomplex *, 
		integer *, integer *, dcomplex *, integer *, integer *);
	extern int zaxpy_(integer *, dcomplex *, 
		dcomplex *, integer *, dcomplex *, integer *), zdscal_(
		integer *, double *, dcomplex *, integer *);
	extern double zlange_(char *norm, integer *m, integer *n, dcomplex *a, 
		integer *lda, double *work);




	h__ = (dcomplex *) $P(A);
    
	wsp = (dcomplex *) malloc (( $deg() + 1 +   4 * $PRIV(__n_size) * $PRIV(__n_size)) * sizeof (dcomplex));
	ipiv = (integer *) malloc ( $PRIV(__n_size) * sizeof (integer));



	mm = $PRIV(__n_size) * $PRIV(__n_size);
	$info() = 0;
	
	ih2 = $deg() + 1;
	ip = ih2 + mm;
	iq = ip + mm;
	ifree = iq + mm;
	

	tracen.r = 0;
	tracen.i = 0;
	if ($trace())
	{
		for (i__ = 0; i__ < $PRIV(__n_size); i__++)
		{
			tracen.r = tracen.r + h__[i__ + i__ * $PRIV(__n_size)].r;
			tracen.i = tracen.i + h__[i__ + i__ * $PRIV(__n_size)].i;
		}
	
		tracen.r = tracen.r / $PRIV(__n_size);
		tracen.i = tracen.i / $PRIV(__n_size);

		if (tracen.r  < 0){
			tracen.r = tracen.i;
			tracen.i = 0;
		}
		for (i__ = 0; i__ < $PRIV(__n_size); i__++)
		{
			h__[i__ + i__ * $PRIV(__n_size)].r = h__[i__ + i__ * $PRIV(__n_size)].r - tracen.r ;
			h__[i__ + i__ * $PRIV(__n_size)].i = h__[i__ + i__ * $PRIV(__n_size)].i - tracen.i ;
		}
	}

	// ---  scaling: seek ns such that ||t*H/2^ns|| < 1/2; 
	//     and set scale = t/2^ns ... 
	// Compute Infinity norm
	hnorm = zlange_("I", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &h__[0], 
			&(integer){$PRIV(__n_size)}, &wsp[0].r);

	    hnorm = abs($scale() * hnorm);
	    if (hnorm == 0.)
	    {
		$ns() = 0;
		for(i__ = 0 ; i__ < $PRIV(__n_size) ; i__++ ){
			for(i__1 = 0 ; i__1 < $PRIV(__n_size) ; i__1++ ){
				h__[i__1 +  i__ *  $PRIV(__n_size) ].r = (i__ == i__1) ? 1 : 0;
				h__[i__1 +  i__ *  $PRIV(__n_size) ].i = 0;
			}
		}
	    }
	    else
	    {
		    i__2 = (integer) ((log(hnorm) / log(2.)) + 2);
		    $ns() = max(0,i__2);
		    scale.r = $scale() / pow(2, (double )$ns());
		    scale.i = 0;
		    scale2.r = scale.r * scale.r - scale.i * scale.i;
		    scale2.i = scale.r * scale.i + scale.i * scale.r;
	
		// ---  compute Pade coefficients ...
		
		    i__ = $deg() + 1;
		    j = ($deg() << 1) + 1;
		    wsp[0].r = 1;
		    wsp[0].i = 0;
		    for (k = 1; k <= $deg(); ++k) {
			i__2 = k;
			i__3 = k - 1;
			d__1 = (double) (i__ - k);
			d__2 = (double) (k * (j - k));
			wsp[i__2].r = (d__1 * wsp[i__3].r) / d__2;
			wsp[i__2].i = (d__1 * wsp[i__3].i) / d__2;
		    }
		
		// ---  H2 = scale2*H*H ...
		    zgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &scale2, &h__[0], &(integer){$PRIV(__n_size)}, &h__[0], 
			    &(integer){$PRIV(__n_size)}, &c_b1, &wsp[ih2], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
		
		// ---  initialise p (numerator) and q (denominator) ...
		
		    i__1 = $deg() - 1;
		    cp.r = wsp[i__1].r;
		    cp.i = wsp[i__1].i;
		    i__1 = $deg();
		    cq.r = wsp[i__1].r;
		    cq.i = wsp[i__1].i;

		    for (j = 0; j < $PRIV(__n_size); j++) {
			for (i__ = 1; i__ <= $PRIV(__n_size); ++i__) {
			    i__3 = ip + j  * $PRIV(__n_size) + i__ - 1;
			    wsp[i__3].r = 0., wsp[i__3].i = 0.;
			    i__3 = iq + j  * $PRIV(__n_size) + i__ - 1;
			    wsp[i__3].r = 0., wsp[i__3].i = 0.;
			}
			i__2 = ip + j  * ($PRIV(__n_size) + 1);
			wsp[i__2].r = cp.r;
			wsp[i__2].i = cp.i;
			i__2 = iq + j  * ($PRIV(__n_size) + 1);
			wsp[i__2].r = cq.r;
			wsp[i__2].i = cq.i;
		    }
		
		     // ---  Apply Horner rule ...
		    iodd = 1;
		    k = $deg() - 2;
		    do
		    {
			    iused = iodd * iq + (1 - iodd) * ip;
			    zgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &c_b2, &wsp[iused], &(integer){$PRIV(__n_size)}, &wsp[ih2], &(integer){$PRIV(__n_size)}, &c_b1, &
				    wsp[ifree], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
			    for (j = 0; j < $PRIV(__n_size); j++) {
				i__1 = ifree + j  * ($PRIV(__n_size) + 1);
				i__2 = ifree + j  * ($PRIV(__n_size) + 1);
				i__3 = k;
				
				z__1.r = wsp[i__2].r + wsp[i__3].r;
				z__1.i = wsp[i__2].i + wsp[i__3].i;
				wsp[i__1].r = z__1.r;
				wsp[i__1].i = z__1.i;
			    }
			    ip = (1 - iodd) * ifree + iodd * ip;
			    iq = iodd * ifree + (1 - iodd) * iq;
			    ifree = iused;
			    iodd = 1 - iodd;
			    --k;
		    }
		    while(k >= 0);

		// ---  Obtain (+/-)(I + 2*(p\q)) ...
		
		    if (iodd != 0)
		    {
			zgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &scale, &wsp[iq], &(integer){$PRIV(__n_size)}, &h__[0], &(integer){$PRIV(__n_size)}, &
				c_b1, &wsp[ifree], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
			iq = ifree;
		    }else
		    {
			zgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &scale, &wsp[ip], &(integer){$PRIV(__n_size)}, &h__[0], &(integer){$PRIV(__n_size)}, &
				c_b1, &wsp[ifree], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
			ip = ifree;
		    }
		    z__1.r = -1.;
		    z__1.i = -0.;
		    zaxpy_(&mm, &z__1, &wsp[ip], &c__1, &wsp[iq], &c__1);
		    zgesv_(&(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &wsp[iq], &(integer){$PRIV(__n_size)}, ipiv, &wsp[ip], &(integer){$PRIV(__n_size)}, $P(info));

		    if ($info() == 0) 
		    {
			    zdscal_(&mm, &c_b19, &wsp[ip], &c__1);
			    for (j = 0; j < $PRIV(__n_size); j++)
			    {
				i__2 = ip + j  * ($PRIV(__n_size) + 1);
				i__3 = ip + j  * ($PRIV(__n_size) + 1);

				wsp[i__2].r = wsp[i__3].r + 1.;
				wsp[i__2].i = wsp[i__3].i + 0.;
			    }
			    iput = ip;
			    if ($ns() == 0 && iodd != 0) 
				zdscal_(&mm, &c_b21, &wsp[ip], &c__1);
			    else
			    {
				// --   squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
				
				    iodd = 1;
				    for (k = 0; k < $ns(); k++)
				    {
					iget = iodd * ip + (1 - iodd) * iq;
					iput = (1 - iodd) * ip + iodd * iq;
					zgemm_("n", "n", &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &(integer){$PRIV(__n_size)}, &c_b2, &wsp[iget], &(integer){$PRIV(__n_size)}, &wsp[iget], &(integer){$PRIV(__n_size)}, &c_b1, 
						&wsp[iput], &(integer){$PRIV(__n_size)}, (ftnlen)1, (ftnlen)1);
					iodd = 1 - iodd;
				    }
			    }
		    }

   		    i__2 = iput + mm;
		    i__ = 0;
		    if ($trace())
		    {
		    	// exp(tracen)
			d__1 = exp(tracen.r);
		    	tracen.r = d__1 * cos(tracen.i); 
			tracen.i = d__1 * sin(tracen.i);
			
			for (i__1 = iput; i__1 < i__2 ; i__1++)
		    	{
				h__[i__].r = tracen.r * wsp[i__1].r - tracen.i * wsp[i__1].i;
				h__[i__++].i = tracen.r * wsp[i__1].i + tracen.i * wsp[i__1].r;
		    	}		    
		    }
		    else
		    {
		    	for (i__1 = iput; i__1 < i__2 ; i__1++)
		    	{
				h__[i__].r = wsp[i__1].r;
				h__[i__++].i = wsp[i__1].i;
		    	}
		    }
	    }
	    free(wsp);
	    free(ipiv);
',
);

pp_def('ctrsqrt',
        Pars => '[io,phys]A(2,n,n);int uplo();[phys,o] B(2,n,n);int [o]info()',
	Doc => '

=for ref

Root square of complex triangular matrix. Uses a recurrence of Björck and Hammarling.
(See Nicholas J. Higham. A new sqrtm for MATLAB. Numerical Analysis
Report No. 336, Manchester Centre for Computational Mathematics,
Manchester, England, January 1999. It\'s available at http://www.ma.man.ac.uk/~higham/pap-mf.html)
If uplo is true, A is lower triangular.

',
       	  GenericTypes => [D],
          Code => '
          dcomplex *cb, *ca;
          dcomplex s, snum, sdenum;
          double *b;
          double tt, dn;
          integer i, j, k, l, ind, ind1, ind2;
          
	ca = (dcomplex *) $P(A);
	b = $P(B); 
	cb = (dcomplex *) $P(B);

#define subscr(a_1,a_2)\
	($uplo()) ? (a_2) * $PRIV(__n_size) + (a_1) : (a_1) * $PRIV(__n_size) + (a_2)

	$info() = 0;
	ind = $PRIV(__n_size) * $PRIV(__n_size) * 2;

	for (i = 0; i < ind;i++)
		b[i] = 0;
		

  for (i = 0; i < $PRIV(__n_size); i++){
	ind = subscr(i,i);
	CSQRT(ca[ind].r, ca[ind].i, cb[ind].r, cb[ind].i)
  }

  for (k = 0; k < $PRIV(__n_size)-1; k++)
    {
      for (i = 0; i < $PRIV(__n_size)-(k+1); i++)
	{
 		j = i + k + 1;
		ind = subscr(i,j);
		s.r = 0;
		s.i = 0;
		
	  for (l = i+1; l < j; l++)
	  {
	  	ind1 = subscr(i,l);
	  	ind2 =  subscr(l,j);
	  	s.r += cb[ind1].r * cb[ind2].r - cb[ind1].i * cb[ind2].i;
	  	s.i += cb[ind1].r * cb[ind2].i + cb[ind1].i * cb[ind2].r; 
	  }

	  ind1 = subscr(i,i);
  	  ind2 =  subscr(j,j);
	  sdenum.r = cb[ind1].r + cb[ind2].r;
	  sdenum.i = cb[ind1].i + cb[ind2].i;
	  snum.r = ca[ind].r - s.r;
	  snum.i = ca[ind].i - s.i;

           if (fabs (sdenum.r) > fabs (sdenum.i))
             {
               if (sdenum.r == 0){
               		$info() = -1;
               		goto ENDCTRSQRT;
		}
	       tt = sdenum.i / sdenum.r;
               dn = sdenum.r + tt * sdenum.i;
               cb[ind].r = (snum.r + tt * snum.i) / dn;
       	       cb[ind].i = (snum.i - tt * snum.r) / dn;
             }
           else
             {
               if (sdenum.i == 0){
               		$info() = -1;
               		goto ENDCTRSQRT;
		}
               tt = sdenum.r / sdenum.i;
               dn = sdenum.r * tt + sdenum.i;
               cb[ind].r = (snum.r * tt + snum.i) / dn;
               cb[ind].i = (snum.i * tt - snum.r) / dn;
             }
	}
    }
ENDCTRSQRT:
    ;
#undef subscr
',
);

pp_addhdr('

void dfunc_wrapper(dcomplex *p, integer n, SV* dfunc)
{
   dSP ;

   int count;
   SV *pdl1;
   HV *bless_stash;

   pdl *pdl;
   PDL_Indx odims[1];

   PDL_Indx dims[] = {2,n};
   pdl = PDL->pdlnew();
   PDL->setdims (pdl, dims, 2);
   pdl->datatype = PDL_D;
   pdl->data = (double *) &p[0].r;
   pdl->state |= PDL_DONTTOUCHDATA | PDL_ALLOCATED;
   bless_stash = gv_stashpv("PDL::Complex", 0);

   ENTER ;   SAVETMPS ;   PUSHMARK(sp) ;

   pdl1 = sv_newmortal();
   PDL->SetSV_PDL(pdl1, pdl);
   pdl1 = sv_bless(pdl1, bless_stash); /* bless in PDL::Complex  */
   XPUSHs(pdl1);

   PUTBACK ;

   count = perl_call_sv(dfunc, G_SCALAR);

   SPAGAIN;



   // For pdl_free
   odims[0] = 0;
   PDL->setdims (pdl, odims, 0);
   pdl->state &= ~ (PDL_ALLOCATED |PDL_DONTTOUCHDATA);
   pdl->data=NULL;

   if (count !=1)
      croak("Error calling perl function\n");

   PUTBACK ;   FREETMPS ;   LEAVE ;
}

');

pp_def('ctrfun',
        Pars => '[io,phys]A(2,n,n);int uplo();[phys,o] B(2,n,n);int [o]info()',
	OtherPars => "SV* func" ,
	Doc => '

=for ref

Apply an arbitrary function to a complex triangular matrix. Uses a recurrence of Parlett.
If uplo is true, A is lower triangular.


',
       	  GenericTypes => [D],
          Code => '
          dcomplex *cb, *ca, *diag;
          dcomplex s, snum, sdenum;
          double *b;
          double tt, dn;
          integer i, j, k, l, p, ind, ind1, ind2, tmp, c__1;
          extern double zdotu_(double *ret, integer *n, double *dx, integer *incx, double *dy,
			integer *incy);
	diag = (dcomplex *) malloc ($PRIV(__n_size) * sizeof(dcomplex));

	ca = (dcomplex *) $P(A);
	b = $P(B); 
	cb = (dcomplex *) $P(B);
	c__1 = 1;
	
#define subscr(a_1,a_2) \
	( $uplo() ) ? (a_2)+ $PRIV(__n_size) * (a_1) : (a_1)+ $PRIV(__n_size) * (a_2)

	$info() = 0;
	ind = $PRIV(__n_size) * $PRIV(__n_size) * 2;

	for (i = 0; i < ind;i++)
		b[i] = 0;
		
	for (i = 0; i < $PRIV(__n_size);i++)
	{
		ind = subscr(i,i);
		diag[i].r = ca[ind].r;
		diag[i].i = ca[ind].i;
	}

	dfunc_wrapper(diag, $PRIV(__n_size), $PRIV(func));
	for (i = 0; i < $PRIV(__n_size); i++)
	{
		ind = subscr(i,i);
		cb[ind].r = diag[i].r;
		cb[ind].i = diag[i].i;
	}
  
	for (p = 1; p < $PRIV(__n_size); p++)
	{
		tmp = $PRIV(__n_size) - p;
		for (i = 0; i < tmp; i++)
		{
			j = i + p;
			//$s = $T(,($j),($i))->Cmul($F(,($j),($j))->Csub($F(,($i),($i))));
			ind1 = subscr(i,i);
			ind2 = subscr(j,j);
			s.r = cb[ind2].r - cb[ind1].r;
			s.i = cb[ind2].i - cb[ind1].i;
			ind = subscr(j,i);
		  	CMULT(ca[ind].r, ca[ind].i, s.r, s.i, snum.r, snum.i)
		  	s.r = snum.r;
		  	s.i = snum.i;

			if (i < (j-1))
			{
                                //$s = $s + $T(,$i+1:$j-1,($i))->cdot(1, $F(,($j), $i+1:$j-1),1)->Csub($F(,$i+1:$j-1,($i))->cdot(1,$T(,($j), $i+1:$j-1),1));			
				
				// $T(,$i+1:$j-1,($i))->cdot(1, $F(,($j), $i+1:$j-1),1)
				l = 0;
				for(k = i+1; k < j; k++)
				{
					ind = subscr(j,k);
					diag[l].r = cb[ind].r;
					diag[l++].i = cb[ind].i;
				}
				ind = subscr(i+1,i);
                                // TODO : Humm
				zdotu_(&s.r, &l, &ca[ind].r, &c__1, &diag[0].r, &c__1);
				snum.r += s.r; 
				snum.i += s.i;

				// $F(,$i+1:$j-1,($i))->cdot(1,$T(,($j), $i+1:$j-1),1)
				l = 0;
				for(k = i+1; k < j; k++)
				{
					ind = subscr(j,k);
					diag[l].r = ca[ind].r;
					diag[l++].i = ca[ind].i;
				}
				ind = subscr(i+1,i);
                                // TODO : Humm
				zdotu_(&s.r, &l, &cb[ind].r, &c__1, &diag[0].r, &c__1);
				snum.r -= s.r; 
				snum.i -= s.i;
				ind = subscr(j,i);
			}

		  	//$sdenum = $T(,($j),($j))->Csub($T(,($i),($i)));
			sdenum.r = ca[ind2].r - ca[ind1].r;
			sdenum.i = ca[ind2].i - ca[ind1].i;

			//$s = $s / $sdenum;
			if (fabs (sdenum.r) > fabs (sdenum.i))
			{
				if (sdenum.r == 0)
				{
					$info() = -1;
					goto ENDCTRFUN;
				}
				tt = sdenum.i / sdenum.r;
				dn = sdenum.r + tt * sdenum.i;
				cb[ind].r = (snum.r + tt * snum.i) / dn;
				cb[ind].i = (snum.i - tt * snum.r) / dn;
			}
			else
			{
				if (sdenum.i == 0)
				{
					$info() = -1;
					goto ENDCTRFUN;
				}
				tt = sdenum.r / sdenum.i;
				dn = sdenum.r * tt + sdenum.i;
				cb[ind].r = (snum.r * tt + snum.i) / dn;
				cb[ind].i = (snum.i * tt - snum.r) / dn;
			}
		}
	}
ENDCTRFUN:
	;
#undef subscr

',
);

pp_addpm(<<'EOD');
my $pi;
BEGIN { $pi = pdl(3.1415926535897932384626433832795029) }
sub pi () { $pi->copy };

*sec = \&PDL::sec;
sub PDL::sec{1/cos($_[0])}

*csc = \&PDL::csc;
sub PDL::csc($) {1/sin($_[0])}

*cot = \&PDL::cot;
sub PDL::cot($) {1/(sin($_[0])/cos($_[0]))}

*sech = \&PDL::sech;
sub PDL::sech($){1/pdl($_[0])->cosh}

*csch = \&PDL::csch;
sub PDL::csch($) {1/pdl($_[0])->sinh}

*coth = \&PDL::coth;
sub PDL::coth($) {1/pdl($_[0])->tanh}

*asec = \&PDL::asec;
sub PDL::asec($) {my $tmp = 1/pdl($_[0]) ; $tmp->acos}

*acsc = \&PDL::acsc;
sub PDL::acsc($) {my $tmp = 1/pdl($_[0]) ; $tmp->asin}

*acot = \&PDL::acot;
sub PDL::acot($) {my $tmp = 1/pdl($_[0]) ; $tmp->atan}

*asech = \&PDL::asech;
sub PDL::asech($) {my $tmp = 1/pdl($_[0]) ; $tmp->acosh}

*acsch = \&PDL::acsch;
sub PDL::acsch($) {my $tmp = 1/pdl($_[0]) ; $tmp->asinh}

*acoth = \&PDL::acoth;
sub PDL::acoth($) {my $tmp = 1/pdl($_[0]) ; $tmp->atanh}

my $_tol = 9.99999999999999e-15;

sub toreal{
	return $_[0] if $_[0]->isempty;
	$_tol = $_[1] if defined $_[1];
	my ($min, $max, $tmp);
	($min, $max) = $_[0]->slice('(1)')->minmax;
	return re($_[0])->sever unless (abs($min) > $_tol || abs($max) > $_tol);
	$_[0];
}

=head2 mlog

=for ref

Return matrix logarithm of a square matrix.

=for usage

 PDL = mlog(PDL(A))

=for example

 my $a = random(10,10);
 my $log = mlog($a);

=cut

*mlog = \&PDL::mlog;
sub PDL::mlog {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("mlog requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	mfun($m, sub{$_[0].=log $_[0]} , 0, $tol);
}

=head2 msqrt

=for ref

Return matrix square root (principal) of a square matrix.

=for usage

 PDL = msqrt(PDL(A))

=for example

 my $a = random(10,10);
 my $sqrt = msqrt($a);

=cut

*msqrt = \&PDL::msqrt;

sub PDL::msqrt {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("msqrt requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	$m = $m->r2C unless @dims == 3;
	my ($t, undef, $z, undef, $info) = $m->mschur(1);
	if ($info){
		warn "msqrt: Can't compute Schur form\n";
		return;		
	}
	($t, $info) = $t->ctrsqrt(0);
	if($info){
		warn "msqrt: can't compute square root\n";
		return;
	}
	$m = $z x $t x $z->t(1);
	return (@dims ==3) ? $m : toreal($m, $tol);
}

=head2 mexp

=for ref

Return matrix exponential of a square matrix.

=for usage

 PDL = mexp(PDL(A))

=for example

 my $a = random(10,10);
 my $exp = mexp($a);

=cut

*mexp = \&PDL::mexp;
sub PDL::mexp {
	my ($m, $order, $trace) = @_;
	my @dims = $m->dims;
	barf("mexp requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($info, $ns);
	$info  = PDL::null;
	$ns = PDL::null;
	$trace = 1 unless defined $trace;
	$order = 6 unless defined $order;
	$m = $m->copy;
	@dims == 3 ? $m->xchg(1,2)->cgeexp($order, 1, $trace, $ns, $info) :
			$m->xchg(0,1)->geexp($order, 1, $trace, $ns, $info);
	if ($info){
		warn "mexp: Error $info";
	}
	else{
		return $m;
	}
}

#*mexp2 = \&PDL::mexp2;
#sub PDL::mexp2 {
#	my ($m, $order) = @_;
#	my @dims = $m->dims;
#	barf("mexp requires a 2-D square matrix")
#		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

#	my ($norm, $X, $c, $D, $N, $fact, $cX, $trace);
#	if (defined $order){
#		$order++;
#	}
#	else{
#		$order = 8;
#	}
	

	# Trace normalization
#	$m = $m->copy;
#	$trace = $m->diag->sumover / $m->dim(1);

#	if (@dims == 3){
#		$trace = $trace->im->r2C if ($trace->re < 0);
#		$m->diagonal(1,2) .= $m->diagonal(1,2) - $trace;	
#	}
#	elsif ($trace > 0){
#		$m->diagonal(0,1) -= $trace;
#	}
	



	# Scale M
#	$norm = $m->mnorm(0);
#	$norm = $norm > 0 ?  PDL::floor(1 + ($norm->log / log(2))) : 0;
#	$norm = 0 unless $norm > 0;
#	$m = $m / 2**$norm if $norm > 0;

#	$X = $m;
#	$N = $m / 2;
#	$D = -$m / 2;
#	$c = 0.5;
#	if (@dims == 3){
#		$N->re->diagonal(0,1)++;
#		$D->re->diagonal(0,1)++;
#	}
#	else{
#		$N->diagonal(0,1)++;
#		$D->diagonal(0,1)++;
#	}
#
#	for ($fact = 2; $fact <= $order;$fact++){
#		# Padé coeff
#		$c = $c * ($order - $fact + 1 ) / ($fact * (2 * $order - $fact +1));
#
#		if (@dims == 3){
#			$X = PDL::cmmult($m, $X);
#			$cX = PDL::Complex::Cmul($X, PDL::Complex::r2C(PDL->pdl($c)));
#		}
#		else{
#			$X = PDL::mmult($m, $X);
#			$cX = PDL::mult($X, PDL->pdl($c),0);
#		}
#		$N = PDL::plus($N,$cX,0);
#		$D = ($fact % 2) ? PDL::minus($D,$cX,0) :
#			PDL::plus($D,$cX,0);
#	}	
#
#	$X = PDL::msolvex($D,$N, equilibrate=>1);
#
#	# Squaring
#	if($norm > 0){
#		for(1..$norm){
#			$X x= $X;
#		}
#	}
#	
#	# Reverse trace normalization	
#	$X = $trace->exp * $X if (@dims == 3 || $trace > 0);
#	$X;
#}


*mexpts = \&PDL::mexpts;
sub PDL::mexpts {
	my ($m, $order, $tol) = @_;
	my @dims = $m->dims;
	barf("mexp1 requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($em, $trm);
	$order = 20 unless defined $order;
	$em = (@dims == 3 ) ? diag(r2C ones($dims[1])) : diag(ones($dims[1]));
	$trm = $em->copy;
	for (1..($order - 1)){
		$trm =  $trm x ($m / $_);
	        $em += $trm;
	}
	return (@dims ==3) ? $em : toreal($em, $tol);
}

=head2 mpow

=for ref

Return matrix power of a square matrix.

=for usage

 PDL = mpow(PDL(A), SCALAR(exponent))

=for example

 my $a = random(10,10);
 my $powered = mpow($a,2.5);

=cut

#TODO: improve it (really crappy)

*mpow = \&PDL::mpow;
sub PDL::mpow {
	my ($m, $power, $tol, $eigen) = @_;
	my @dims = $m->dims;
	barf("mpow requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my $ret;
	if (UNIVERSAL::isa($power,'PDL') and $power->dims > 1){
		my ($e, $v) = $m->meigen(0,1);
		$ret = $v * $e->Cpow($power) x $v->minv;
	}
	elsif( 1/$dims[-1] * 1000 > abs($power)  and !$eigen){
		$ret = $m;
		my $pow = floor($power);
		$pow++ if ($power < 0 and $power != $pow);
                
                # TODO: what a beautiful thing (is it a game ?)
		for(my $i = 1; $i < abs($pow); $i++){	$ret x= $m;}
		$ret = $ret->minv if $power < 0;
		if ($power = $power - $pow){
			if($power == 0.5){
				my $v = $m->msqrt;
				$ret = ($pow == 0) ? $v : $ret x $v;
			}
			else{
				my ($e, $v) = $m->meigen(0,1);
				$ret = ($pow == 0) ? ($v * $e**$power x $v->minv) : $ret->r2C x ($v * $e**$power x $v->minv);
			}			
		}
	}
	else{
		my ($e, $v) = $m->meigen(0,1);
		$ret = $v * $e**$power x $v->minv;
	}
	return (@dims ==3) ? $ret : toreal($ret, $tol);

}

=head2 mcos

=for ref

Return matrix cosine of a square matrix.

=for usage

 PDL = mcos(PDL(A))

=for example

 my $a = random(10,10);
 my $cos = mcos($a);

=cut

*mcos = \&PDL::mcos;
sub PDL::mcos {
	my $m = shift;
	my @dims = $m->dims;
	barf("mcos requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return (@dims == 3) ? (mexp(i*$m) + mexp(- i*$m)) / 2 : re(mexp(i*$m))->sever;
}

=head2 macos

=for ref

Return matrix inverse cosine of a square matrix.

=for usage

 PDL = macos(PDL(A))

=for example

 my $a = random(10,10);
 my $acos = macos($a);

=cut

*macos = \&PDL::macos;
sub PDL::macos {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("macos requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($id, $ret);
	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	$ret = i * mlog( ($m->r2C - i * msqrt( ($id - $m x $m), $tol)));
	return (@dims ==3) ? $ret : toreal($ret, $tol);
}

=head2 msin

=for ref

Return matrix sine of a square matrix.

=for usage

 PDL = msin(PDL(A))

=for example

 my $a = random(10,10);
 my $sin = msin($a);

=cut

*msin = \&PDL::msin;
sub PDL::msin {
	my $m = shift;
	my @dims = $m->dims;
	barf("msin requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return (@dims == 3) ? (mexp(i*$m) - mexp(- i*$m))/(2*i) : im(mexp(i*$m))->sever;

}

=head2 masin

=for ref

Return matrix inverse sine of a square matrix.

=for usage

 PDL = masin(PDL(A))

=for example

 my $a = random(10,10);
 my $asin = masin($a);

=cut

*masin = \&PDL::masin;
sub PDL::masin {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("masin requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($ret, $id);
	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	$ret = (- i) * mlog(((i*$m) + msqrt($id - $m x $m, $tol)));
	return (@dims ==3) ? $ret : toreal($ret, $tol);
}

=head2 mtan

=for ref

Return matrix tangent of a square matrix.

=for usage

 PDL = mtan(PDL(A))

=for example

 my $a = random(10,10);
 my $tan = mtan($a);

=cut

*mtan = \&PDL::mtan;
sub PDL::mtan {
	my ($m, $id) = @_;
	my @dims = $m->dims;
	barf("mtan requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return scalar msolvex(mcos($m), msin($m),equilibrate=>1) unless $id;

	
	if (@dims == 3){
		$id = PDL::zeroes $m;
		$id->slice('(0),:,:')->diagonal(0,1) .= 1;
		$m = mexp(-2*i*$m);
		return scalar msolvex( ($id + $m ),( (- i) * ($id - $m)),equilibrate=>1);
	}
	else{
		$m = mexp(i * $m);
		return scalar $m->re->msolvex($m->im,equilibrate=>1);
	}

}

=head2 matan

=for ref

Return matrix inverse tangent of a square matrix.

=for usage

 PDL = matan(PDL(A))

=for example

 my $a = random(10,10);
 my $atan = matan($a);

=cut

*matan = \&PDL::matan;
sub PDL::matan {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("matan requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($id, $ret);
	$id = PDL::zeroes($m)->r2C;
	$id->re->diagonal(0,1) .= 1;
	$ret = - i/2 * mlog( scalar PDL::msolvex( ($id - i*$m) ,($id + i*$m),equilibrate=>1 ));
	return (@dims ==3) ? $ret : toreal($ret, $tol);
}

=head2 mcot

=for ref

Return matrix cotangent of a square matrix.

=for usage

 PDL = mcot(PDL(A))

=for example

 my $a = random(10,10);
 my $cot = mcot($a);

=cut

*mcot = \&PDL::mcot;
sub PDL::mcot {
	my ($m, $id) = @_;
	my @dims = $m->dims;
	barf("mcot requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return scalar msolvex(msin($m),mcos($m),equilibrate=>1) unless $id;

	if (@dims == 3){
		$id = PDL::zeroes $m;
		$id->slice('(0),:,:')->diagonal(0,1) .= 1;
		$m = mexp(-2*i*$m);
		return  scalar msolvex( ($id - $m ),(  i * ($id + $m)),equilibrate=>1);
	}
	else{
		$m = mexp(i * $m);
		return scalar $m->im->msolvex($m->re,equilibrate=>1);
	}
}

=head2 macot

=for ref

Return matrix inverse cotangent of a square matrix.

=for usage

 PDL = macot(PDL(A))

=for example

 my $a = random(10,10);
 my $acot = macot($a);

=cut

*macot = \&PDL::macot;
sub PDL::macot {
	my ($m, $tol, $id) = @_;
	my @dims = $m->dims;
	barf("macot requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($inv, $info) = $m->minv;
	if ($info){
		warn "macot: singular matrix";
		return;
	}
	return matan($inv,$tol);
}

=head2 msec

=for ref

Return matrix secant of a square matrix.

=for usage

 PDL = msec(PDL(A))

=for example

 my $a = random(10,10);
 my $sec = msec($a);

=cut

*msec = \&PDL::msec;
sub PDL::msec {
	my $m = shift;
	my @dims = $m->dims;
	barf("msec requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return (@dims == 3) ? PDL::minv(mexp(i+$m) + mexp(- i*$m)) * 2 : scalar PDL::minv(re(mexp(i*$m)));
}

=head2 masec

=for ref

Return matrix inverse secant of a square matrix.

=for usage

 PDL = masec(PDL(A))

=for example

 my $a = random(10,10);
 my $asec = masec($a);

=cut

*masec = \&PDL::masec;
sub PDL::masec {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("masec requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($inv, $info) = $m->minv;
	if ($info){
		warn "masec: singular matrix";
		return;
	}
	return macos($inv,$tol);
}

=head2 mcsc

=for ref

Return matrix cosecant of a square matrix.

=for usage

 PDL = mcsc(PDL(A))

=for example

 my $a = random(10,10);
 my $csc = mcsc($a);

=cut

*mcsc = \&PDL::mcsc;
sub PDL::mcsc {
	my $m = shift;
	my @dims = $m->dims;
	barf("mcsc requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return (@dims == 3) ? PDL::minv(mexp(i*$m) - mexp(- i*$m)) * 2*i : scalar PDL::minv(im(mexp(i*$m)));

}

=head2 macsc

=for ref

Return matrix inverse cosecant of a square matrix.

=for usage

 PDL = macsc(PDL(A))

=for example

 my $a = random(10,10);
 my $acsc = macsc($a);

=cut

*macsc = \&PDL::macsc;
sub PDL::macsc {
	my ($m, $tol) = @_;
	my @dims = $m->dims;
	barf("macsc requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($inv, $info) = $m->minv;
	if ($info){
		warn "macsc: singular matrix";
		return;
	}
	return masin($inv,$tol);
}

=head2 mcosh

=for ref

Return matrix hyperbolic cosine of a square matrix.

=for usage

 PDL = mcosh(PDL(A))

=for example

 my $a = random(10,10);
 my $cos = mcosh($a);

=cut

*mcosh = \&PDL::mcosh;

sub PDL::mcosh {
	my $m  = shift;
	my @dims = $m->dims;
	barf("mcosh requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	( $m->mexp + mexp(-$m) )/2;
}

=head2 macosh

=for ref

Return matrix hyperbolic inverse cosine of a square matrix.

=for usage

 PDL = macosh(PDL(A))

=for example

 my $a = random(10,10);
 my $acos = macosh($a);

=cut

*macosh = \&PDL::macosh;

sub PDL::macosh {
	my ($m, $tol)  =  @_;
	my @dims = $m->dims;
	barf("macosh requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($id, $ret);
	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	$ret = msqrt($m x $m - $id);
	$m = $m->r2C if $ret->getndims > @dims;
	mlog($m + $ret, $tol);
}

=head2 msinh

=for ref

Return matrix hyperbolic sine of a square matrix.

=for usage

 PDL = msinh(PDL(A))

=for example

 my $a = random(10,10);
 my $sinh = msinh($a);

=cut

*msinh = \&PDL::msinh;

sub PDL::msinh {
	my $m  = shift;
	my @dims = $m->dims;
	barf("msinh requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	( $m->mexp - mexp(-$m) )/2;
}

=head2 masinh

=for ref

Return matrix hyperbolic inverse sine of a square matrix.

=for usage

 PDL = masinh(PDL(A))

=for example

 my $a = random(10,10);
 my $asinh = masinh($a);

=cut

*masinh = \&PDL::masinh;

sub PDL::masinh {
	my ($m, $tol)  = @_;
	my @dims = $m->dims;
	barf("masinh requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	my ($id, $ret);
	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	$ret = msqrt($m x $m + $id);
	$m = $m->r2C if $ret->getndims > @dims;	
	mlog(($m + $ret), $tol);

}

=head2 mtanh

=for ref

Return matrix hyperbolic tangent of a square matrix.

=for usage

 PDL = mtanh(PDL(A))

=for example

 my $a = random(10,10);
 my $tanh = mtanh($a);

=cut

*mtanh = \&PDL::mtanh;

sub PDL::mtanh {
	my ($m, $id)  = @_;
	my @dims = $m->dims;
	barf("mtanh requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	return scalar msolvex(mcosh($m), msinh($m),equilibrate=>1) unless $id;

	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	$m = mexp(-2*$m);
	return  scalar msolvex( ($id + $m ),($id - $m), equilibrate=>1);
}

=head2 matanh

=for ref

Return matrix hyperbolic inverse tangent of a square matrix.

=for usage

 PDL = matanh(PDL(A))

=for example

 my $a = random(10,10);
 my $atanh = matanh($a);

=cut

*matanh = \&PDL::matanh;

sub PDL::matanh {
	my ($m, $tol)  = @_;
	my @dims = $m->dims;
	barf("matanh requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($id, $ret);
	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	mlog( scalar msolvex( ($id - $m ),($id + $m),equilibrate=>1), $tol ) / 2;
}

=head2 mcoth

=for ref

Return matrix hyperbolic cotangent of a square matrix.

=for usage

 PDL = mcoth(PDL(A))

=for example

 my $a = random(10,10);
 my $coth = mcoth($a);

=cut

*mcoth = \&PDL::mcoth;

sub PDL::mcoth {
	my ($m, $id)  = @_;
	my @dims = $m->dims;
	barf("mcoth requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	
	scalar msolvex(msinh($m), mcosh($m),equilibrate=>1) unless $id;
	$id = PDL::zeroes $m;
	(@dims == 3) ? $id->slice('(0),:,:')->diagonal(0,1) .= 1 : $id->diagonal(0,1) .= 1;
	$m = mexp(-2*$m);
	return  scalar msolvex( ($id - $m ),($id + $m),equilibrate=>1);
}

=head2 macoth

=for ref

Return matrix hyperbolic inverse cotangent of a square matrix.

=for usage

 PDL = macoth(PDL(A))

=for example

 my $a = random(10,10);
 my $acoth = macoth($a);

=cut

*macoth = \&PDL::macoth;

sub PDL::macoth {
	my ($m, $tol)  = @_;
	my @dims = $m->dims;
	barf("macoth requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($inv, $info) = $m->minv;
	if ($info){
		warn "macoth: singular matrix";
		return;
	}
	return matanh($inv,$tol);
}

=head2 msech

=for ref

Return matrix hyperbolic secant of a square matrix.

=for usage

 PDL = msech(PDL(A))

=for example

 my $a = random(10,10);
 my $sech = msech($a);

=cut

*msech = \&PDL::msech;

sub PDL::msech {
	my $m  = shift;
	my @dims = $m->dims;
	barf("msech requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	PDL::minv( $m->mexp + mexp(-$m) ) * 2;
}

=head2 masech

=for ref

Return matrix hyperbolic inverse secant of a square matrix.

=for usage

 PDL = masech(PDL(A))

=for example

 my $a = random(10,10);
 my $asech = masech($a);

=cut

*masech = \&PDL::masech;

sub PDL::masech {
	my ($m, $tol)  = @_;
	my @dims = $m->dims;
	barf("masech requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($inv, $info) = $m->minv;
	if ($info){
		warn "masech: singular matrix";
		return;
	}
	return macosh($inv,$tol);
}

=head2 mcsch

=for ref

Return matrix hyperbolic cosecant of a square matrix.

=for usage

 PDL = mcsch(PDL(A))

=for example

 my $a = random(10,10);
 my $csch = mcsch($a);

=cut

*mcsch = \&PDL::mcsch;

sub PDL::mcsch {
	my $m  = shift;
	my @dims = $m->dims;
	barf("mcsch requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	PDL::minv( $m->mexp - mexp(-$m) ) * 2;
}

=head2 macsch

=for ref

Return matrix hyperbolic inverse cosecant of a square matrix.

=for usage

 PDL = macsch(PDL(A))

=for example

 my $a = random(10,10);
 my $acsch = macsch($a);

=cut

*macsch = \&PDL::macsch;

sub PDL::macsch {
	my ($m, $tol)  = @_;
	my @dims = $m->dims;
	barf("macsch requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

	my ($inv, $info) = $m->minv;
	if ($info){
		warn "macsch: singular matrix";
		return;
	}
	return masinh($inv,$tol);
}

=head2 mfun

=for ref

Return matrix function of second argument of a square matrix.
Function will be applied on a PDL::Complex object.

=for usage

 PDL = mfun(PDL(A),'cos')

=for example

 my $a = random(10,10);
 my $fun = mfun($a,'cos');
 sub sinbycos2{
 	$_[0]->set_inplace(0);
 	$_[0] .= $_[0]->Csin/$_[0]->Ccos**2;
 }
 # Try diagonalization
 $fun = mfun($a, \&sinbycos2,1);
 # Now try Schur/Parlett
 $fun = mfun($a, \&sinbycos2);
 # Now with function.
 scalar msolve($a->mcos->mpow(2), $a->msin);

=cut

*mfun = \&PDL::mfun;

sub PDL::mfun {
	my ($m, $method, $diag, $tol)  = @_;
	my @dims = $m->dims;
	barf("mfun requires a 2-D square matrix")
		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );
	
	if ($diag){
		my ($e, $v, $inv, $info);
		($e, $v) = $m->meigen(0,1);
	
		($inv, $info) = $v->minv;
		unless ($info){
			$method = 'PDL::Complex::'.$method unless ref($method);
			eval {$v = ($v * $e->$method) x $v->minv;};
			if ($@){
				warn "mfun: Error $@\n";
				return;
			}
		}
		else{
			warn "mfun: Non invertible matrix in computation of $method\n";
			return;
		}
		return (@dims ==3) ? $v : toreal($v, $tol);
	}
	else{
		$m = $m->r2C unless @dims == 3;
		my ($t, undef, $z, undef, $info) = $m->mschur(1);

		if ($info){
			warn "mfun: Can't compute Schur form\n";
			return;		
		}

		$method = 'PDL::Complex::'.$method unless ref($method);
		($t, $info) = $t->ctrfun(0,$method);
		if($info){
			warn "mfun: Can't compute $method\n";
			return;
		}
		$m = $z x $t x $z->t(1);
		return (@dims ==3) ? $m : toreal($m, $tol);	
	
	}
}

#*mspfun = \&PDL::mspfun;

#sub PDL::mspfun {
#	my ($m, $method, $tol)  = @_;
#	my @dims = $m->dims;
#	barf("mspfun requires a 2-D square matrix")
#		unless( ((@dims == 2) || (@dims == 3)) && $dims[-1] == $dims[-2] );

#	my ($T, $Z, $F, $p, $i, $j, $sden, $s );
#	($T, undef, $Z) = $m->r2C->mschur(1);
 	
#	$F = $T->diagonal(1,2)->$method->diag;
#	for $p (1..($dims[-1] - 1 )){
#		for $i (0..($dims[-1]-$p-1)){
#			$j = $i + $p;
#			$s = $T(,($j),($i))->Cmul($F(,($j),($j))->Csub($F(,($i),($i))));
#			if ($i < ($j-1)){
#				$s = $s + $T(,$i+1:$j-1,($i))->cdot(1, $F(,($j), $i+1:$j-1),1)->Csub($F(,$i+1:$j-1,($i))->cdot(1,$T(,($j), $i+1:$j-1),1));
#			}
#			$sden = $T(,($j),($j))->Csub($T(,($i),($i)));
#			if ($sden != 0){
#	                	$s = $s / $sden;
#  			}
#                       else{
#				barf "Illegal division by zero occured\n";
#                       }
#			$F(,($j),($i)) .= $s;
#        	}
#	}
#	print $F;
#	$m = $Z x $F x $Z->t(1);
#	return (@dims ==3) ? $m : toreal($m, $tol);
#
#}

=head1 TODO

Improve error return and check singularity.
Improve (msqrt,mlog) / r2C

=head1 AUTHOR

Copyright (C) Grégory Vanuxem 2005-2007.

This library is free software; you can redistribute it and/or modify
it under the terms of the artistic license as specified in the Artistic
file.

=cut


EOD

pp_done();

1;