pp_addpm({At=>'Top'},<<'EOD');

=head1 NAME

PDL::ImageND - useful image processing in N dimensions

=head1 DESCRIPTION

These routines act on PDLs as N-dimensional objects, not as threaded 
sets of 0-D or 1-D objects.  The file is sort of a catch-all for 
broadly functional routines, most of which could legitimately 
be filed elsewhere (and probably will, one day).  

ImageND is not a part of the PDL core (v2.4) and hence must be explicitly
loaded.

=head1 SYNOPSIS

 use PDL::ImageND;

 $b = $a->convolveND($kernel,{bound=>'periodic'});
 $b = $a->rebin(50,30,10);
 
=cut


EOD

pp_addpm({At=>'Bot'},<<'EOD');

=head1 AUTHORS

Copyright (C) Karl Glazebrook and Craig DeForest, 1997, 2003
All rights reserved. There is no warranty. You are allowed
to redistribute this software / documentation under certain
conditions. For details, see the file COPYING in the PDL
distribution. If this file is separated from the PDL distribution,
the copyright notice should be included in the file.

=cut


EOD

# N-dim utilities

pp_addhdr('

/* Compute offset of (x,y,z,...) position in row-major list */

PDL_Indx ndim_get_offset(PDL_Indx* pos, PDL_Indx* dims, PDL_Long ndims) {
   /* Flatten an N-d position into a linear offset.  pos[0] has stride 1
      and every later axis has a stride equal to the product of all the
      earlier entries of dims[].  Returns 0 when ndims is not positive. */
   PDL_Indx offset, stride;
   PDL_Long axis;

   if (ndims <= 0)
      return 0;

   offset = pos[0];
   stride = 1;
   for (axis = 1; axis < ndims; axis++) {
      stride *= dims[axis-1];
      offset += pos[axis] * stride;
   }
   return offset;
}

/* Increment a position pointer array by one row */

void ndim_row_plusplus ( PDL_Indx* pos, PDL_Indx* dims, PDL_Long ndims ) {

    /* Advance pos[] to the start of the next row: bump pos[1] and carry
       into higher axes whenever an axis reaches its size in dims[].
       pos[0] is never touched.  When the top axis overflows it is left
       equal to its dim (not wrapped back to zero), so callers have to
       bound their own iteration. */

    PDL_Long axis;

    /* A position with fewer than two axes has no row index: pos[1] and
       dims[1] do not exist, so touching them would run off the end of
       both arrays.  There is nothing to advance in that case. */
    if (ndims < 2)
       return;

    for (axis = 1; ; axis++) {

       (pos[axis])++;

       if (pos[axis] != dims[axis])
          break;                 /* No carry needed: done */

       if (axis >= ndims-1)
          break;                 /* Top axis overflowed: stop here */

       pos[axis] = 0;            /* Carry into the next axis */
    }
}

');

pp_addpm(<<'EOD');

use Carp;

EOD

pp_add_exported('','kernctr');


pp_def('convolve',Doc=><<'EOD',
=for ref

N-dimensional convolution (Deprecated; use convolveND)

=for usage

$new = convolve $a, $kernel

Convolve an array with a kernel, both of which are N-dimensional.  This 
routine does direct convolution (by copying) but uses quasi-periodic
boundary conditions: each dim "wraps around" to the next higher row in
the next dim.  

This routine is kept for backwards compatibility with earlier scripts; 
for most purposes you want L<convolveND|PDL::ImageND/convolveND> instead:
it runs faster and handles a variety of boundary conditions.

=cut


EOD
        Pars => 'a(m); b(n); indx adims(p); indx bdims(q); [o]c(m);',
        PMCode => '

# Custom Perl wrapper

sub PDL::convolve{
    # Flatten both inputs, pass their original dim lists to the compiled
    # routine as piddles, then give the result the shape of the first
    # argument.  An optional third argument receives the output.
    my $nargs = scalar @_;
    my($image,$kernel,$out) = @_;
    barf("Usage: convolve(a(*), b(*), [o]c(*)") unless $nargs == 2 || $nargs == 3;
    $out = PDL->null if $nargs < 3;

    my $flat_image  = $image->clump(-1);
    my $flat_kernel = $kernel->clump(-1);
    my $image_dims  = long([$image->dims]);
    my $kernel_dims = long([$kernel->dims]);
    # A multi-dim output supplied by the caller is flattened like the inputs
    my $flat_out    = $out->getndims > 1 ? $out->clump(-1) : $out;

    &PDL::_convolve_int( $flat_image, $flat_kernel,
       $image_dims, $kernel_dims,
       $flat_out
     );
    $out->setdims([$image->dims]);

    return $out unless $image->is_inplace;

    # Inplace request: copy the result back into the input, clear the flag
    $image .= $out;
    $image->set_inplace(0);
    return $image;
}

',
        Code => '
   /*
    * Direct convolution of the flattened image a(m) with the flattened
    * kernel b(n).  adims(p) and bdims(q) carry the original dimension
    * lists of the two arrays.  For every kernel element a signed offset
    * into the flattened image (relative to the kernel centre) is computed
    * once; each output element is then the sum of products taken at those
    * offsets, wrapped modulo the total number of image elements.
    *
    * Barfs if the two arrays differ in dimensionality, if the kernel is
    * larger than the image along any axis, or if scratch memory cannot be
    * allocated.
    */
   PDL_Indx *dimsa = $P(adims);
   PDL_Indx *dimsb = $P(bdims);
   PDL_Indx andims = $SIZE(p);
   PDL_Indx bndims = $SIZE(q);
   PDL_Indx anvals = $SIZE(m);
   PDL_Indx bnvals = $SIZE(n);
   PDL_Indx *pos,*off;
   double cc;

   PDL_Indx i,i2,j,k,n,nrow;
   PDL_Indx ncen = 0;      /* Flat index of the centre element of b */
   PDL_Indx offcen = 0;    /* Offset in a that belongs to that centre */

   if (andims != bndims)
      barf("Arguments do not have the same dimensionality");
   for(i=0; i<andims; i++)
         if (dimsb[i]>dimsa[i])
             barf("Second argument must be smaller in all dimensions than first");

   pos = (PDL_Indx*) malloc( andims * sizeof(PDL_Indx) ); /* Init pos[] */
   if (pos==NULL)
      barf("Out of Memory\n");
   for (i=0; i<andims; i++) /* Zero */
       pos[i]=0;

   /* Find middle pixel in b: the element whose index is dim/2 on every axis */
   i=0; nrow = dimsb[0];
   while(i<bnvals) {
      for (j=0; j<nrow; j++) { /* For each row */
           int centre = 1;
           pos[0]=j;

           for(k=0;k<bndims;k++) {       /* Is centre? */
               if (pos[k] != dimsb[k]/2) {
                   centre = 0;
                   break;
               }
           }
           if (centre)
               ncen = i;
           i++;
      }
      pos[0]=0;
      /* A 1-D kernel is a single row, so there is no row index to advance */
      if (bndims > 1)
         ndim_row_plusplus( pos, dimsb, bndims );
   }

   for (i=0; i<andims; i++) /* Zero */
       pos[i]=0;

   /* Initialise offset array to handle the relative coords efficiently */

   off = (PDL_Indx*) malloc(bnvals*sizeof(PDL_Indx)); /* Offset array */
   if (off==NULL) {
      free(pos);            /* Do not leak pos[] on the error path */
      barf("Out of Memory\n");
   }

   i=0;
   while(i<bnvals) {
      n = ndim_get_offset(pos, dimsa, andims); /* Start of row in A */
      for (j=0; j<nrow; j++) { /* Fill row */
           off[i] = n+j;
           if (i==ncen)
              offcen = off[i]; /* Offset to middle */
           i++;
      }
      /* pos[] walks the rows of the KERNEL, so it has to carry at the
         kernel dims; only the offset itself is measured in image strides.
         Carrying at the image dims gives wrong offsets for every kernel
         row beyond the first plane once there are three or more axes. */
      if (bndims > 1)
         ndim_row_plusplus( pos, dimsb, bndims );
   }

   for(i=0;i<bnvals;i++)    /* Subtract center offset */
       off[i]=offcen-off[i];

   /* Now convolve the data */

    for(i=0; i<anvals; i++) {
        cc = 0;
        for(j=0; j<bnvals; j++) {
            /* Adding anvals first keeps the operand of the modulo
               non-negative for offsets that point before element i */
            i2 = (i+off[j]+anvals) % anvals ;
            cc += $a( m=> i2 ) * $b(n=>j) ;
        }
        $c(m=>i) = cc;
     }
     free(pos); free(off);

');


pp_add_exported('',"ninterpol");

pp_addpm(<<'EOD');

=head2 ninterpol()

=for ref

N-dimensional interpolation routine

=for sig

 Signature: ninterpol(point(),data(n),[o]value())

=for usage

      $value = ninterpol($point, $data);

C<ninterpol> uses C<interpol> to find a linearly interpolated value in
N dimensions, assuming the data is spread on a uniform grid.  To use
an arbitrary grid distribution, need to find the grid-space point from
the indexing scheme, then call C<ninterpol> -- this is far from
trivial (and ill-defined in general).

See also L<interpND|PDL::Primitive/interpND>, which includes boundary 
conditions and allows you to switch the method of interpolation, but
which runs somewhat slower.

=cut


*ninterpol = \&PDL::ninterpol;

sub PDL::ninterpol {
    # Linear interpolation at one N-dimensional point: cut out the cell
    # of the grid that surrounds the point, then collapse it one axis at
    # a time with the 1-D interpol routine.
    use PDL::Math 'floor';
    use PDL::Primitive 'interpol';
    print 'Usage: $a = ninterpolate($point(s), $data);' unless @_ == 2;
    my ($point, $data) = @_;
    my $corner = floor($point);

    # isolate relevant N-cube: indices lo:lo+1 along every axis
    my @ranges;
    for my $lo (list $corner) {
        push @ranges, $lo . ':' . ($lo + 1);
    }
    my $cube = $data->slice(join(',', @ranges));

    # each pass interpolates along the current leading axis
    for my $frac (list ($point - $corner)) {
        $cube = interpol($frac, $cube->xvals, $cube);
    }
    $cube;
}

EOD

pp_def('rebin',Doc=><<'EOD',
=for ref

N-dimensional rebinning algorithm

=for usage

$new = rebin $a, $dim1, $dim2, ...;
$new = rebin $a, $template;
$new = rebin $a, $template, {Norm => 1};

Rebin an N-dimensional array to newly specified dimensions.
Specifying `Norm' keeps the sum constant, otherwise the intensities
are kept constant.  If more template dimensions are given than for the
input pdl, these dimensions are created; if less, the final dimensions
are maintained as they were.

So if C<$a> is a 10 x 10 pdl, then C<rebin($a,15)> is a 15 x 10 pdl,
while C<rebin($a,15,16,17)> is a 15 x 16 x 17 pdl (where the values
along the final dimension are all identical).

Expansion is performed by sampling; reduction is performed by averaging.
If you want different behavior, use L<PDL::Transform::map|PDL::Transform/map>
instead.  PDL::Transform::map runs slower but is more flexible.

=cut


EOD
        Pars => 'a(m); [o]b(n);',
        OtherPars => 'int ns => n',
        PMCode => '

# Custom Perl wrapper

sub PDL::rebin {
    # Rebin the input one axis at a time.  The target shape is either a
    # list of sizes or the dims of a template piddle; a trailing hash ref
    # carries options (only Norm is looked at here).
    my($a) = shift;
    my($opts) = ref $_[-1] eq "HASH" ? pop : {};
    my @idims = $a->dims;
    my @odims = ref $_[0] ? $_[0]->dims : @_;

    AXIS:
    for my $axis (0..$#odims) {
      my $want = $odims[$axis];

      # Axis absent from the input: just dummy extra dimensions
      if ($axis > $#idims) {
          $a = $a->dummy($axis,$want);
          next AXIS;
      }

      my $have = $idims[$axis];
      next AXIS if $want == $have;              # Nothing changes here

      # rebin_int can cope with all cases, but code
      # 1->n and n->1 separately for speed
      my $b;
      if (($want % $have) == 0) {               # Cells map 1 -> n
          my $rep = $want/$have;
          $b = $a->mv($axis,0)->dummy(0,$rep)->clump(2);
      }
      elsif (($have % $want) == 0) {            # Cells map n -> 1
          my $step = $have/$want;
          $a = $a->mv($axis,0);
          # -> copy so the input PDL is not corrupted
          $b = $a->slice("0:-1:$step")->copy;
          for my $start (1..$step-1) {
              $b += $a->slice("${start}:-1:${step}");
          }
          $b /= $step;
      }
      else {                                    # Cells map n -> m
          &PDL::_rebin_int($a->mv($axis,0), $b = null, $want);
      }
      $a = $b->mv(0,$axis);
    }

    unless (exists $opts->{Norm} and $opts->{Norm}) {
      # Explicit copy so i) the input PDL cannot be corrupted through
      #                     this link
      #                 ii) no space is wasted on invisible elements
      return $a -> copy;
    }

    # Norm requested: rescale by the ratio of old to new cell counts
    my $norm = 1;
    for my $axis (0..$#odims) {
       if ($axis > $#idims) {
            $norm /= $odims[$axis];
       } else {
            $norm *= $idims[$axis]/$odims[$axis];
       }
    }
    return $a * $norm;
}
',
        Code => '
        /* Rebin a(m) onto b(n) along one axis.  Each input cell is mapped
           onto the output grid and its value is added to every output
           cell it overlaps, weighted by the length of the overlap measured
           in output-cell units. */
        int ms = $SIZE(m);     /* number of input cells */
        int nv = $PRIV(ns);    /* the ns parameter: number of output cells */
      int i;                   /* output cell currently being filled */
      double u, d;             /* u: upper edge of the current input cell,
                                  d: edge up to which input has already been
                                  handed out; both in output coordinates */
      $GENERIC(a) av;
         threadloop %{
          /* restart the sweep for every thread iteration */
          i = 0;
          d = -1;
          loop (n) %{ $b() = 0; %}
          loop (m) %{
              av = $a();
              /* upper edge of input cell m, shifted by -1 so that output
                 cell i spans the interval from i-1 to i */
              u = nv*((m+1.)/ms)-1;
              /* output cells that end inside this input cell receive the
                 remainder of their unit interval */
              while (i <= u) {
                 $b(n => i) +=  (i-d)*av;
                 d = i;
                 i++;
              }
              /* the leftover piece goes to the next output cell, which the
                 following input cell will go on filling; the bounds test
                 keeps the write inside b */
              if (i < nv) $b(n => i) +=  (u-d)*av;
              d = u;
          %}
      %}
');


pp_addpm(<<'EOD');

=head2 circ_mean_p

=for ref

Calculates the circular mean of an n-dim image and returns
the projection. Optionally takes the center to be used.

=for usage

   $cmean=circ_mean_p($im);
   $cmean=circ_mean_p($im,{Center => [10,10]});

=cut


sub circ_mean_p {
 # Radial profile of an image: for every integer radius, the mean of all
 # pixels that fall at that radius.  $opt, when given, is handed on to
 # rvals (e.g. to choose the centre).
 my ($a,$opt) = @_;

 # integer radius of every pixel
 my $rad;
 if (!defined $opt) {
   $rad = long rvals $a;
 }
 else {
   $rad = long PDL::rvals($a,$opt);
 }

 my $nbins  = $rad->max+1;
 my $radius = $rad->clump(-1);

 my $total = zeroes($nbins);
 PDL::indadd $a->clump(-1), $radius, $total;  # sum of the pixels per radius
 my $count = zeroes($nbins);
 PDL::indadd pdl(1), $radius, $count;         # number of pixels per radius

 $total /= $count;
 return $total;
}

=head2 circ_mean

=for ref

Smooths an image by applying circular mean.
Optionally takes the center to be used.

=for usage

   circ_mean($im);
   circ_mean($im,{Center => [10,10]});

=cut


sub circ_mean {
 # Replace every pixel by the mean of all pixels at the same integer
 # radius.  The image passed in is overwritten and returned.  $opt, when
 # given, is handed on to rvals (e.g. to choose the centre).
 my ($a,$opt) = @_;

 # integer radius of every pixel
 my $rad;
 if (!defined $opt) {
   $rad = long rvals $a;
 }
 else {
   $rad = long PDL::rvals($a,$opt);
 }

 my $nbins  = $rad->max+1;
 my $radius = $rad->clump(-1);

 my $total = zeroes($nbins);
 PDL::indadd $a->clump(-1), $radius, $total;  # sum of the pixels per radius
 my $count = zeroes($nbins);
 PDL::indadd pdl(1), $radius, $count;         # number of pixels per radius
 $total /= $count;

 # write the per-radius mean back through a flattened view of the image
 my $flat = $a->clump(-1);
 $flat .= $total->index($radius);

 return $a;
}

EOD

pp_add_exported('','circ_mean circ_mean_p');


pp_addpm(<<'EOPM');

=head2 kernctr

=for ref

`centre' a kernel (auxiliary routine to fftconvolve)

=for usage

	$kernel = kernctr($image,$smallk);
	fftconvolve($image,$kernel);

kernctr centres a small kernel to emulate the behaviour of the direct
convolution routines.

=cut


*kernctr = \&PDL::kernctr;

sub PDL::kernctr {
    # `centre' the kernel, to match kernel & image sizes and
    # emulate convolve/conv2d.  FIX: implement with phase shifts
    # in fftconvolve, with option tag
    #
    # Arguments: the image and a kernel with the same number of dims that
    # is no larger than the image along any axis.
    # Returns a new double piddle with the dims of the image, zero
    # everywhere except where the kernel has been copied in.
    # Barfs on a wrong argument count, on mismatched dimensionality, or
    # when the kernel is larger than the image along some axis.
    barf "Must have image & kernel for kernctr" if $#_ != 1;
    my ($imag, $kern) = @_;
    my (@ni) = $imag->dims;
    my (@nk) = $kern->dims;
    barf "Kernel and image must have same number of dims" if $#ni != $#nk;
    my ($newk) = zeroes(double,@ni);

    # Along every axis the kernel is cut in two: the part from the centre
    # element (index $d+1 counted from the end) to the end goes to the
    # start of the output axis, the part before the centre goes to the
    # end of the output axis.  [0] holds the slice specs of the first
    # part, [1] those of the second; an empty string marks a second part
    # that has no elements.
    my (@stri,@strk);
    for my $i (0 .. $#ni) {
        my $k = $nk[$i];
        my $n = $ni[$i];
        barf "Kernel must be smaller than image in all dims" if ($n < $k);
        my $d = int(($k-1)/2);
        # Number of kernel elements in front of the centre.  This, not $d,
        # decides whether the second part exists: an axis of length 2 has
        # $d == 0 but still has one element in front of the centre, and
        # testing $d would silently drop it.
        my $nlead = $k - $d - 1;
        $stri[$i][0] = "0:$d,";
        $strk[$i][0] = (-$d-1).":-1,";
        $stri[$i][1] = $nlead == 0 ? '' : (-$nlead).':-1,';
        $strk[$i][1] = $nlead == 0 ? '' : '0:'.($nlead-1).',';
    }

    # kernel is split between the 2^n corners of the cube; bit $n of the
    # chunk number picks the first or second part along axis $n
    my ($nchunk) = 2 << $#ni;
    CHUNK:
    for my $chunk (0 .. $nchunk-1) {
        my ($stri,$strk) = ('','');
        my $bits = $chunk;
        for my $n (0 .. $#ni) {
            my $part = $bits & 1;
            $bits >>= 1;
            next CHUNK if $stri[$n][$part] eq '';
            $stri .= $stri[$n][$part];
            $strk .= $strk[$n][$part];
        }
        chop ($stri); chop ($strk);      # drop the trailing commas
        # lexical temporary instead of a package global, so no slice of
        # $newk stays referenced after the call
        my $t;
        ($t = $newk->slice($stri)) .= $kern->slice($strk);
    }
    $newk;
}

EOPM


pp_def(
       'convolveND',
       Doc=><<'EOD',

=for ref

Speed-optimized convolution with selectable boundary conditions

=for usage

$new = convolveND($a, $kernel, [ {options} ]);

Convolve an array with a kernel, both of which are N-dimensional.

If the kernel has fewer dimensions than the array, then the extra array
dimensions are threaded over.  There are options that control the boundary 
conditions and method used.

The kernel's origin is taken to be at the kernel's center.  If your
kernel has a dimension of even order then the origin's coordinates get
rounded up to the next higher pixel (e.g. (1,2) for a 3x4 kernel).
This mimics the behavior of the earlier L<convolve|convolve> and
L<fftconvolve|PDL::FFT/fftconvolve()> routines, so convolveND is a drop-in
replacement for them.


The kernel may be any size compared to the image, in any dimension.

The kernel and the array are not quite interchangeable (as in mathematical
convolution): the code is inplace-aware only for the array itself, and
the only allowed boundary condition on the kernel is truncation.

convolveND is inplace-aware: say C<convolveND(inplace $a ,$k)> to modify
a variable in-place.  You don't reduce the working memory that way -- only
the final memory.

OPTIONS

Options are parsed by PDL::Options, so unique abbreviations are accepted.

=over 3

=item boundary (default: 'truncate')

The boundary condition on the array, which affects any pixel closer
to the edge than the half-width of the kernel.  

The boundary conditions are the same as those accepted by
L<range|PDL::Slices/range>, because this option is passed directly
into L<range|PDL::Slices/range>.  Useful options are 'truncate' (the
default), 'extend', and 'periodic'.  You can select different boundary 
conditions for different axes -- see L<range|PDL::Slices/range> for more 
detail.

The (default) truncate option marks all the near-boundary pixels as BAD if
you have bad values compiled into your PDL and the array's badflag is set. 

=item method (default: 'auto')

The method to use for the convolution.  Acceptable alternatives are
'direct', 'fft', or 'auto'.  The direct method is an explicit
copy-and-multiply operation; the fft method takes the Fourier
transform of the input and output kernels.  The two methods give the
same answer to within double-precision numerical roundoff.  The fft
method is much faster for large kernels; the direct method is faster
for tiny kernels.  The tradeoff occurs when the array has about 400x
more pixels than the kernel.

The default method is 'auto', which chooses direct or fft convolution
based on the size of the input arrays.

=back

NOTES

At the moment there's no way to thread over kernels.  That could/should
be fixed.

The threading over input is cheesy and should probably be fixed:
currently the kernel just gets dummy dimensions added to it to match
the input dims.  That does the right thing tersely but probably runs slower
than a dedicated threadloop.  

The direct copying code uses PP primarily for the generic typing: it includes
its own threadloops.

=cut


EOD
       PMCode => <<'EOD',

use PDL::Options;

# Perl wrapper conditions the data to make life easier for the PP sub.

sub PDL::convolveND {
  # Perl front end for convolveND: checks the arguments, brings image and
  # kernel to a common type, pads the image according to the boundary
  # option, then runs either the FFT path or the compiled direct path.
  #
  #   $a0   - source array; honoured as the output when flagged inplace
  #   $k    - kernel; may have fewer dims than the source, never more
  #   $opt0 - optional hash ref with Method and Boundary entries
  #
  # Returns the convolved array.  Barfs if the kernel has more dims than
  # the source, or if the FFT path is selected and PDL::FFT is unusable.
  my($a0,$k,$opt0) = @_;
  my $inplace = $a0->is_inplace;
  my $a = $a0->new_or_inplace;

  barf("convolveND: kernel (".join("x",$k->dims).") has more dims than source (".join("x",$a->dims).")\n")
    if($a->ndims < $k->ndims);

  # Coerce stuff all into the same type.  Try to make sense.
  # The trivial conversion leaves dataflow intact (nontrivial conversions
  # don't), so the inplace code is OK.  Non-inplace code: let the existing
  # PDL code choose what type is best.
  my $type;
  if($inplace) {
    $type = $a0->get_datatype;
  } else {
    my $z = $a->flat->index(0) + $k->flat->index(0);
    $type = $z->get_datatype;
  }
  $a = $a->convert($type);
  $k = $k->convert($type);

  ## Handle options -- $def is a static variable so it only gets set up once.
  our $def;
  unless(defined($def)) {
    $def = PDL::Options->new( {
                               Method=>'a',
                               Boundary=>'t'
                              }
                            );
    $def->minmatch(1);
    $def->casesens(0);
  }

  my $opt = $def->options(PDL::Options::ifhref($opt0));

  ###
  # If the kernel has too few dimensions, we thread over the other
  # dims -- this is the same as supplying the kernel with dummy dims of
  # order 1, so, er, we do that.
  $k = $k->dummy($a->dims - 1, 1)
    if($a->ndims > $k->ndims);
  my $kdims = pdl($k->dims);

  ###
  # Decide whether to FFT or directly convolve: if we're in auto mode,
  # choose based on the relative size of the image and kernel arrays.
  # Any explicit method that does not start with d or s selects the FFT.
  my $fft = ( ($opt->{Method} =~ m/^a/i) ?
               ( $a->nelem > 2500 and ($a->nelem) <= ($k->nelem * 500) ) :
               ( $opt->{Method} !~ m/^[ds]/i )
              );

  ###
  # Pad the array to include boundary conditions
  my $adims = pdl($a->dims);
  my $koff = ($kdims/2)->ceil - 1;

  my $aa = $a->range( -$koff, $adims + $kdims, $opt->{Boundary} )
               ->sever;

  if($fft) {
    #  The eval here keeps conflicts from happening at compile time
    eval "use PDL::FFT" ;
    # Fail here, with the real reason, rather than later with an
    # "Undefined subroutine" error from the first transform call.
    barf("convolveND: FFT method needs PDL::FFT, which could not be loaded: $@\n")
      unless defined(&PDL::fftnd) && defined(&PDL::ifftnd);

    print "convolveND: using FFT method\n" if($PDL::debug);

    # FFT works best on doubles; do our work there then cast back
    # at the end.
    $aa = double($aa);
    my $aai = $aa->zeroes;

    my $kk = $aa->zeroes;
    my $kki = $aa->zeroes;
    my $tmp;  # work around new perl -d "feature"
    # Place the kernel into the padded frame, wrapped periodically
    ($tmp = $kk->range( - ($kdims/2)->floor, $kdims, 'p')) .= $k;
    PDL::fftnd($kk, $kki);
    PDL::fftnd($aa, $aai);

    {
      # Complex product of the two transforms; the imaginary part has to
      # be computed before $aa is overwritten with the real part.
      my($ii) = $kk * $aai   +    $aa * $kki;
      $aa =     $aa * $kk    -   $kki * $aai;
      $aai .= $ii;
    }

    PDL::ifftnd($aa,$aai);
    # Cut the unpadded region back out and store it in the output
    $a .= $aa->range( $koff, $adims);

  } else {
    print "convolveND: using direct method\n" if($PDL::debug);

    ### The first argument is a dummy to set $GENERIC.
    &PDL::_convolveND_int( $k->flat->index(0), $k, $aa, $a );

  }


  $a;
}

EOD
  Pars=>'k0()',
  OtherPars=>'SV *k; SV *aa; SV *a;',

Code => <<'EOD'
/*
 * Direct convolution 
 *
 * Because the kernel is usually the smaller of the two arrays to be convolved,
 * we thread kernel-first to keep it in the processor's cache.  The strategy:
 * work on a padded copy of the original image, so that (even with boundary 
 * conditions) the geometry of the kernel is linearly related to the input 
 * array.  Otherwise, follow the path blazed by Karl in convolve(): keep track
 * of the offsets for each kernel element in a flattened original PDL.
 *
 * The first (PP) argument is a dummy that's only used to set the GENERIC()
 * macro.  The other three arguments should all have the same type as the
 * first arguments, and are all passed in as SVs.  They are: the kernel, 
 * the padded copy of the input PDL, and a pre-allocated output PDL.  The 
 * input PDL should be padded by the dimensionality of the kernel.
 *
 */


  short ndims;

  PDL_Indx   *koffs, *koff;        /* the "s<foo>" variables are static  */
  $GENERIC() *kvals, *kval;        /* scratchspace designed to avoid dynamic */
  static PDL_Indx skoffs[256];     /* allocation.  The cost is about 2k per */
  static $GENERIC() skvals[256];   /* datatype, or about 20k of memory. */

  $GENERIC() *aptr;
  $GENERIC() *aaptr;

  PDL_Indx *ivec;
  static PDL_Indx sivec[16];

  PDL_Indx i,j;

  pdl *k     = PDL->SvPDLV($COMP(k));
  pdl *a =     PDL->SvPDLV($COMP(a));
  pdl *aa =    PDL->SvPDLV($COMP(aa));

  if(!k || !a || !aa) 
    barf("convolveND: Can't convert args to PDLs (should never happen)\n");

  PDL->make_physical(aa);
  PDL->make_physical(a);
  PDL->make_physical(k); 

  /* All three arrays must have the same rank.  ndims is taken from the  */
  /* padded input, so it has to be compared against the kernel AND the   */
  /* output array; comparing it with aa again would check nothing.       */
  ndims = aa->ndims;
  if(ndims != k->ndims || ndims != a->ndims)
     barf("convolveND: dims don't agree (should never happen)\n");

  /* Allocate scratchpads if necessary */
  /* This is done in boneheaded but safe manner ('coz we can't be sure  */
  /* of the relationship between the size of GENERIC and the size of    */
  /* a pointer).  */
  if(k->nvals <= 256) {
	koffs = skoffs;
	kvals = skvals;
  } else {	
	koffs = (PDL_Indx *)  (PDL->smalloc((STRLEN) (k->nvals * sizeof(PDL_Indx))));
	kvals = ($GENERIC() *)(PDL->smalloc((STRLEN) (k->nvals * sizeof($GENERIC()))));
  }

  if(ndims < 16) {
	ivec = sivec;
  } else {
	ivec = (PDL_Indx *) (PDL->smalloc((STRLEN) (ndims * sizeof(PDL_Indx))));
  }

  if(!ivec || !koffs || !kvals) 
	barf("convolveND: out of memory\n");

  /************************************/
  /* Fill up the koffs & kvals arrays */
  /* koffs gets relative offsets into aa for each kernel value; */
  /* kvals gets the kernel values in the same order (flattened) */
  /* The kernel is read from its last element backwards while   */
  /* the aa offset runs forwards.                               */
  for(i=0;i<ndims;ivec[i++] = 0) ; 
  koff = koffs;
  kval = kvals;
  j    = 0;                      /* j gets current aa data offset */
  aptr = ($GENERIC() *)k->data + k->nvals - 1;  


  do {
	*(kval++) = *aptr;     /* Copy kernel value into kernel list */

	*(koff++) = j;	       /* Copy current aa offset into koffs list */

	/* Advance k-vector.  The two assignments in the loop test step   */
	/* the kernel pointer and the aa offset along axis i before the   */
	/* axis counter is bumped; the loop body runs only when that      */
	/* counter overflows, and then rewinds the axis and carries on to */
	/* the next one.                                                  */
	for(i=0; 
	      (i < ndims) && 
	      (aptr -=  k->dimincs[i]) &&    /* Funky pre-test part of loop */
	      (j    += aa->dimincs[i]) &&    /* Funky pre-test part of loop */
	      (++(ivec[i]) >= k->dims[i]); 
	    i++) {
	  ivec[i] = 0;
	  aptr +=  k->dimincs[i] *  k->dims[i];
	  j    -= aa->dimincs[i] *  k->dims[i];
        }
  } while(i<ndims);            /* i reaches ndims when the top axis wraps */

  /******************************/
  /* Now do the actual convolution: for each vector in a,   */
  /* accumulate the appropriate aa-sum and stick it into a. */
  for(i=0;i<ndims;ivec[i++] = 0) ;
  aptr  = a->data;
  aaptr = aa->data;
  do {
	$GENERIC() acc = 0;
	koff = koffs;
	kval = kvals;
	for(i=0;i<k->nvals;i++)
	  acc += aaptr[*(koff++)] * (*(kval++));
	*aptr = acc;

	/* Advance a-vector and aa-vector */
        for(i=0;
	        (i<ndims) &&
		(aptr  +=  a->dimincs[i]) && /* Funky pre-test part of loop */
	        (aaptr += aa->dimincs[i]) && /* Funky pre-test part of loop */
                (++(ivec[i]) >= a->dims[i]);
	    i++) {
	    ivec[i] = 0;
	    aptr  -=  a->dimincs[i] * ( a->dims[i]);
	    /* aa is rewound by the extent of a, not of aa: only the part */
	    /* of the padded array that lines up with a is walked.        */
	    aaptr -= aa->dimincs[i] * ( a->dims[i]); /* sic */
	}
  } while(i<ndims);

EOD
);	  

  
pp_done();