The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Statistics::Cluto;

use 5.008005;
use strict;
use warnings;
use Carp;

require Exporter;
use AutoLoader;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

# This allows declaration	use Statistics::Cluto ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw(
	CLUTO_CLFUN_CLINK
	CLUTO_CLFUN_CLINK_W
	CLUTO_CLFUN_CUT
	CLUTO_CLFUN_E1
	CLUTO_CLFUN_G1
	CLUTO_CLFUN_G1P
	CLUTO_CLFUN_H1
	CLUTO_CLFUN_H2
	CLUTO_CLFUN_I1
	CLUTO_CLFUN_I2
	CLUTO_CLFUN_MMCUT
	CLUTO_CLFUN_NCUT
	CLUTO_CLFUN_RCUT
	CLUTO_CLFUN_SLINK
	CLUTO_CLFUN_SLINK_W
	CLUTO_CLFUN_UPGMA
	CLUTO_CLFUN_UPGMA_W
	CLUTO_COLMODEL_IDF
	CLUTO_COLMODEL_NONE
	CLUTO_CSTYPE_BESTFIRST
	CLUTO_CSTYPE_LARGEFIRST
	CLUTO_CSTYPE_LARGESUBSPACEFIRST
	CLUTO_DBG_APROGRESS
	CLUTO_DBG_CCMPSTAT
	CLUTO_DBG_CPROGRESS
	CLUTO_DBG_MPROGRESS
	CLUTO_DBG_PROGRESS
	CLUTO_DBG_RPROGRESS
	CLUTO_GRMODEL_ASYMETRIC_DIRECT
	CLUTO_GRMODEL_ASYMETRIC_LINKS
	CLUTO_GRMODEL_EXACT_ASYMETRIC_DIRECT
	CLUTO_GRMODEL_EXACT_ASYMETRIC_LINKS
	CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT
	CLUTO_GRMODEL_EXACT_SYMETRIC_LINKS
	CLUTO_GRMODEL_INEXACT_ASYMETRIC_DIRECT
	CLUTO_GRMODEL_INEXACT_ASYMETRIC_LINKS
	CLUTO_GRMODEL_INEXACT_SYMETRIC_DIRECT
	CLUTO_GRMODEL_INEXACT_SYMETRIC_LINKS
	CLUTO_GRMODEL_NONE
	CLUTO_GRMODEL_SYMETRIC_DIRECT
	CLUTO_GRMODEL_SYMETRIC_LINKS
	CLUTO_MEM_NOREUSE
	CLUTO_MEM_REUSE
	CLUTO_MTYPE_HEDGE
	CLUTO_MTYPE_HSTAR
	CLUTO_MTYPE_HSTAR2
	CLUTO_OPTIMIZER_MULTILEVEL
	CLUTO_OPTIMIZER_SINGLELEVEL
	CLUTO_ROWMODEL_LOG
	CLUTO_ROWMODEL_MAXTF
	CLUTO_ROWMODEL_NONE
	CLUTO_ROWMODEL_SQRT
	CLUTO_SIM_CORRCOEF
	CLUTO_SIM_COSINE
	CLUTO_SIM_EDISTANCE
	CLUTO_SIM_EJACCARD
	CLUTO_SUMMTYPE_MAXCLIQUES
	CLUTO_SUMMTYPE_MAXITEMSETS
	CLUTO_TREE_FULL
	CLUTO_TREE_TOP
	CLUTO_VER_MAJOR
	CLUTO_VER_MINOR
	CLUTO_VER_SUBMINOR
) ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw(
	CLUTO_CLFUN_CLINK
	CLUTO_CLFUN_CLINK_W
	CLUTO_CLFUN_CUT
	CLUTO_CLFUN_E1
	CLUTO_CLFUN_G1
	CLUTO_CLFUN_G1P
	CLUTO_CLFUN_H1
	CLUTO_CLFUN_H2
	CLUTO_CLFUN_I1
	CLUTO_CLFUN_I2
	CLUTO_CLFUN_MMCUT
	CLUTO_CLFUN_NCUT
	CLUTO_CLFUN_RCUT
	CLUTO_CLFUN_SLINK
	CLUTO_CLFUN_SLINK_W
	CLUTO_CLFUN_UPGMA
	CLUTO_CLFUN_UPGMA_W
	CLUTO_COLMODEL_IDF
	CLUTO_COLMODEL_NONE
	CLUTO_CSTYPE_BESTFIRST
	CLUTO_CSTYPE_LARGEFIRST
	CLUTO_CSTYPE_LARGESUBSPACEFIRST
	CLUTO_DBG_APROGRESS
	CLUTO_DBG_CCMPSTAT
	CLUTO_DBG_CPROGRESS
	CLUTO_DBG_MPROGRESS
	CLUTO_DBG_PROGRESS
	CLUTO_DBG_RPROGRESS
	CLUTO_GRMODEL_ASYMETRIC_DIRECT
	CLUTO_GRMODEL_ASYMETRIC_LINKS
	CLUTO_GRMODEL_EXACT_ASYMETRIC_DIRECT
	CLUTO_GRMODEL_EXACT_ASYMETRIC_LINKS
	CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT
	CLUTO_GRMODEL_EXACT_SYMETRIC_LINKS
	CLUTO_GRMODEL_INEXACT_ASYMETRIC_DIRECT
	CLUTO_GRMODEL_INEXACT_ASYMETRIC_LINKS
	CLUTO_GRMODEL_INEXACT_SYMETRIC_DIRECT
	CLUTO_GRMODEL_INEXACT_SYMETRIC_LINKS
	CLUTO_GRMODEL_NONE
	CLUTO_GRMODEL_SYMETRIC_DIRECT
	CLUTO_GRMODEL_SYMETRIC_LINKS
	CLUTO_MEM_NOREUSE
	CLUTO_MEM_REUSE
	CLUTO_MTYPE_HEDGE
	CLUTO_MTYPE_HSTAR
	CLUTO_MTYPE_HSTAR2
	CLUTO_OPTIMIZER_MULTILEVEL
	CLUTO_OPTIMIZER_SINGLELEVEL
	CLUTO_ROWMODEL_LOG
	CLUTO_ROWMODEL_MAXTF
	CLUTO_ROWMODEL_NONE
	CLUTO_ROWMODEL_SQRT
	CLUTO_SIM_CORRCOEF
	CLUTO_SIM_COSINE
	CLUTO_SIM_EDISTANCE
	CLUTO_SIM_EJACCARD
	CLUTO_SUMMTYPE_MAXCLIQUES
	CLUTO_SUMMTYPE_MAXITEMSETS
	CLUTO_TREE_FULL
	CLUTO_TREE_TOP
	CLUTO_VER_MAJOR
	CLUTO_VER_MINOR
	CLUTO_VER_SUBMINOR
);

our $VERSION = '0.01';

sub AUTOLOAD {
    # This AUTOLOAD is used to 'autoload' constants from the constant()
    # XS function.

    my $constname;
    our $AUTOLOAD;
    ($constname = $AUTOLOAD) =~ s/.*:://;
    croak "&Statistics::Cluto::constant not defined" if $constname eq 'constant';
    my ($error, $val) = constant($constname);
    if ($error) { croak $error; }
    {
	no strict 'refs';
	# Fixed between 5.005_53 and 5.005_61
#XXX	if ($] >= 5.00561) {
#XXX	    *$AUTOLOAD = sub () { $val };
#XXX	}
#XXX	else {
	    *$AUTOLOAD = sub { $val };
#XXX	}
    }
    goto &$AUTOLOAD;
}

sub DESTROY {}

require XSLoader;
XSLoader::load('Statistics::Cluto', $VERSION);

# Preloaded methods go here.

our $MATRIX_TYPE_DENSE = 0;
our $MATRIX_TYPE_SPARSE = 1;

our $NO_OPTIMIZE_SOLUTION = 0;
our $OPTIMIZE_SOLUTION = 1;

sub new {
    my $class = shift;
    my $self = {
        nrows => 0,
        ncols => 0,
        nnz => 0,
        rowptr => [],
        rowind => [],
        rowval => [],

        # global (not method-specific) defaults
        simfun => CLUTO_SIM_COSINE(),
        cstype => CLUTO_CSTYPE_BESTFIRST(),
        rowmodel => CLUTO_ROWMODEL_NONE(),
        colprune => 1.0,
        nnbrs => 40,
        grmodel => CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT(),
        edgeprune => -1,
        vtxprune => -1,
        mincomponent => 5,
        kwayrefine => $NO_OPTIMIZE_SOLUTION,
        ntrials => 10,
        niter => 10,
        seed => time,
        dbglvl => 0,
        nclusters => 1,
        nfeatures => 5,

        pretty_format => 0,

        @_,
    };

    bless $self, $class;
    return $self;
}

sub set_options {
    my ($self, $opts) = @_;

    while (my ($key, $val) = each(%$opts)) {
        $self->{$key} = $val;
    }
}

#
# matrix loading functions
#

sub set_sparse_matrix {
    my ($self, $nrows, $ncols, $rowval) = @_;

    die ('number of rows does not match') if ($nrows != $#$rowval + 1);

    $self->{matrix_type} = $MATRIX_TYPE_SPARSE;
    $self->{nrows} = $nrows;
    $self->{ncols} = $ncols;
    $self->{nnz} = 0;

    my @rowptr = ();
    my @rowind = ();
    my @rowval = ();
    for (my $rowptr = 0; $rowptr < $nrows; $rowptr++) {
        my $row = $$rowval[$rowptr];
        push @rowptr, $#rowind + 1;
        for (my $j = 0; $j <= $#$row; $j+=2) {
            my $col = $$row[$j];
            die ("inappropriate col#$col in row#".($rowptr+1)) if ($col > $ncols);
            push @rowind, $col - 1;
            push @rowval, $$row[$j + 1];
            $self->{nnz} ++;
        }
    }
    push @rowptr, $#rowind + 1;
    $self->{rowptr} = \@rowptr;
    $self->{rowind} = \@rowind;
    $self->{rowval} = \@rowval;
}

sub set_raw_sparse_matrix {
    my ($self, $nrows, $ncols, $rowptr, $rowind, $rowval) = @_;

    # $$rowptr[$#$rowptr + 1] = $#$rowind + 1 if ($$rowptr[-1] != $#$rowind + 1);
    if ($$rowptr[-1] != $#$rowind + 1 or $#$rowptr != $nrows) {
        die('rowptr not appropriate');
    }

    $self->{matrix_type} = $MATRIX_TYPE_SPARSE;
    $self->{nrows} = $nrows;
    $self->{ncols} = $ncols;
    $self->{nnz} = $#$rowval + 1;

    $self->{rowptr} = $rowptr;
    $self->{rowind} = $rowind;
    $self->{rowval} = $rowval;
}

sub set_dense_matrix {
    my ($self, $nrows, $ncols, $rowval) = @_;

    die ('number of rows does not match') if ($nrows != $#$rowval + 1);

    $self->{matrix_type} = $MATRIX_TYPE_DENSE;
    $self->{nrows} = $nrows;
    $self->{ncols} = $ncols;
    $self->{nnz} = -1;

    my @rowval = ();
    for (my $i = 0; $i <= $#$rowval; $i++) {
        my $row = $$rowval[$i];
        die ('number of cols does not match: row #'.($i+1)) if ($#$row+1 != $self->{ncols});
        push @rowval, @$row;
    }
    $self->{rowval} = \@rowval;
}

sub set_dense_matrix_as_sparse {
    my ($self, $nrows, $ncols, $matrix) = @_;
    my $rowval = [];

    die ('number of rows does not match') if ($nrows != $#$matrix + 1);

    for my $row_n (0..$nrows-1) {
        die ('number of cols does not match: row #'.($row_n+1)) if ($#{$matrix->[$row_n]} + 1 != $ncols);
        $rowval->[$row_n] = [];
        for my $col_n (0..$ncols-1) {
            my $val = $matrix->[$row_n][$col_n];
            if ($val) {
                push @{$rowval->[$row_n]}, $col_n + 1;
                push @{$rowval->[$row_n]}, $val;
            }
        }
    }
    $self->set_sparse_matrix($nrows, $ncols, $rowval);
}


#
# API wrappers
#

sub VP_ClusterDirect {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values
    $self->{part} = [];

    # call xs
    &_VP_ClusterDirect($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{dbglvl}, $self->{nclusters}, $self->{part});

    return $self->{pretty_format} && $self->format_cluster
        || $self->{part};
}

sub VP_ClusterRB {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values
    $self->{part} = [];

    # call xs
    &_VP_ClusterRB($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{cstype}, $self->{kwayrefine}, $self->{dbglvl}, $self->{nclusters}, $self->{part});

    return $self->{pretty_format} && $self->format_cluster
        || $self->{part};
}

sub VA_Cluster {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_UPGMA();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values
    $self->{part} = [];
    $self->{ptree} = [];
    $self->{tsims} = [];
    $self->{gains} = [];

    # call xs
    &_VA_Cluster($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains});

    return $self->{pretty_format} && {
        clusters => $self->format_cluster,
        tree => $self->format_tree
    }
        || map $self->{$_}, qw(part ptree tsims gains);
}

sub VA_ClusterBiased {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_UPGMA();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());
    $self->{npclusters} = int($self->{nrows}**.5);

    # init return values
    $self->{part} = [];
    $self->{ptree} = [];
    $self->{tsims} = [];
    $self->{gains} = [];

    # call xs
    &_VA_ClusterBiased($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{dbglvl}, $self->{npclusters}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains});

    return $self->{pretty_format} && {
        clusters => $self->format_cluster,
        tree => $self->format_tree
    }
        || map $self->{$_}, qw(part ptree tsims gains);
}

sub SP_ClusterDirect {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols});

    # init return values
    $self->{part} = [];

    # call xs
    &_SP_ClusterDirect($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{dbglvl}, $self->{nclusters}, $self->{part});

    return $self->{pretty_format} && $self->format_cluster
        || $self->{part};
}

sub SP_ClusterRB {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols});

    # init return values
    $self->{part} = [];

    # call xs
    &_SP_ClusterRB($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{ntrials}, $self->{niter}, $self->{seed}, $self->{cstype}, $self->{kwayrefine}, $self->{dbglvl}, $self->{nclusters}, $self->{part});

    return $self->{pretty_format} && $self->format_cluster
        || $self->{part};
}

sub VP_GraphClusterRB {
    my $self = shift;

    # method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_CUT();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values
    $self->{part} = [];
    $self->{crvalue} = 0;

    # call xs
    my $rtn = &_VP_GraphClusterRB($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{grmodel}, $self->{nnbrs}, $self->{edgeprune}, $self->{vtxprune}, $self->{mincomponent}, $self->{ntrials}, $self->{seed}, $self->{cstype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{crvalue});

    return $self->{pretty_format} && $self->format_cluster
        || [ $rtn, $self->{part}, $self->{crvalue} ];
}

sub SP_GraphClusterRB {
    my $self = shift;

    # method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_CUT();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values
    $self->{part} = [];
    $self->{crvalue} = 0;

    # call xs
    my $rtn = &_SP_GraphClusterRB($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{nnbrs}, $self->{edgeprune}, $self->{vtxprune}, $self->{mincomponent}, $self->{ntrials}, $self->{seed}, $self->{cstype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{crvalue});

    return $self->{pretty_format} && $self->format_cluster
        || [ $rtn, $self->{part}, $self->{crvalue} ];
}

sub SA_Cluster {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_UPGMA();

    # init return values
    $self->{part} = [];
    $self->{ptree} = [];
    $self->{tsims} = [];
    $self->{gains} = [];

    # call xs
    &_SA_Cluster($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains});

    return $self->{pretty_format} && {
        clusters => $self->format_cluster,
        tree => $self->format_tree 
    }
        || map $self->{$_}, qw(part ptree tsims gains);
}

sub V_BuildTree {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());
    $self->{treetype} ||= CLUTO_TREE_TOP();

    # init return values
    $self->{ptree} = [];
    $self->{tsims} = [];
    $self->{gains} = [];

    # call xs
    &_V_BuildTree($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{treetype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains});

    return $self->{pretty_format} && $self->format_tree
        || map $self->{$_}, qw(ptree tsims gains);
}

sub S_BuildTree {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols});
    $self->{treetype} ||= CLUTO_TREE_TOP();

    # init return values
    $self->{ptree} = [];
    $self->{tsims} = [];
    $self->{gains} = [];

    # call xs
    &_S_BuildTree($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{treetype}, $self->{dbglvl}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{tsims}, $self->{gains});

    return $self->{pretty_format} && $self->format_tree
        || map $self->{$_}, qw(ptree tsims gains);
}

sub V_GetGraph {
    my $self = shift;

    # set method-specific defaults
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values
    $self->{growptr} = [];
    $self->{growind} = [];
    $self->{growval} = [];

    # call xs
    &_V_GetGraph($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{grmodel}, $self->{nnbrs}, $self->{dbglvl}, $self->{growptr}, $self->{growind}, $self->{growval});

    return map $self->{$_}, qw(growptr growind growval);
}

sub S_GetGraph {
    my $self = shift;

    # set method-specific defaults

    # init return values
    $self->{growptr} = [];
    $self->{growind} = [];
    $self->{growval} = [];

    # call xs
    &_S_GetGraph($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{grmodel}, $self->{nnbrs}, $self->{dbglvl}, $self->{growptr}, $self->{growind}, $self->{growval});

    return map $self->{$_}, qw(growptr growind growval);
}

sub V_GetSolutionQuality {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());

    # init return values

    # call xs
    return &_V_GetSolutionQuality($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{crfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part});
}

sub S_GetSolutionQuality {
    my $self = shift;

    # set method-specific defaults
    $self->{crfun} ||= CLUTO_CLFUN_I2();
    warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols});

    # init return values

    # call xs
    return &_S_GetSolutionQuality($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{crfun}, $self->{nclusters}, $self->{part});
}

sub V_GetClusterStats {
    my $self = shift;

    # set method-specific defaults

    # init return values
    $self->{pwgts} = [];
    $self->{cintsim} = [];
    $self->{cintsdev} = [];
    $self->{izscores} = [];
    $self->{cextsim} = [];
    $self->{cextsdev} = [];
    $self->{ezscores} = [];

    # call xs
    &_V_GetClusterStats($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{pwgts}, $self->{cintsim}, $self->{cintsdev}, $self->{izscores}, $self->{cextsim}, $self->{cextsdev}, $self->{ezscores});

    return $self->{pretty_format} && $self->format_cluster_stats
        || map $self->{$_}, qw(pwgts cintsim cintsdev izscores cextsim cextsdev ezscores);
}

sub S_GetClusterStats {
    my $self = shift;

    # set method-specific defaults
    warn ("number of rows not equal to number of cols") if ($self->{nrows} != $self->{ncols});

    # init return values
    $self->{pwgts} = [];
    $self->{cintsim} = [];
    $self->{cintsdev} = [];
    $self->{izscores} = [];
    $self->{cextsim} = [];
    $self->{cextsdev} = [];
    $self->{ezscores} = [];

    # call xs
    &_S_GetClusterStats($self->{matrix_type}, $self->{nrows}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{nclusters}, $self->{part}, $self->{pwgts}, $self->{cintsim}, $self->{cintsdev}, $self->{izscores}, $self->{cextsim}, $self->{cextsdev}, $self->{ezscores});

    return $self->{pretty_format} && $self->format_cluster_stats
        || map $self->{$_}, qw(pwgts cintsim cintsdev izscores cextsim cextsdev ezscores);
}

sub V_GetClusterFeatures {
    my $self = shift;

    # init return values
    $self->{internalids} = [];
    $self->{internalwgts} = [];
    $self->{externalids} = [];
    $self->{externalwgts} = [];

    # call xs
    &_V_GetClusterFeatures($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{nfeatures}, $self->{internalids}, $self->{internalwgts}, $self->{externalids}, $self->{externalwgts});

    return $self->{pretty_format} && $self->format_cluster_features
        || map $self->{$_}, qw(internalids internalwgts externalids externalwgts);
}

sub V_GetClusterSummaries {
    my $self = shift;

    # set method-specific defaults
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());
    $self->{sumtype} ||= CLUTO_SUMMTYPE_MAXCLIQUES();

    # init return values
    $self->{r_nsum} = undef;
    $self->{r_spid} = [];
    $self->{r_swgt} = [];
    $self->{r_sumptr} = [];
    $self->{r_sumind} = [];

    # call xs
    &_V_GetClusterSummaries($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{sumtype}, $self->{nfeatures}, $self->{r_nsum}, $self->{r_spid}, $self->{r_swgt}, $self->{r_sumptr}, $self->{r_sumind});


    return $self->{pretty_format} && $self->format_cluster_summaries
        || map $self->{$_}, qw(r_nsum r_spid r_swgt r_sumptr r_sumind);
}

sub V_GetTreeStats {
    my $self = shift;

    # set method-specific defaults
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());
    warn ("ptree not set or size does not equal to 2*nclusters") if ($#{$self->{ptree}}+1 != $self->{nclusters}*2);

    # init return values
    $self->{pwgts} = [];
    $self->{cintsim} = [];
    $self->{cextsim} = [];

    # call xs
    &_V_GetTreeStats($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{pwgts}, $self->{cintsim}, $self->{cextsim});


    return $self->{pretty_format} && $self->format_tree_stats
        || map $self->{$_}, qw(pwgts cintsim cextsim);
}

sub V_GetTreeFeatures {
    my $self = shift;

    # set method-specific defaults
    $self->{colmodel} ||=
        ($self->{simfun} == CLUTO_SIM_CORRCOEF() ? CLUTO_COLMODEL_NONE() : CLUTO_COLMODEL_IDF());
    warn ("ptree not set or size does not equal to 2*nclusters")
        if ($#{$self->{ptree}}+1 != $self->{nclusters}*2);

    # init return values
    $self->{internalids} = [];
    $self->{internalwgts} = [];
    $self->{externalids} = [];
    $self->{externalwgts} = [];

    # call xs
    &_V_GetTreeFeatures($self->{matrix_type}, $self->{nrows}, $self->{ncols}, $self->{nnz}, $self->{rowptr}, $self->{rowind}, $self->{rowval}, $self->{simfun}, $self->{rowmodel}, $self->{colmodel}, $self->{colprune}, $self->{nclusters}, $self->{part}, $self->{ptree}, $self->{nfeatures}, $self->{internalids}, $self->{internalwgts}, $self->{externalids}, $self->{externalwgts});


    return $self->{pretty_format} && $self->format_tree_features
        || map $self->{$_}, qw(internalids internalwgts externalids externalwgts);
}




#
# for prertty_format option
#

sub format_cluster {
    my $self = shift;

    my $clusters = [];
    for my $i (0..$self->{nrows}-1) {
        push @{$clusters->[$self->{part}->[$i]]}, {
            row => $i,
            rowlabel => $self->{rowlabels}->[$i]
        } if ($self->{part}->[$i] >= 0);
    }
    return $clusters;

#        return [ map {
#            rowlabel => $self->{rowlabels}->[$_],
#            cluster => $self->{part}->[$_]
#        }, (0..$self->{nrows}-1)]
}

sub format_cluster_stats {
    my $self = shift;

    return {
        clusters => [ map {
            pwgt => $self->{pwgts}->[$_],
            cintsim => $self->{cintsim}->[$_],
            cintsdev => $self->{cintsdev}->[$_],
            cextsim => $self->{cextsim}->[$_],
            cextsdev => $self->{cextsdev}->[$_],
        }, (0..$self->{nclusters}-1) ],
        rows => [ map {
            rowlabel => $self->{rowlabels}->[$_],
            izscore => $self->{izscores}->[$_],
            exscore => $self->{ezscores}->[$_]
        }, (0..$self->{nrows}-1) ]
    }
}

sub format_cluster_features {
    my $self = shift;

    return [ map {
        descriptive =>
            [ map {
                internalid => $self->{internalids}->[$_],
                collabel => $self->{collabels}->[$self->{internalids}->[$_]],
                internalwgt => $self->{internalwgts}->[$_]
            }, (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ],
        discriminating =>
            [ map {
                externalid => $self->{externalids}->[$_],
                collabel => $self->{collabels}->[$self->{externalids}->[$_]],
                externalwgt => $self->{externalwgts}->[$_]
            }, (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ],
        }, (0..$self->{nclusters}-1)];
}

sub format_cluster_summaries {
    my $self = shift;

    return [ map {
        cluster => $self->{r_spid}->[$_],
        swgt => $self->{r_swgt}->[$_],
        features => [ map $self->{r_sumind}->[$_], ($self->{r_sumptr}->[$_]..($self->{r_sumptr}->[$_+1]-1)) ],
    },  (0..($self->{r_nsum}-1)) ];
}

sub format_tree {
    my $self = shift;

    return [ map {
        parent => $self->{ptree}->[$_],
        tsims => $self->{tsims}->[$_],
        gains => $self->{gains}->[$_]
    }, (0..$#{$self->{ptree}}-1) ];
}

sub format_tree_stats {
    my $self = shift;

    return [ map {
        cintsim => $self->{cintsim}->[$_],
        cextsim => $self->{cextsim}->[$_]
    }, (0..$self->{nclusters}*2-1) ];
}

sub format_tree_features {
    my $self = shift;

    return [ map
        [ map {
            descriptive =>
                [ map {
                    internalid => $self->{internalids}->[$_],
                    collabel => $self->{collabels}->[$self->{internalids}->[$_]],
                    internalwgt => $self->{internalwgts}->[$_]
                },
                  grep defined($self->{internalids}->[$_]),
                  (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ],
            discriminating =>
                [ map {
                    externalid => $self->{externalids}->[$_],
                    collabel => $self->{collabels}->[$self->{externalids}->[$_]],
                    externalwgt => $self->{externalwgts}->[$_]
                },
                  grep defined($self->{externalids}->[$_]),
                  (($_*$self->{nfeatures})..(($_ + 1)*$self->{nfeatures} - 1)) ],
            }, ($_..$_+$self->{nclusters}-1)]
     , (0..$self->{nclusters}*2-1) ];
}


# Autoload methods go after =cut, and are processed by the autosplit program.

1;
__END__

# Below is stub documentation for your module. You'd better edit it!


=head1 NAME

Statistics::Cluto - Perl binding for CLUTO

=head1 INSTALLATION

Download CLUTO from L<http://glaros.dtc.umn.edu/gkhome/views/cluto>.

Find C<libcluto.a> which matches your environment and place it under
your library path (or specify its path with LIBS option as shown below).

Then do:

   perl Makefile.PL [LIBS='-L/where/to/find/libcluto.a -lcluto']
   make
   make test
   make install

Tested with cluto-2.1.2/Darwin-i386, cluto-2.1.2/Darwin-ppc and
cluto-2.1.1/Linux-i686.


=head1 SYNOPSIS

   use Statistics::Cluto;
   use Data::Dumper;
   
   my $c = new Statistics::Cluto;
   
   $c->set_dense_matrix(4, 5, [
     [8, 8, 0, 3, 2],
     [2, 9, 9, 1, 4],
     [7, 6, 1, 2, 3],
     [1, 7, 8, 2, 1]
   ]);
   $c->set_options({
     rowlabels => [ 'row0', 'row1', 'row2', 'row3' ],
     collabels => [ 'col0', 'col1', 'col2', 'col3', 'col4' ],
     nclusters => 2,
     rowmodel => CLUTO_ROWMODEL_NONE,
     colmodel => CLUTO_COLMODEL_NONE,
     pretty_format => 1,
   });
   
   my $clusters = $c->VP_ClusterRB;
   print Dumper $clusters;
   
   my $cluster_features = $c->V_GetClusterFeatures;
   print Dumper $cluster_features;


=head1 DESCRIPTION

This is a perl binding for CLUTO.
Please refer to the CLUTO's manual sections 5.6 - 5.8 for details
of each function. Basically, Statistics::Cluto has all
corresponding methods for functions described in the manual.

=head2 loading matrix

Initial matrix can be set either via C<set_dense_matrix> or via
C<set_sparse_matrix> method.

   # loading 4x5 dense matrix
   #
   # 1 1 0 1 1
   # 1 0 0 1 0
   # 0 1 1 0 0
   # 0 0 1 0 0
   
   my $c = new Statistics::Cluto;
   my $nrows = 4;
   my $ncols = 5;
   my $rowval = [
     [1, 1, 0, 0, 1],
     [1, 1, 0, 1, 1],
     [1, 0, 1, 1, 0],
     [1, 0, 1, 0, 0]
   ];
   $c->set_dense_matrix($nrows, $ncols, $rowval);


   # loading 4x5 sparse matrix
   #
   # 1 1 0 1 1
   # 1 0 0 1 0
   # 0 1 1 0 0
   # 0 0 1 0 0
   
   my $c = new Statistics::Cluto;
   my $nrows = 4;
   my $ncols = 5;
   my $rowval = [
     [1, 1, 2, 1, 4, 1, 5, 1],
     [1, 1, 4, 1],
     [2, 1, 3, 1],
     [3, 1]
   ];
   $c->set_sparse_matrix($nrows, $ncols, $rowval)

Sparse matrix can also be set with C<set_raw_sparse_matrix>,
using the data format described in the manual section 3.3, Fig 16.

   # loading sparse matrix via set_raw_sparse_matrix()
   #
   # 1 1 0 1 1
   # 1 0 0 1 0
   # 0 1 1 0 0
   # 0 0 1 0 0
   
   my $c = new Statistics::Cluto;
   my $nrows = 4;
   my $ncols = 5;
   my $rowptr = [0, 4, 6, 8, 9];
   my $rowind = [0, 1, 3, 4, 0, 3, 1, 2, 2];
   my $rowval = [1, 1, 1, 1, 1, 1, 1, 1, 1];
   $c->set_raw_sparse_matrix($nrows, $ncols, $rowptr, $rowind, $rowval);

=head2 setting input parameters

Input parameters C<nrows>, C<ncols>, C<rowptr>, C<rowind>, C<rowval> are
set automatically when initial matrix is loaded. All other input
parameters should be set before calling clustering functions via
C<set_options> method. See sections 5.6 - 5.8 for necessary parameters.

   $c->set_options({
       rowlabels => ['row0', 'row1', 'row2', 'row3', 'row4'],
       collabels => ['col0', 'col1', 'col2', 'col3', 'col4'],
       nclusters => 2,
       nfeatures => 2,
       clfun => CLUTO_CLFUN_I2,
       treetype => CLUTO_TREE_TOP,
   });

=head2 calling functions

CLUTO's api functions described in the manual sections from 5.6 to 5.8 can be
called with methods of the same name, but without prefix "CLUTO_".

e.g. C<CLUTO_VP_ClusterDirect> (in section 5.6.1) is named C<VP_ClusterDirect>
in this package.

Routines with a single output parameter will return a single value / arrayref.
Routines with multiple output parameters will return an array, each member of
the array being the output parameters appearing in the same order as the manual.

   # suppose $c is initialized with 5x5 sparse matrix:
   #     col0 ... col4
   # row0: 2 2 0 2 2
   # row1: 2 1 0 1 4
   # row2: 0 2 5 0 0
   # row3: 0 1 6 0 0
   # row4: 2 1 0 3 4
   
   $c->set_options({
       rowlabels => ['row0', 'row1', 'row2', 'row3', 'row4'],
       collabels => ['col0', 'col1', 'col2', 'col3', 'col4'],
       nclusters => 2,
       nfeatures => 2,
   });
   my $part = $c->VP_ClusterDirect;
   
   # $part =   [
   #             '1',
   #             '1',
   #             '0',
   #             '0',
   #             '1'
   #           ];
   
   my ($internalids, $internalwgts, $externalids, $externalwgts) = $c->V_GetClusterFeatures;
   
   # $internalids =
   #           [
   #             '2',
   #             '0',
   #             '4',
   #             '0'
   #           ]
   # $internalwgts =
   #           [
   #             '1',
   #             '0',
   #             '0.598181843757629',
   #             '0.209491595625877'
   #           ]
   # $externalids =
   #           [
   #             '2',
   #             '4',
   #             '2',
   #             '4'
   #           ]
   # $externalwgts =
   #           [
   #             '0.5',
   #             '0.299090921878815',
   #             '0.5',
   #             '0.299090921878815'
   #           ]

Please refer to the manual for the details of the returned data structure.

When C<pretty_format> option is set to 1, results are returned in a single
hashref, and in a (hopefully) little bit more comprehensible way.
Meaning of the returned data should be pretty much self-explanatory.

   # with the same matrix and options as above...
   
   $c->set_options({ pretty_format => 1 });
   my $result = $c->VP_ClusterDirect;
   
   # $result =
   #         [
   #           [
   #             { 'row' => 2, 'rowlabel' => 'row2' },
   #             { 'row' => 3, 'rowlabel' => 'row3' }
   #           ],
   #           [
   #             { 'row' => 0, 'rowlabel' => 'row0' },
   #             { 'row' => 1, 'rowlabel' => 'row1' },
   #             { 'row' => 4, 'rowlabel' => 'row4' }
   #           ]
   #         ];
   
   $result = $c->V_GetClusterFeatures;
   
   # $result =
   #         [
   #           [
   #             {
   #               'discriminating' => [
   #                                     {
   #                                       'externalwgt' => '0.5',
   #                                       'collabel' => 'col2',
   #                                       'externalid' => 2
   #                                     },
   #                                     {
   #                                       'externalwgt' => '0.299090921878815',
   #                                       'collabel' => 'col4',
   #                                       'externalid' => 4
   #                                     }
   #                                   ],
   #               'descriptive' => [
   #                                  {
   #                                    'internalid' => 2,
   #                                    'internalwgt' => '1',
   #                                    'collabel' => 'col2'
   #                                  },
   #                                  {
   #                                    'internalid' => 0,
   #                                    'internalwgt' => '0',
   #                                    'collabel' => 'col0'
   #                                  }
   #                                ]
   #             },
   #             {
   #               'discriminating' => [
   #                                     {
   #                                       'externalwgt' => '0.5',
   #                                       'collabel' => 'col2',
   #                                       'externalid' => 2
   #                                     },
   #                                     {
   #                                       'externalwgt' => '0.299090921878815',
   #                                       'collabel' => 'col4',
   #                                       'externalid' => 4
   #                                     }
   #                                   ],
   #               'descriptive' => [
   #                                  {
   #                                    'internalid' => 4,
   #                                    'internalwgt' => '0.598181843757629',
   #                                    'collabel' => 'col4'
   #                                  },
   #                                  {
   #                                    'internalid' => 0,
   #                                    'internalwgt' => '0.209491595625877',
   #                                    'collabel' => 'col0'
   #                                  }
   #                                ]
   #             }
   #           ]
   #         ];

=head1 Exportable constants

  use Statistics::Cluto qw(:all)

will export all constants defined in C<cluto.h>. (Auto generated by
h2xs).
See section 5 of CLUTO's manual, or cluto.h for details.

  CLUTO_CLFUN_CLINK
  CLUTO_CLFUN_CLINK_W
  CLUTO_CLFUN_CUT
  CLUTO_CLFUN_E1
  CLUTO_CLFUN_G1
  CLUTO_CLFUN_G1P
  CLUTO_CLFUN_H1
  CLUTO_CLFUN_H2
  CLUTO_CLFUN_I1
  CLUTO_CLFUN_I2
  CLUTO_CLFUN_MMCUT
  CLUTO_CLFUN_NCUT
  CLUTO_CLFUN_RCUT
  CLUTO_CLFUN_SLINK
  CLUTO_CLFUN_SLINK_W
  CLUTO_CLFUN_UPGMA
  CLUTO_CLFUN_UPGMA_W
  CLUTO_COLMODEL_IDF
  CLUTO_COLMODEL_NONE
  CLUTO_CSTYPE_BESTFIRST
  CLUTO_CSTYPE_LARGEFIRST
  CLUTO_CSTYPE_LARGESUBSPACEFIRST
  CLUTO_DBG_APROGRESS
  CLUTO_DBG_CCMPSTAT
  CLUTO_DBG_CPROGRESS
  CLUTO_DBG_MPROGRESS
  CLUTO_DBG_PROGRESS
  CLUTO_DBG_RPROGRESS
  CLUTO_GRMODEL_ASYMETRIC_DIRECT
  CLUTO_GRMODEL_ASYMETRIC_LINKS
  CLUTO_GRMODEL_EXACT_ASYMETRIC_DIRECT
  CLUTO_GRMODEL_EXACT_ASYMETRIC_LINKS
  CLUTO_GRMODEL_EXACT_SYMETRIC_DIRECT
  CLUTO_GRMODEL_EXACT_SYMETRIC_LINKS
  CLUTO_GRMODEL_INEXACT_ASYMETRIC_DIRECT
  CLUTO_GRMODEL_INEXACT_ASYMETRIC_LINKS
  CLUTO_GRMODEL_INEXACT_SYMETRIC_DIRECT
  CLUTO_GRMODEL_INEXACT_SYMETRIC_LINKS
  CLUTO_GRMODEL_NONE
  CLUTO_GRMODEL_SYMETRIC_DIRECT
  CLUTO_GRMODEL_SYMETRIC_LINKS
  CLUTO_MEM_NOREUSE
  CLUTO_MEM_REUSE
  CLUTO_MTYPE_HEDGE
  CLUTO_MTYPE_HSTAR
  CLUTO_MTYPE_HSTAR2
  CLUTO_OPTIMIZER_MULTILEVEL
  CLUTO_OPTIMIZER_SINGLELEVEL
  CLUTO_ROWMODEL_LOG
  CLUTO_ROWMODEL_MAXTF
  CLUTO_ROWMODEL_NONE
  CLUTO_ROWMODEL_SQRT
  CLUTO_SIM_CORRCOEF
  CLUTO_SIM_COSINE
  CLUTO_SIM_EDISTANCE
  CLUTO_SIM_EJACCARD
  CLUTO_SUMMTYPE_MAXCLIQUES
  CLUTO_SUMMTYPE_MAXITEMSETS
  CLUTO_TREE_FULL
  CLUTO_TREE_TOP
  CLUTO_VER_MAJOR
  CLUTO_VER_MINOR
  CLUTO_VER_SUBMINOR


=head1 SEE ALSO

http://glaros.dtc.umn.edu/gkhome/views/cluto

=head1 AUTHOR

Ikuhiro IHARA E<lt>tsukue@gmail.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2007 by Ikuhiro IHARA

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.5 or,
at your option, any later version of Perl 5 you may have available.


=cut