The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
######################################################
# Tree.pm
######################################################
# Author:  Weigang Qiu, Chengzhi Liang, Peter Yang, Thomas Hladish
# $Id: Tree.pm,v 1.62 2007/09/21 23:09:09 rvos Exp $

#################### START POD DOCUMENTATION ##################

=head1 NAME

Bio::NEXUS::Tree - Provides functions for manipulating trees

=head1 SYNOPSIS

new Bio::NEXUS::Tree;

=head1 DESCRIPTION

Provides a few useful functions for trees.

=head1 FEEDBACK

All feedback (bugs, feature enhancements, etc.) are all greatly appreciated. There are no mailing lists at this time for the Bio::NEXUS::Tree module, so send all relevant contributions to Dr. Weigang Qiu (weigang@genectr.hunter.cuny.edu).

=head1 AUTHORS

 Eugene Melamud (melamud@carb.nist.gov)
 Thomas Hladish (tjhladish at yahoo)
 Weigang Qiu (weigang@genectr.hunter.cuny.edu)
 Chengzhi Liang (liangc@umbi.umd.edu)
 Peter Yang (pyang@rice.edu)

=head1 METHODS

=cut

package Bio::NEXUS::Tree;

use strict;
use Bio::NEXUS::Functions;
use Bio::NEXUS::Node;
#use Data::Dumper; # XXX this is not used, might as well not import it!
#use Carp;
use Bio::NEXUS::Util::Exceptions;
use Bio::NEXUS::Util::Logger;
use vars qw($VERSION $AUTOLOAD);
use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION;

my $logger = Bio::NEXUS::Util::Logger->new();

=head2 new

 Title   : new
 Usage   : $tree = new Bio::NEXUS::Tree();
 Function: Creates a new Bio::NEXUS::Tree object
 Returns : Bio::NEXUS::Tree object
 Args    : none

=cut

sub new {
    my ($class) = @_;
    my $root_node = new Bio::NEXUS::Node;
    my $self = { name => undef, root_node => $root_node };
    bless $self, $class;
    return $self;
}

=head2 clone

 Name    : clone
 Usage   : my $new_tree = $self->clone();
 Function: clone a Bio::NEXUS::Tree (self) object. All the nodes are also cloned. 
 Returns : new Bio::NEXUS::Tree object
 Args    : none

=cut

sub clone {
    my ($self) = @_;
    my $class = ref($self);
    my $newtree = bless( { %{$self} }, $class );

    # clone nodes
    $newtree->set_rootnode( $self->get_rootnode()->clone() );
    return $newtree;
}

=head2 set_rootnode

 Title   : set_rootnode
 Usage   : $tree->set_rootnode($newnode);
 Function: Sets the root node to a new node
 Returns : none
 Args    : root node (Bio::NEXUS::Node object)

=cut

sub set_rootnode {
    my $self    = shift;
    my $newroot = shift;
    $self->{root_node} = $newroot;
}

=head2 get_rootnode

 Title   : get_rootnode
 Usage   : $node = $tree->get_rootnode();
 Function: Returns the tree root node 
 Returns : root node (Bio::NEXUS::Node object)
 Args    : none

=cut

sub get_rootnode {
    my $self = shift;
    if ( defined $self->{'root_node'} ) {
        return $self->{'root_node'};
    }
}

=begin comment

 Title   : _parse_newick
 Usage   : $tree->_parse_newick($tree_string);
 Function: Creates a tree out of the existing tree string
 Returns : none
 Args    : array ref of NEXUS 'words' (a newick tree string that has been parsed by &_parse_nexus_words)

=end comment 

=cut

sub _parse_newick {
    my ( $self, $tree_words ) = @_;

    my $root = $self->get_rootnode();
    $root->_parse_newick($tree_words);
    $self->set_depth();
    $self->determine_cladogram();
    return;
}

=head2 set_name

 Title   : set_name
 Usage   : $tree->set_name($name);
 Function: Sets the tree name
 Returns : none
 Args    : name (string)

=cut

sub set_name {
    my ( $self, $name ) = @_;
    $self->{'name'} = $name;
}

=head2 get_name

 Title   : get_name
 Usage   : $name = $tree->get_name();
 Function: Returns the tree's name
 Returns : name (string) or undef if name doesn't exist
 Args    : none

=cut

sub get_name {
    if ( defined $_[0]->{'name'} ) {
        return $_[0]->{'name'};
    }
    else {
        return undef;
    }
}

=head2 set_as_default

 Title   : set_as_default
 Usage   : $tree->set_as_default();
 Function: assigns is_default variable for this object to 1. (default : 0)
 Returns : none
 Args    : none

=cut

sub set_as_default {
    my $self = shift;
    $self->{'is_default'} = 1;
}

=head2 is_default

 Title   : is_default
 Usage   : $is_default_tree = $tree->is_default();
 Function: check whether the tree is assigned as the default.
 Returns : 0 (false) or 1 (true)
 Args    : none

=cut

sub is_default {
    my $self = shift;
    return $self->{'is_default'};
}

=head2 set_as_unrooted

 Title   : set_as_unrooted
 Usage   : $tree->set_as_unrooted();
 Function: assigns is_unrooted variable for this object to 1. (default : 0)
 Returns : none
 Args    : none

=cut

sub set_as_unrooted {
    my $self = shift;
    $self->{'is_unrooted'} = 1;
}

=head2 is_rooted

 Title   : is_rooted
 Usage   : $is_rooted_tree = $tree->is_rooted();
 Function: Check whether the tree is rooted.
 Returns : 0 (false) or 1 (true)
 Args    : none

=cut

sub is_rooted {
    my $self = shift;
    return !$self->{'is_unrooted'};
}

=head2 determine_cladogram

 Title   : determine_cladogram
 Usage   : $tree->determine_cladogram();
 Function: Determine if a tree is a cladogram or not (that is, whether branch lengths are present)
 Returns : none
 Args    : none

=cut

sub determine_cladogram {
    my $self = shift;
    my $root = $self->get_rootnode();
    if ( $root->find_lengths() ) {
        $self->{'is_cladogram'} = 0;
    }
    else {
        $self->{'is_cladogram'} = 1;
    }
}

=head2 set_output_format 

 Title   : set_output_format 
 Usage   : $tree->set_output_format('STD');  
 Function: Sets the output format for the Tree, (options : STD or NHX)
 Returns : none
 Args    : string: 'STD' or 'NHX'

=cut

sub set_output_format {
    my ( $self, $format ) = @_;
    $self->{'_out_format'} = $format;
}

=head2 get_output_format 

 Title   : get_output_format 
 Usage   : $output_format = $tree->get_output_format();  
 Function: Returns the output format for the Tree, (options : STD or NHX)
 Returns : string: 'STD' or 'NHX'
 Args    : none

=cut

sub get_output_format {
    my ($self) = @_;
    if ( defined $self->{_out_format} ) {
        return $self->{_out_format};
    }
    else {
        my $format = 'STD';
        my $nodes  = $self->get_nodes();
        my @otus;
        for my $node ( @{$nodes} ) {
            if ( $node->{is_nhx} ) {
                $format = 'NHX';
                last;
            }
        }
        $self->{_out_format} = $format;
    }
    return $self->{_out_format};
}

=head2 is_cladogram

 Title   : is_cladogram
 Usage   : &dothis() if $tree->is_cladogram();
 Function: Returns whether tree is a cladogram or not
 Returns : 0 (no) or 1 (yes)
 Args    : none

=cut

sub is_cladogram {
    my $self = shift;
    return $self->{'is_cladogram'};
}

=head2 as_string

 Title   : as_string
 Usage   : $treestring = $tree->as_string();
 Function: Returns the tree as a string
 Returns : tree string (string)
 Args    : none

=cut

sub as_string {
    my $self = shift;
    my $root = $self->get_rootnode();
    my $string;
    $root->to_string( \$string, 0, $self->get_output_format );
    $string =~ s/\,$/\;/;
    return $string;
}

=head2 as_string_inodes_nameless

 Title   : as_string_inodes_nameless
 Usage   : $treestring = $tree->as_string_inodes_nameless();
 Function: Returns the tree as a string without internal node names
 Returns : tree string (string)
 Args    : none

=cut

sub as_string_inodes_nameless {
    my $self = shift;
    my $root = $self->get_rootnode();
    my $string;
    $root->to_string( \$string, 1, $self->get_output_format );
    $string =~ s/\,$/\;/;
    return $string;
}

=head2 get_nodes

 Title   : get_nodes
 Usage   : @nodes = @{$tree->get_nodes()};
 Function: Returns the list of ALL nodes in the tree
 Returns : reference to array of nodes (Bio::NEXUS::Node objects)
 Args    : none

=cut

sub get_nodes {
    my $self = shift;
    my $root = $self->get_rootnode();
    my @nodes;
    my $i = 1;
    $root->walk( \@nodes, \$i );
    $root->set_name('root')
        if !$root->get_name() || $root->get_name() =~ /^inode1/;
    return \@nodes;
}

=head2 get_node_names

 Title   : get_node_names
 Usage   : @otu_names = @{$tree->get_node_names()};
 Function: Returns the list of names of otus (terminal nodes)
 Returns : array ref of node names
 Args    : none

=cut

sub get_node_names {
    my $self  = shift;
    my $nodes = $self->get_nodes();
    my @otus;
    for my $node ( @{$nodes} ) {
        if ( $node->is_otu() ) {
            push @otus, $node->get_name();
        }
    }
    return \@otus;
}

=head2 get_distances

 Title   : get_distances
 Usage   : %distances = %{$tree->get_distances()};
 Function: Finds the distances from the root node for all OTUs
 Returns : reference to a hash of OTU names as keys and distances as values
 Args    : none

=cut

sub get_distances {
    my $self  = shift;
    my $nodes = $self->get_nodes();
    my $root  = $self->get_rootnode();
    my %distances;
    for my $node ( @{$nodes} ) {
        $distances{ $node->get_name() } = $root->get_distance($node);
    }
    return \%distances;
}

=head2 get_tree_length

 Title   : get_tree_length
 Usage   : $tre_length  = $self->get_tree_length;
 Function: Gets the total branch lengths in the tree.
 Returns : total branch length
 Args    : none

=cut

sub get_tree_length {
    my $self = shift;
    my $root = $self->get_rootnode();
    return $root->get_total_length();
}

=head2 get_support_values

 Title   : get_support_values
 Usage   : %bootstraps = %{$tree->get_support_values()};
 Function: Finds all branch support values for all OTUs
 Returns : reference to a hash where OTU names are keys and branch support values are values
 Args    : none

=cut

sub get_support_values {
    my $self  = shift;
    my $nodes = $self->get_nodes();
    my %bootstraps;
    for my $node ( @{$nodes} ) {
        my $boot = $node->get_support_value();
        $bootstraps{ $node->get_name() } = $boot if $boot;
    }
    return \%bootstraps;
}

=begin comment

 Title   : _set_xcoord
 Usage   : $tree->_set_xcoord($xpos,$maxx);
 Function: Determines x coords of OTUs and internal nodes
 Returns : none
 Args    : maximum x (number)

=end comment 

=cut

sub _set_xcoord {
    my ( $self, $maxx, $cladogramMethod ) = @_;
    my $xcoord =
        [ { 'node' => '', 'xcoord' => '' }, { 'node' => '', 'xcoord' => '' } ];
    my $root  = $self->get_rootnode();
    my @nodes = @{ $self->get_nodes() };
    if ( $self->is_cladogram() || $cladogramMethod ) {
        $cladogramMethod = 'normal' unless $cladogramMethod;
        my $maxdepth = $self->max_depth();
        my $unit     = $maxx / $maxdepth;
        my @xcoord;
        if ( $cladogramMethod eq "accelerated" ) {
            for my $node (@nodes) {
                if ( $node->is_otu() ) {
                    $node->_set_xcoord( $maxdepth * $unit );
                }
                else {
                    $node->_set_xcoord( $node->get_depth() * $unit );
                }
            }
        }
        elsif ( $cladogramMethod eq "normal" ) {
            my %depth = %{ $self->get_depth() };
            for my $node (@nodes) {
                $node->_set_xcoord( $node->get_depth() * $unit );
            }
        }
    }
    else {
        for my $node (@nodes) {
            $node->_set_xcoord( $root->get_distance($node) );
        }
    }
}

=begin comment

 Title   : _set_ycoord
 Usage   : $tree->_set_ycoord($ypos,$spacing);
 Function: Determines y coords of OTUs and internal nodes
 Returns : none
 Args    : initial y position (number), space between OTUs (number)

=end comment 

=cut

sub _set_ycoord {
    my ( $self, $ypos, $spacing ) = @_;
    my $root = $self->get_rootnode();
    $root->_assign_otu_ycoord( \$ypos, \$spacing );
    $root->_assign_inode_ycoord();
}

=head2 set_depth

 Title   : set_depth
 Usage   : $tree->set_depth();
 Function: Sets depth of root node
 Returns : none
 Args    : none

=cut

sub set_depth {
    my $self = shift;
    my $root = $self->get_rootnode();
    $root->set_depth(0);
}

=head2 get_depth

 Title   : get_depth
 Usage   : %depth=%{$tree->get_depth()};
 Function: Get depth in tree of all OTUs and internal nodes
 Returns : reference to hash with keys = node names and values = depth
 Args    : none

=cut

sub get_depth {
    my $self  = shift;
    my $nodes = $self->get_nodes();
    my %depth;
    for my $node ( @{$nodes} ) {
        my $d = $node->get_depth();
        $depth{ $node->get_name() } = $d if ( $d || ( $d == 0 ) );
    }
    return \%depth;
}

=head2 max_depth

 Title   : max_depth
 Usage   : $maxdepth=%{$tree->max_depth()};
 Function: Get maximum depth of tree
 Returns : integer indicating maximum depth
 Args    : none

=cut

sub max_depth {
    my $self   = shift;
    my %depth  = %{ $self->get_depth() };
    my @sorted = sort { $a <=> $b } values %depth;
    return ( pop @sorted );
}

=head2 find

 Title   : find
 Usage   : $node = $tree->find($name);
 Function: Finds the first occurrence of a node called 'name' in the tree
 Returns : Bio::NEXUS::Node object
 Args    : name (string)

=cut

sub find {
    my ( $self, $name ) = @_;
    my $rootnode = $self->get_rootnode();
    my $node     = $rootnode->find($name);
    return $node;
}

=head2 find_all

 Title   : find_all
 Usage   : @nodes = @{ $tree->find_all($name) };
 Function: find all occurrences of nodes called 'name' in the tree
 Returns : Bio::NEXUS::Node objects
 Args    : name (string)

=cut

sub find_all {
    my $self = shift;
    my @nodes;
    my @all_nodes = @{ $self->get_nodes() };
    my $name      = shift;
    for my $node (@all_nodes) {
        if ( $name eq $node->get_name() ) {
            push( @nodes, $node );
        }
    }
    return \@nodes;
}

=head2 prune

 Name    : prune
 Usage   : $tree->prune($OTUlist);
 Function: Removes everything from the tree except for OTUs specified in $OTUlist
 Returns : none
 Args    : list of OTUs (string)

=cut

sub prune {
    my ( $self, $OTUlist ) = @_;
    $OTUlist = ' ' . $OTUlist . ' ';
    my $rootnode = $self->get_rootnode();
    $rootnode->prune($OTUlist);
}

=head2 equals

 Name    : equals
 Usage   : $tree->equals($another_tree);
 Function: compare if two trees are equivalent in topology
 Returns : 1 if equal or 0 if not
 Args    : another Bio::NEXUS::Tree object

=cut

sub equals {
    my ( $self, $tree ) = @_;

    if ( $self->get_name() ne $tree->get_name() ) { return 0; }
    return $self->get_rootnode()->equals( $tree->get_rootnode() );
}

sub _equals_test {
    my ( $self, $tree ) = @_;

    if ( $self->get_name() ne $tree->get_name() ) { return 0; }
    return $self->get_rootnode()->_equals_test( $tree->get_rootnode() );
}

=head2 reroot

 Name    : reroot
 Usage   : $tree = $tree->reroot($outgroup_name);
 Function: re-root a tree with a node as outgroup
 Returns : 
 Args    : the node name to be used as new outgroup

=cut

sub reroot {
    my ( $self, $outgroup_name, $dist_back_to_newroot ) = @_;
    if ( not defined $outgroup_name ) {
    	Bio::NEXUS::Util::Exceptions::BadArgs->throw(
    		'error' => 'An outgroup name must be supplied as an argument in order to reroot'
    	);
    }

    my $tree = $self->clone();

    # find the current root of the tree
    my $oldroot = $tree->get_rootnode();

    # rename it, since nexplot relies on all nodes having unique names
    &_rename_oldroot( $tree, $oldroot );

    # get the outgroup node
    my $outgroup = $tree->find($outgroup_name);

    # create & name a new node that will become the new root
    my $newroot = new Bio::NEXUS::Node();

    if (   $dist_back_to_newroot
        && $dist_back_to_newroot == $outgroup->get_length() )
    {
        $newroot = $outgroup->get_parent();
        $outgroup->set_length($dist_back_to_newroot);
        $newroot->get_parent()->_rearrange($newroot);
    }
    else {

        # find the node that will (temporarily) become the newroot's parent
        my $outgroup_old_parent = $outgroup->get_parent();

        # get the siblings of the outgroup
        my $newroot_siblings = $outgroup->get_siblings();

        # get the correct branch lengths for newroot and outgroup
        &_position_newroot( $outgroup, $newroot, $dist_back_to_newroot );

        # make outgroup the newroot's child and newroot the outgroup's parent
        $newroot->adopt( $outgroup, 1 );

        # remove the outgroup from the old parent's children
        $outgroup_old_parent->set_children($newroot_siblings);

        # add the newroot as a child
        $outgroup_old_parent->adopt( $newroot, 0 );

# recursively reverse the parent-child relationships between newroot and oldroot
        $outgroup_old_parent->_rearrange($newroot);
    }

    # set newroot's values to make it root
    $newroot->set_name('root');
    $newroot->set_parent_node();
    $newroot->set_support_value();
    $newroot->set_length();
    $newroot->set_depth(0);
    $tree->set_rootnode($newroot);

    # remove oldroot if the tree was bifurcating
    &_remove_oldroot_if_superfluous($oldroot);

    return $tree;
}

sub _rename_oldroot {
    my ( $tree, $oldroot ) = @_;
    my $i               = 0;
    my $renamed_oldroot = 0;
    my $oldroot_name    = 'oldroot';
    while ( $renamed_oldroot == 0 ) {
        if ( !$tree->find("$oldroot_name") ) {
            $oldroot->set_name("$oldroot_name");
            $renamed_oldroot = 1;
        }
        else {
            $oldroot_name = "oldroot" . "$i";
            $i++;
        }
    }
}

sub _position_newroot {
    my ( $outgroup, $newroot, $dist_back_to_newroot ) = @_;
    if ( $outgroup->get_length() ) {
        my $outgroup_length = $outgroup->get_length();
        if ($dist_back_to_newroot) {
            if (   $dist_back_to_newroot < $outgroup_length
                && $dist_back_to_newroot > 0 )
            {
                ## $dist_back_to_newroot should already be negative
                $newroot->set_length(
                    $outgroup_length - $dist_back_to_newroot );
                $outgroup->set_length($dist_back_to_newroot);
            }
            else {
                Bio::NEXUS::Util::Exceptions::BadNumber->throw(
                	'error' => "Branch length error: The new root's position\n"
                			. "up the tree from the outgroup must be a positive\n"
                			. "number less than or equal to the outgroup's branch length.\n"
                );
            }
        }
        else {
            $newroot->set_length( $outgroup_length / 2 );
            $outgroup->set_length( $outgroup_length / 2 );
        }
    }
    else {
        if ($dist_back_to_newroot) {
        	Bio::NEXUS::Util::Exceptions::BadArgs->throw(
        		'error' => "You provided a position for the new root on the\n"
        				. "outgroup's branch length, but the outgroup does\n"
        				. "not have a branch length.\n"
        	);
        }
    }
}

sub _remove_oldroot_if_superfluous {
    my ($oldroot) = @_;
    if ( @{ $oldroot->get_children() } == 1 ) {
        my $oldroot_child = ${ $oldroot->get_children() }[0];
        if (   defined $oldroot->get_length()
            || defined $oldroot_child->get_length() )
        {
            $oldroot_child->set_length(
                $oldroot->get_length() + $oldroot_child->get_length() );
        }
        my $oldroot_parent = $oldroot->get_parent();
        $oldroot_parent->set_children( $oldroot->get_siblings() );
        $oldroot_parent->adopt( $oldroot_child, 0 );
    }
}

=head2 select_subtree 

 Name    : select_subtree
 Usage   : $new_tree_obj = $self->select_subtree($node_name);
 Function: selects the subtree (the given node and all its children) from the tree object.
 Returns : new Bio::NEXUS::Tree object
 Args    : Node name

=cut

sub select_subtree {
    my ( $self, $nodename ) = @_;
    my $newroot  = $self->find($nodename);
    my $treename = $self->get_name();
    if ( not $newroot ) {
    	Bio::NEXUS::Util::Exceptions::BadArgs->throw(
    		'error' => "Node $nodename not found in $treename"
    	);
    }
    $newroot = $newroot->clone();    # need to clone subtree
    $newroot->set_parent_node();     # make it as root
    $newroot->set_support_value();
    $newroot->set_length();
    my $tree = new Bio::NEXUS::Tree();
    $tree->set_name( $self->get_name() );
    $tree->set_rootnode($newroot);
    return $tree;
}

=head2 exclude_subtree

 Name    : exclude_subtree
 Usage   : $new_tree_obj = $self->exclude_subtree($node_name);
 Function: removes the given node and all its children from the tree object.
 Returns : new Bio::NEXUS::Tree object
 Args    : Node name

=cut

sub exclude_subtree {
    my ( $self, $nodename ) = @_;
    my $treename   = $self->get_name();
    my $tree       = $self->clone();
    my $removenode = $tree->find($nodename);
    
    if ( not $removenode ) {
    	Bio::NEXUS::Util::Exceptions::BadArgs->throw(
    		'error' => "Node $nodename not found in $treename"
    	);
    }    

    my $parent   = $removenode->get_parent();
    my @children = @{ $parent->get_children() };
    $parent->set_children();
    for my $child (@children) {
        if ( $child->get_name() ne $removenode->get_name() ) {
            $parent->add_child($child);
        }
    }
    if ( @{ $parent->get_children() } == 1 ) {
        my $sibling = $parent->get_children()->[0];
        $parent->combine($sibling);
    }

    return $tree;
}

=head2 get_mrca_of_otus

 Name    : get_mrca_of_otus
 Usage   : $node = $self->get_mrca_of_otus($otus);
 Function: gets the most recent common ancestor for the input $otus
 Returns : Bio::NEXUS::Node object
 Args    : $otus : Array reference of the OTUs

=cut

sub get_mrca_of_otus {
    my ( $self, $otus) = @_;
    my $root_node = $self->get_rootnode;
   return $root_node->get_mrca_of_otus($otus);
}

sub AUTOLOAD {
    return if $AUTOLOAD =~ /DESTROY$/;
    my $package_name = __PACKAGE__ . '::';

    # The following methods are deprecated and are temporarily supported
    # via a warning and a redirection
    my %synonym_for = (
        "${package_name}node_list"  => "${package_name}get_nodes",
        "${package_name}otu_list"   => "${package_name}get_node_names",
        "${package_name}set_xcoord" => "${package_name}_set_xcoord",
        "${package_name}set_ycoord" => "${package_name}_set_ycoord",
        "${package_name}name"       => "${package_name}get_name",
        "${package_name}set_tree"   => "${package_name}_parse_newick",
    );

    if ( defined $synonym_for{$AUTOLOAD} ) {
        $logger->warn("$AUTOLOAD() is deprecated; use $synonym_for{$AUTOLOAD}() instead");
        goto &{ $synonym_for{$AUTOLOAD} };
    }
    else {
        Bio::NEXUS::Util::Exceptions::UnknownMethod->throw(
        	'error' => "ERROR: Unknown method $AUTOLOAD called"
        );
    }
    return;
}

1;