The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Phylo::Forest;
use strict;
use base qw'Bio::Phylo::Listable Bio::Phylo::Taxa::TaxaLinker';
use Bio::Phylo::Util::CONSTANT qw':objecttypes /looks_like/';
use Bio::Phylo::Util::Exceptions 'throw';
use Bio::Phylo::Factory;

=begin comment

This class has no internal state, no cleanup is necessary.

=end comment

=cut

{
    my $logger             = __PACKAGE__->get_logger;
    my $factory            = Bio::Phylo::Factory->new;
    my $CONSTANT_TYPE      = _FOREST_;
    my $CONTAINER_CONSTANT = _PROJECT_;

=head1 NAME

Bio::Phylo::Forest - Container for tree objects

=head1 SYNOPSIS

 use Bio::Phylo::Factory;
 my $fac = Bio::Phylo::Factory->new;
 my $forest = $fac->create_forest;
 my $tree = $fac->create_tree;
 $forest->insert($tree);
 print $forest->to_nexus;

=head1 DESCRIPTION

The Bio::Phylo::Forest object models a set of trees. The object subclasses the
L<Bio::Phylo::Listable> object, so look there for more methods available to
forest objects.

=head1 CALCULATIONS

=over

=item calc_split_frequency()

Calculates frequency of provided split

 Type    : Calculation
 Title   : calc_split_frequency
 Usage   : my $freq = $trees->calc_split_frequency([$node1,$node2]);
 Function: Calculates split frequency
 Returns : Scalar, a number
 Args    : An array of taxon objects, or a taxa object
 Comment :

=cut

    sub calc_split_frequency {
        my ( $self, $arg ) = @_;
        my @trees  = @{ $self->get_entities };
        my $ntrees = scalar @trees;
        if ($ntrees) {
            my $count = 0;
            for my $tree (@trees) {
                $count++ if $tree->is_clade($arg);
            }
            return $count / $ntrees;
        }
        return 0;
    }

=back

=head1 METHODS

=over

=item insert()

Inserts trees in forest.

 Type    : Method
 Title   : insert
 Usage   : $trees->insert( $tree1, $tree2, ... );
 Function: Inserts trees in forest.
 Returns : A Bio::Phylo::Forest object.
 Args    : Trees
 Comment : The last seen tree that is set as default
           becomes the default for the entire forest

=cut

    sub insert {
        my $self = shift;
        if ( $self->can_contain(@_) ) {
            my $seen_default = 0;
            for my $tree ( reverse @_ ) {
                if ( $tree->is_default ) {
                    if ( not $seen_default ) {
                        $seen_default++;
                    }
                    else {
                        $tree->set_not_default;
                    }
                }
            }
            if ($seen_default) {
                if ( my $tree = $self->get_default_tree ) {
                    $tree->set_not_default;
                }
            }
            $self->SUPER::insert(@_);
        }
        else {
            throw 'ObjectMismatch' => "Failed insertion: @_ [in $self]";
        }
    }

=item get_default_tree()

Gets the default tree in the forest.

 Type    : Method
 Title   : get_default_tree
 Usage   : my $tree = $trees->get_default_tree;
 Function: Gets the default tree in the forest.
 Returns : A Bio::Phylo::Forest::Tree object.
 Args    : None
 Comment : If no default tree has been set, 
           returns first tree. 

=cut

    sub get_default_tree {
        my $self  = shift;
        my $first = $self->first;
        for my $tree ( @{ $self->get_entities } ) {
            return $tree if $tree->is_default;
        }
        return $first;
    }

=item check_taxa()

Validates taxon links of nodes in invocant's trees.

 Type    : Method
 Title   : check_taxa
 Usage   : $trees->check_taxa;
 Function: Validates the taxon links of the
           nodes of the trees in $trees
 Returns : A validated Bio::Phylo::Forest object.
 Args    : None

=cut

    sub check_taxa {
        my $self = shift;

        # is linked
        if ( my $taxa = $self->get_taxa ) {
            my %tips;
			
			# build a hash of all the unlinked tips by their names
            TIP: for my $tip ( map { @{ $_->get_terminals } } @{ $self->get_entities } ) {
				next TIP if $tip->get_taxon && $taxa->contains($tip->get_taxon);
                my $name = $tip->get_internal_name;
                if ( not $tips{$name} ) {
                    $tips{$name} = [];
                }
                push @{ $tips{$name} }, $tip;
            }
			
			# build a hash of the available taxa
            my %taxa = map { $_->get_internal_name => $_ } @{ $taxa->get_entities };
			
			# iterate over unlinked tip
            for my $name ( keys %tips ) {
                $logger->debug("linking tip $name");
				
				# tip not seen yet, creating new
                if ( not exists $taxa{$name} ) {
                    $logger->debug("no taxon object for $name yet, instantiating");
                    $taxa->insert( $taxa{$name} = $factory->create_taxon( '-name' => $name ) );
                }
				
				# link tips to newly created taxon
                for my $tip ( @{ $tips{$name} } ) {
                    $tip->set_taxon( $taxa{$name} );
                }
            }
        }

        # not linked
        else {
            for my $tree ( @{ $self->get_entities } ) {
                for my $node ( @{ $tree->get_entities } ) {
                    $node->set_taxon();
                }
            }
        }
        return $self;
    }

=item make_consensus()

Creates a consensus tree.

 Type    : Method
 Title   : make_consensus
 Usage   : my $tree = $obj->make_consensus
 Function: Creates a consensus tree
 Returns : $tree
 Args    : Optional:
	   -fraction => a fraction that specifies the cutoff frequency for including
                    bipartitions in the consensus. Default is 0.5 (MajRule)
	   -branches => 'frequency' or 'average', sets branch lengths to bipartition
	                frequency or average branch length in input trees

=cut

    sub make_consensus {
        my $self     = shift;
        my %args     = looks_like_hash @_;
        my $perc     = $args{'-fraction'} || 0.5;
        my $branches = $args{'-branches'} || 'freq';
        my %seen_partitions;
        my %clade_lengths;
        my $tree_count = 0;
        my $average    = sub {
            my @list = @_;
            my $sum  = 0;
            for my $val (@list) {
                $sum += $val if defined $val;
            }
            my $avg = $sum / scalar @list;
            return $avg;
        };

# here we populate a hash whose keys are strings identifying all bipartitions in all trees
# in the forest. Because we construct these strings by concatenating (with an unlikely
# separator) all tips in that clade after sorting them alphabetically, we will get
# the same string in topologically identical clades across trees. We use these keys
# to keep a running tally of all seen bipartitions.
        for my $tree ( @{ $self->get_entities } ) {
            for my $node ( @{ $tree->get_internals } ) {

           # whoever puts this string in their input tree gets what he deserves!
                my $clade =
                  join '!\@\$%^&****unlikely_clade_separator***!\@\$%^&****',
                  sort { $a cmp $b }
                  map  { $_->get_internal_name } @{ $node->get_terminals };
                $seen_partitions{$clade}++;
                if ( not exists $clade_lengths{$clade} ) {
                    $clade_lengths{$clade} = [];
                }
                push @{ $clade_lengths{$clade} }, $node->get_branch_length;
            }
            for my $tip ( @{ $tree->get_terminals } ) {
                my $clade = $tip->get_internal_name;
                if ( not exists $clade_lengths{$clade} ) {
                    $clade_lengths{$clade} = [];
                }
                push @{ $clade_lengths{$clade} }, $tip->get_branch_length;
            }
            $tree_count++;
        }

# here we remove the seen bipartitions that occur in fewer trees than in the specified
# fraction
        my @by_size =
          sort { $seen_partitions{$b} <=> $seen_partitions{$a} }
          keys %seen_partitions;
        my $largest    = shift @by_size;
        my @partitions = keys %seen_partitions;
        for my $partition (@partitions) {
            if ( ( $seen_partitions{$partition} / $tree_count ) <= $perc ) {
                delete $seen_partitions{$partition};
            }
        }

# we now sort the clade strings by size, which automatically means once we start
# traversing them that we will visit the bipartitions in the right nesting order
        my @sorted = sort { length($b) <=> length($a) } keys %seen_partitions;
        my %seen_nodes;
        my $tree = $factory->create_tree;
        if ( @sorted == 0 ) {
            push @sorted, $largest;
            $seen_partitions{$largest} = $tree_count;
        }
        for my $partition (@sorted) {

            # now create the individual tip names again from the key string
            my @tips =
              split /\Q!\@\$%^&****unlikely_clade_separator***!\@\$%^&****\E/,
              $partition;

            # create the tip object if we haven't done so already
            for my $tip (@tips) {
                if ( not exists $seen_nodes{$tip} ) {
                    my $node = $factory->create_node( '-name' => $tip );
                    if ( $branches =~ /^f/i ) {
                        $node->set_branch_length(1.0);
                        $node->set_generic( 'average_branch_length' =>
                              $average->( @{ $clade_lengths{$tip} } ) );
                    }
                    else {
                        $node->set_branch_length(
                            $average->( @{ $clade_lengths{$tip} } ) );
                        $node->set_generic( 'bipartition_frequency' => 1.0 );
                    }
                    $seen_nodes{$tip} = $node;
                    $tree->insert($node);
                }
            }

            # create the new parent node
            my $new_parent = $factory->create_node();
            if ( $branches =~ /^f/i ) {
                $new_parent->set_branch_length(
                    $seen_partitions{$partition} / $tree_count );
                $new_parent->set_name(
                    $average->( @{ $clade_lengths{$partition} } ) );
            }
            else {
                $new_parent->set_branch_length(
                    $average->( @{ $clade_lengths{$partition} } ) );
                $new_parent->set_name(
                    $seen_partitions{$partition} / $tree_count );
            }
            $tree->insert($new_parent);

# check to see if there is an old parent node: we want to squeeze the new parent
# node between the old parent and its children
            my $old_parent = $seen_nodes{ $tips[0] }->get_parent;
            if ($old_parent) {
                $new_parent->set_parent($old_parent);
            }

            # now assign the new parent to the tips in the current bipartition
            for my $tip (@tips) {
                my $node = $seen_nodes{$tip};
                $node->set_parent($new_parent);
            }
        }

# theoretically, the root length should be 1.0 because this "partition is present
# in all trees. But it's too much trouble to stick :-)
        $tree->get_root->set_branch_length();
        return $tree;
    }

=item make_matrix()

Creates an MRP matrix object.

 Type    : Method
 Title   : make_matrix
 Usage   : my $matrix = $obj->make_matrix
 Function: Creates an MRP matrix object
 Returns : $matrix
 Args    : NONE

=cut

    sub make_matrix {
        my $self   = shift;
        my $taxa   = $self->make_taxa;
        my $matrix = $factory->create_matrix;
        $matrix->set_taxa($taxa);
        my ( %data, @charlabels, @statelabels );
        for my $taxon ( @{ $taxa->get_entities } ) {
            my $datum = $factory->create_datum;
            $datum->set_taxon($taxon);
            $datum->set_name( $taxon->get_name );
            $matrix->insert($datum);
            $data{ $taxon->get_name } = [];
        }
        my $recursion = sub {
            my ( $node, $tree, $taxa, $method ) = @_;
            push @charlabels, $tree->get_internal_name;
            push @statelabels, [ 'outgroup', $node->get_nexus_name ];
            my %tip_values =
              map { $_->get_name => 1 } @{ $node->get_terminals };
            for my $tipname ( map { $_->get_name } @{ $tree->get_terminals } ) {
                $tip_values{$tipname} = 0 if not exists $tip_values{$tipname};
            }
            for my $datumname ( keys %data ) {
                if ( exists $tip_values{$datumname} ) {
                    push @{ $data{$datumname} }, $tip_values{$datumname};
                }
                else {
                    push @{ $data{$datumname} }, '?';
                }
            }
            $method->( $_, $tree, $taxa, $method )
              for grep { $_->is_internal } @{ $node->get_children };
        };
        for my $tree ( @{ $self->get_entities } ) {
			if ( my $root = $tree->get_root ) {
				$recursion->( $root, $tree, $taxa, $recursion );
			}
        }
        for my $datum ( @{ $matrix->get_entities } ) {
			if ( my $data = $data{ $datum->get_name } ) {
				$datum->set_char( $data ) if @{ $data };
			}
        }
        $matrix->set_charlabels( \@charlabels );
        $matrix->set_statelabels( \@statelabels );
        return $matrix;
    }

=item make_taxa()

Creates a taxa block from the objects contents if none exists yet.

 Type    : Method
 Title   : make_taxa
 Usage   : my $taxa = $obj->make_taxa
 Function: Creates a taxa block from the objects contents if none exists yet.
 Returns : $taxa
 Args    : NONE

=cut

    sub make_taxa {
        my $self = shift;
        if ( my $taxa = $self->get_taxa ) {
            return $taxa;
        }
        else {
            my %taxa;
            my $taxa = $factory->create_taxa;
            for my $tree ( @{ $self->get_entities } ) {
                for my $tip ( @{ $tree->get_terminals } ) {
                    my $name = $tip->get_internal_name;
                    if ( not $taxa{$name} ) {
                        $taxa{$name} =
                          $factory->create_taxon( '-name' => $name );
                    }
                }
            }
            if (%taxa) {
                $taxa->insert( map { $taxa{$_} }
                      sort { $a cmp $b } keys %taxa );
            }
            $self->set_taxa($taxa);
            return $taxa;
        }
    }

=item to_newick()

Serializes invocant to newick string.

 Type    : Stringifier
 Title   : to_newick
 Usage   : my $string = $forest->to_newick;
 Function: Turns the invocant forest object 
           into a newick string, one line per tree
 Returns : SCALAR
 Args    : The same arguments as 
           Bio::Phylo::Forest::Tree::to_newick

=cut

    sub to_newick {
        my $self = shift;
        my $newick;
        for my $tree ( @{ $self->get_entities } ) {
            $newick .= $tree->to_newick(@_) . "\n";
        }
        return $newick;
    }

=item to_nexus()

Serializer to nexus format.

 Type    : Format convertor
 Title   : to_nexus
 Usage   : my $data_block = $matrix->to_nexus;
 Function: Converts matrix object into a nexus data block.
 Returns : Nexus data block (SCALAR).
 Args    : Trees can be formatted using the same arguments as those
           passed to Bio::Phylo::Unparsers::Newick. In addition, you
           can provide: 
           
           # as per mesquite's inter-block linking system (default is false):
           -links => 1 (to create a TITLE token, and a LINK token, if applicable)
           
           # rooting is determined based on basal trichotomy. "token" means 'TREE' or 'UTREE'
           # is used, "comment" means [&R] or [&U] is used, "nhx" means [%unrooted=on] or
           # [%unrooted=off] if used, default is "comment"
           -rooting => one of (token|comment|nhx)
           
           # to map taxon names to indices (default is true)
           -make_translate => 1 (autogenerate translation table, overrides -translate => {})
		   
		   # when making a translation table, which index to start (default is
		   # 1, BayesTraits needs 0)
		   -translate_start => 1
 Comments:

=cut

    sub to_nexus {
        my $self = shift;
        my %args = (
			'-rooting'         => 'comment',
			'-make_translate'  => 1,
			'-translate_start' => 1,
			@_
		);
        my %translate;
        my $nexus;

        # make translation table
        if ( $args{'-make_translate'} ) {
            my $i = 0;
            for my $tree ( @{ $self->get_entities } ) {
                for my $node ( @{ $tree->get_terminals } ) {
                    my $name;
                    if ( not $args{'-tipnames'} ) {
                        $name = $node->get_nexus_name;
                    }
                    elsif ( $args{'-tipnames'} =~ /^internal$/i ) {
                        $name = $node->get_nexus_name;
                    }
                    elsif ( $args{'-tipnames'} =~ /^taxon/i
                        and $node->get_taxon )
                    {
                        if ( $args{'-tipnames'} =~ /^taxon_internal$/i ) {
                            $name = $node->get_taxon->get_nexus_name;
                        }
                        elsif ( $args{'-tipnames'} =~ /^taxon$/i ) {
                            $name = $node->get_taxon->get_nexus_name;
                        }
                    }
                    else {
                        $name = $node->get_generic( $args{'-tipnames'} );
                    }
                    $translate{$name} = ( $args{'-translate_start'} + $i++ )
                      if not exists $translate{$name};
                }
            }
            $args{'-translate'} = \%translate;
        }

        # create header
        $nexus = "BEGIN TREES;\n";
        $nexus .=
            "[! Trees block written by "
          . ref($self) . " "
          . $self->VERSION . " on "
          . localtime() . " ]\n";
        if ( $args{'-links'} ) {
            delete $args{'-links'};
            $nexus .= "\tTITLE " . $self->get_nexus_name . ";\n";
            if ( my $taxa = $self->get_taxa ) {
                $nexus .= "\tLINK TAXA=" . $taxa->get_nexus_name . ";\n";
            }
        }

        # stringify translate table
        if ( $args{'-make_translate'} ) {
            delete $args{'-make_translate'};
            $nexus .= "\tTRANSLATE\n";
            my @translate;
            for ( keys %translate ) { $translate[ $translate{$_} - 1 ] = $_ }
            for my $i ( 0 .. $#translate ) {
                $nexus .= "\t\t" . ( $i + 1 ) . " " . $translate[$i];
                if ( $i == $#translate ) {
                    $nexus .= ";\n";
                }
                else {
                    $nexus .= ",\n";
                }
            }
        }

        # stringify trees
        for my $tree ( @{ $self->get_entities } ) {
            if ( $tree->is_rooted ) {
                if ( $args{'-rooting'} =~ /^token$/i ) {
                    $nexus .=
                        "\tTREE "
                      . $tree->get_nexus_name . ' = '
                      . $tree->to_newick(%args) . "\n";
                }
                elsif ( $args{'-rooting'} =~ /^comment$/i ) {
                    $nexus .=
                        "\tTREE "
                      . $tree->get_nexus_name
                      . ' = [&R] '
                      . $tree->to_newick(%args) . "\n";
                }
                elsif ( $args{'-rooting'} =~ /^nhx/i ) {
                    $tree->get_root->set_generic( 'unrooted' => 'off' );
                    if ( $args{'-nhxkeys'} ) {
                        push @{ $args{'-nhxkeys'} }, 'unrooted';
                    }
                    else {
                        $args{'-nhxkeys'} = ['unrooted'];
                    }
                    $nexus .=
                        "\tTREE "
                      . $tree->get_nexus_name . ' = '
                      . $tree->to_newick(%args) . "\n";
                }
            }
            else {
                if ( $args{'-rooting'} =~ /^token$/i ) {
                    $nexus .=
                        "\tUTREE "
                      . $tree->get_nexus_name . ' = '
                      . $tree->to_newick(%args) . "\n";
                }
                elsif ( $args{'-rooting'} =~ /^comment$/i ) {
                    $nexus .=
                        "\tTREE "
                      . $tree->get_nexus_name
                      . ' = [&U] '
                      . $tree->to_newick(%args) . "\n";
                }
                elsif ( $args{'-rooting'} =~ /^nhx/i ) {
                    $tree->get_root->set_generic( 'unrooted' => 'on' );
                    if ( $args{'-nhxkeys'} ) {
                        push @{ $args{'-nhxkeys'} }, 'unrooted';
                    }
                    else {
                        $args{'-nhxkeys'} = ['unrooted'];
                    }
                    $nexus .=
                        "\tTREE "
                      . $tree->get_nexus_name . ' = '
                      . $tree->to_newick(%args) . "\n";
                }
            }
        }

        # done!
        $nexus .= "END;\n";
        return $nexus;
    }

=begin comment

 Type    : Internal method
 Title   : _container
 Usage   : $trees->_container;
 Function:
 Returns : CONSTANT
 Args    :

=end comment

=cut

    sub _container { $CONTAINER_CONSTANT }

=begin comment

 Type    : Internal method
 Title   : _type
 Usage   : $trees->_type;
 Function:
 Returns : CONSTANT
 Args    :

=end comment

=cut

    sub _type { $CONSTANT_TYPE }
    sub _tag  { 'trees' }

=back

=cut

    # podinherit_insert_token

=head1 SEE ALSO

There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> 
for any user or developer questions and discussions.

=over

=item L<Bio::Phylo::Listable>

The forest object inherits from the L<Bio::Phylo::Listable>
object. The methods defined therein are applicable to forest objects.

=item L<Bio::Phylo::Taxa::TaxaLinker>

The forest object inherits from the L<Bio::Phylo::Taxa::TaxaLinker>
object. The methods defined therein are applicable to forest objects.

=item L<Bio::Phylo::Manual>

Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.

=back

=head1 CITATION

If you use Bio::Phylo in published research, please cite it:

B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
I<BMC Bioinformatics> B<12>:63.
L<http://dx.doi.org/10.1186/1471-2105-12-63>

=cut

}
1;