The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Phylo::Parsers::Figtree;
use strict;
use base 'Bio::Phylo::Parsers::Abstract';
use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes';
use Bio::Phylo::IO 'parse';
use Bio::Phylo::Util::Logger ':levels';

my $ns  = _NS_FIGTREE_;
my $pre = 'fig';

=head1 NAME

Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside

=head1 DESCRIPTION

This module parses annotated trees in NEXUS format as produced by FigTree
(L<http://tree.bio.ed.ac.uk/software/figtree/>), i.e. trees where nodes have
additional 'hot comments' attached to them in the tree description. The
implementation assumes syntax as follows:

 [&minmax={0.1231,0.3254},rate=0.0075583392800736]
 
I.e. the first token inside the comments is an ampersand, the annotations are
comma-separated key/value pairs, where ranges are between curly parentheses.
 
=cut

sub _parse {
    my $self = shift;
	my $fh = $self->_handle;
	
	# first we label all internal nodes with a UID
	my $i = 1;
	my $string = '';
	while(<$fh>) {
		my @parts = split /\)/, $_;
		if ( scalar @parts > 1 ) {
			$parts[$_] .= ')node' . $i++ for 0 .. $#parts - 1;
		}
		$string .= join '', @parts;		
	}
	
	# parse all trees
	my ($forest) = @{ parse( 
		'-format' => 'nexus', 
		'-string' => $string,
		'-as_project' => 1,
	)->get_items(_FOREST_) };
	
	# attach the figtree namespace
	$forest->set_namespaces( $pre => $ns );
	
	# parse out annotated tree description
	my @desc;	
	for(split /\n/, $string) {
		chomp;
		if ( /\s*tree\s\S+?\s=\s\[&(?:U|R)\]\s(.+)/ ) {
			my $desc = $1;
			push @desc, $desc;
		}
	}
	
	$self->_process_annotations($forest, @desc);
	return $forest;
}

sub _process_annotations {
	my ( $self, $forest, @desc ) = @_;
	my $log = $self->_logger;
	
	# visit trees and nodes in reading order
	my $i = 0;
	for my $tree ( @{ $forest->get_entities } ) {
		my $desc = $desc[$i];
		$tree->visit( sub { 
			my $node = shift;
			my $name = $node->get_internal_name;
			$log->info("focal node is $name");
			
			# comment is a figtree processing instruction, starts with [&
			if ( $desc =~ /[\(,\)]\Q$name\E\[&([^]]+)\]/ ) {
				my $annotation = $1;
				$log->debug("found annotation $annotation");
				
				# going to parse annotations
				my %anno = $self->_parse_annotation($annotation);
				
				# attach annotations to focal node
				$self->_attach_annotation( $node, %anno );

			}
			else {
				$log->warn("comment is not a figtree annotation: $desc");
			}
			
		} );
		$i++;
	}	
}

# attach key/value pairs to focal node
sub _attach_annotation {
	my ( $self, $node, %anno ) = @_;
	my $fac = $self->_factory;
	
	# iterate over key/value pairs of figtree annotation
	for my $key ( keys %anno ) {
		my $predicate = $key;
		$predicate =~ s/[^a-zA-Z0-9]//g; # for safe CURIEs
		
		# value is an array reference, i.e. it's a min/max range
		if ( ref $anno{$key} ) {
			$node->add_meta(
				$fac->create_meta(
					'-triple' => { "${pre}:${predicate}_min" => $anno{$key}->[0] }
				)
			);
			$node->add_meta(
				$fac->create_meta(
					'-triple' => { "${pre}:${predicate}_max" => $anno{$key}->[1] }
				)
			);					
		}
		
		# value is a scalar
		else {
			$node->add_meta(
				$fac->create_meta(
					'-triple' => {
						"${pre}:$predicate" => $anno{$key},
					}
				)
			);
		}
	}	
}


# parse fugtree annotation syntax
sub _parse_annotation {
	my ( $self, $string ) = @_;
	my $log = $self->_logger;
	$log->debug("going to parse annotation $string");
	my %anno;
	while($string) {
	
		# there is an equals sign with something in front of it
		if ( $string =~ /^([^=]+)=(.+)$/ ) {
			my ( $key, $remainder ) = ( $1, $2 );
			$log->info("key is $key");
			
			# remainder is between {}, i.e. a range
			if ( $remainder =~ /^{([^}]+)}/ ) {
				my $seq = $1;
				$log->info("value is $seq");
				my @values = split /,/, $seq;
				$anno{$key} = \@values;
				$string = substr $string, length($key) + length($seq) + 3;
				$log->debug("remainder is $string");
			}
			
			# remainder is a scalar
			elsif ( $remainder =~ /^([^,]+),?/ ) {
				my $value = $1;
				$log->info("value is $value");
				$anno{$key} = $value;
				$string = substr $string, length($key) + length($value) + 1;
				$log->debug("remainder is $string");
			}
			$string =~ s/^,//;
		}
	}
	return %anno;
}

# podinherit_insert_token

=head1 SEE ALSO

There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> 
for any user or developer questions and discussions.

=over

=item L<Bio::Phylo::IO>

The figtree parser is called by the L<Bio::Phylo::IO> object.
Look there to learn how to parse phylogenetic data files in general.

=item L<Bio::Phylo::Manual>

Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>

=back

=head1 CITATION

If you use Bio::Phylo in published research, please cite it:

B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
I<BMC Bioinformatics> B<12>:63.
L<http://dx.doi.org/10.1186/1471-2105-12-63>

=cut

1;