The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Phylo::PhyloWS::Service::Timetree;
use strict;
use utf8;
use base 'Bio::Phylo::PhyloWS::Service';
use Bio::Phylo::IO 'parse';
use Bio::Phylo::Factory;
use Bio::Phylo::Util::Logger;
use Bio::Phylo::Util::Exceptions 'throw';
use Bio::Phylo::Util::CONSTANT qw'looks_like_hash looks_like_instance';
use Bio::Phylo::Util::Dependency qw'LWP::UserAgent HTML::TreeBuilder::XPath URI::Escape';
use constant URL => 'http://timetree.org/time_query.php?';
use constant NCBI => 'http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=';

# http://localhost/nexml/service/timetree/phylows/tree/find?query=dcterms.identifier=9606%20and%20dcterms.identifier=9597
{
    my $fac    = Bio::Phylo::Factory->new;
    my $logger = Bio::Phylo::Util::Logger->new;
    
    $logger->VERBOSE(
        '-level' => 4,
        '-class' => 'Bio::Phylo::Parsers::Timetree',
    );
    
    my $parse_html = sub {
        my ( $content ) = @_;
        my $tree = HTML::TreeBuilder::XPath->new;
        $tree->parse_content($content);
        my ($title) = $tree->findnodes(q{//td[@class='titleBlack']/strong});
        my $str = $title->as_text;
        $str =~ s/^(\d+\.\d+).*$/$1/;
        return $str;
    };
    
    my $get_query_result_raw = sub {
        my ( $self, $taxon_a, $taxon_b ) = @_;
        my $lua = LWP::UserAgent->new;
        my $url = URL . "taxon_a=${taxon_a}&taxon_b=${taxon_b}";
        my $response = $lua->get($url);
        if ( $response->is_success ) {
            my $content = $response->content;
            utf8::decode($content);
            return $content;
        }
        else {
            throw 'NetworkError' => $response->status_line;
        }
    };
    
    my $create_project = sub {
        my ($name_a,$name_b) = @_;
        my $project = $fac->create_project;
        my $forest  = $fac->create_forest;
        my $taxa    = $fac->create_taxa;        
        my $taxona  = $fac->create_taxon(
            '-name' => $name_a,
            '-link' => NCBI . $name_a,
            '-guid' => $name_a,
        );        
        my $taxonb  = $fac->create_taxon(
            '-name' => $name_b,
            '-link' => NCBI . $name_b,
            '-guid' => $name_b,
        );
        
        $project->insert( $taxa, $forest );
        $taxa->insert( $taxona, $taxonb );
        $forest->set_taxa($taxa);
        return ( $project, $taxona, $taxonb, $forest );
    };

=head1 NAME

Bio::Phylo::PhyloWS::Service::Timetree - PhyloWS service wrapper for Timetree

=head1 SYNOPSIS

 # inside a CGI script:
 use CGI;
 use Bio::Phylo::PhyloWS::Service::Timetree;

 my $cgi = CGI->new;
 my $service = Bio::Phylo::PhyloWS::Service::Timetree->new( '-url' => $url );
 $service->handle_request($cgi);

=head1 DESCRIPTION

This is an example implementation of a PhyloWS service. The service
wraps around the timetree web site (using screen scraping) and returns 
project objects that include a tree for every search result.

=head1 METHODS

=head2 ACCESSORS

=over

=item get_query_result()

Gets a phylows cql query result

 Type    : Accessor
 Title   : get_query_result
 Usage   : my $result = $obj->get_query_result( $query );
 Function: Gets a query result 
 Returns : Bio::Phylo::Project
 Args    : Required: $query

=cut

    sub get_query_result {
        my ( $self, $query ) = @_;
        my ( $taxon_a, $taxon_b );
        
        # clean up CQL query
        $query = URI::Escape::uri_unescape($query);
        if ( $query =~ m/^\D*(\d+)\D+(\d+)/ ) {
            ( $taxon_a, $taxon_b ) = ( $1, $2 );    
        }                

        # download timetree result
        my $content = $get_query_result_raw->( $self, $taxon_a, $taxon_b );

        # parse timetree result
        my $age = $parse_html->( $content );

        # populate project
        my ( $project, $taxona, $taxonb, $forest ) = $create_project->($taxon_a,$taxon_b);
        my $tree = $fac->create_tree;
        my $tree_root = $fac->create_node;
        
        my $node_a = $fac->create_node(
            '-branch_length' => $age,
            '-parent'        => $tree_root,
            '-taxon'         => $taxona,
            '-name'          => $taxona->get_name,
            '-link'          => NCBI . $taxona->get_name,
            '-guid'          => $taxona->get_name,
        );
        
        my $node_b = $fac->create_node(
            '-branch_length' => $age,
            '-parent'        => $tree_root,
            '-taxon'         => $taxonb,
            '-name'          => $taxonb->get_name,
            '-link'          => NCBI . $taxonb->get_name,
            '-guid'          => $taxonb->get_name,
        );
        
        $tree->insert( $tree_root, $node_a, $node_b );
        $forest->insert($tree);

        # return project
        return $project;
    }

=item get_supported_formats()

Gets an array ref of supported formats

 Type    : Accessor
 Title   : get_supported_formats
 Usage   : my @formats = @{ $obj->get_supported_formats };
 Function: Gets an array ref of supported formats
 Returns : [ qw(nexml nexus newick html) ]
 Args    : NONE

=cut

    sub get_supported_formats { [qw(nexml nexus newick html)] }

=item get_redirect()

Gets a redirect URL if relevant

 Type    : Accessor
 Title   : get_redirect
 Usage   : my $url = $obj->get_redirect;
 Function: Gets a redirect URL if relevant
 Returns : String
 Args    : $cgi
 Comments: This method is called by handle_request so that
           services can 303 redirect a record lookup to 
           another URL. By default, this method returns 
           undef (i.e. no redirect), but if this implementation
           is called to handle a request that specifies 
           'format=html' the request is forwarded to the
           appropriate page on the http://timetree.org website

=cut

    sub get_redirect {
        my ( $self, $cgi ) = @_;
        if ( $cgi->param('format') eq 'html' ) {
            my $query = $cgi->param('query');
            $query = URI::Escape::uri_unescape($query);
            my ( $taxon_a, $taxon_b );
            if ( $query =~ m/^\D*(\d+)\D+(\d+)/ ) {
                ( $taxon_a, $taxon_b ) = ( $1, $2 );    
            }            
            my $url = URL . "taxon_a=${taxon_a}&taxon_b=${taxon_b}";
            return $url;
        }
        return;
    }

=back

=cut

    # podinherit_insert_token

=head1 SEE ALSO

There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> 
for any user or developer questions and discussions.

Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>

=head1 CITATION

If you use Bio::Phylo in published research, please cite it:

B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
I<BMC Bioinformatics> B<12>:63.
L<http://dx.doi.org/10.1186/1471-2105-12-63>

=cut

}
1;