The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::PhyloTastic::TNRS;
use strict;
use warnings;
use JSON;
use URI::Escape;
use Data::Dumper;
use LWP::UserAgent;
use Bio::Phylo::IO 'parse';
use Bio::Phylo::Util::CONSTANT ':objecttypes';
use base 'Bio::PhyloTastic';

=head1 NAME

Bio::PhyloTastic::TNRS - Taxonomic Name Reconciliation Service

=head1 SYNOPSIS

 phylotastic TNRS -i <infile> -o <outfile>

=head1 DESCRIPTION

This module calls the TNRS service L<http://api.phylotastic.org/tnrs/>

=head1 OPTIONS AND ARGUMENTS

=over

=item -i infile

An input file. Default is a text file with one name per line. Required.

=item -o outfile

An output file name. If '-', prints output to STDOUT. Required.

=item -d informat

An input format, including NEXUS, Newick, NeXML, PhyloXML, TaxList. Optional.
Default is TaxList (i.e. a simple text file).

=item -s outformat

An output format, including NeXML, TaxList. Optional. Default is TaxList (i.e.
a simple text file).

=item -t timeout

Number of seconds until user agent times out. Optional. Default is 60.

=item -t wait

Number of seconds between polling the TNRS service. Optional. Default is 5.

=back

=cut

# URL for the taxonomic name resolution service
my $TNRS_URL = 'http://128.196.142.27:3000/submit';

# defaults
my $timeout = 60;
my $wait    = 5;
my $serializer = 'taxlist';

sub _get_args {
	return (
		'timeout=i' => \$timeout,
		'wait=i'    => \$wait,
		'deserializer=s' => [ 'taxlist' ],
		'serializer=s'   => \$serializer,
	);
}

sub _run {
	my ( $class, $project ) = @_;
	
	# fetch logger
	my $log = $class->_log;
	
	# get taxa
	my ($taxa) = @{ $project->get_items(_TAXA_) };
	$log->info("extracted taxa");
	
	# fetch names from taxon objects
	my %taxon_for_name = map { $_->get_name => $_ } @{ $taxa->get_entities };
	
	# do the request
	my $result = _fetch_url( $TNRS_URL, 'post', 'query' => join "\n", keys %taxon_for_name ); # this is a redirect
	my $obj = decode_json($result);
	
	# start polling
	while(1) {
		sleep $wait;
		my $result = _fetch_url($obj->{'uri'},'get');
		my $obj = decode_json($result);
		if ( $obj->{'names'} ) {
			$log->debug(Dumper($obj));
			return _process_result($result);
			exit(0);
		}
	}

}

# fetch data from a URL
sub _fetch_url {
	my ( $url, $method, %form ) = @_;
	my $log = __PACKAGE__->_log;
	$log->info("going to fetch $url");
	
	# instantiate user agent
	my $ua = LWP::UserAgent->new;
	$ua->timeout($timeout);
	$log->info("instantiated user agent with timeout $timeout");
	
	# do the request on LWP::UserAgent $ua
	my $response = $ua->$method($url,\%form);
	
	# had a 200 OK
	if ( $response->is_success ) {
		$log->info($response->status_line);
		my $content = $response->decoded_content;
		return $content;
	}
	else {
		$log->error($response->status_line);
		die $response->status_line;
	}	
}

# parses the final TNRS result, maps back to input taxa, creates output
sub _process_result {
	my $content = shift;
	my $log = __PACKAGE__->_log;
	
	# parse result
	my ($tnrs_taxa) = @{ parse(
		'-format' => 'tnrs',
		'-string' => $content,
		'-as_project' => 1,
	)->get_items(_TAXA_) };
	$log->info("retrieved ".$tnrs_taxa->get_ntax. " results");
	
	# identify predicates to write out to adjacency table
	my @predicates;
	for my $meta ( @{ $tnrs_taxa->get_meta } ) {
		if ( my $source = $meta->get_object('tnrs:source') ) {
			push @predicates, "tnrs:${source}";
		}
	}
	
	# return result
	return $tnrs_taxa;
}

1;