The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

BEGIN {
    use lib '.';
    use Bio::Root::Test;

    test_begin(
        
        -requires_modules => [qw( 5.010 DB_File DBI DBD::SQLite )]
    );

    use_ok('Bio::DB::Taxonomy');
    use_ok('Bio::Tree::Tree');
}

my $temp_dir = test_output_dir();

# TODO: run basic tests making sure that a database is not regenerated if
# present or unless forced

ok my $db_flatfile = Bio::DB::Taxonomy->new(
    -source    => 'sqlite',
    -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
    -namesfile => test_input_file('taxdump', 'names.dmp'),
);
isa_ok $db_flatfile, 'Bio::DB::Taxonomy::sqlite';
isa_ok $db_flatfile, 'Bio::DB::Taxonomy';

ok my $db = Bio::DB::Taxonomy->new(
    -source    => 'sqlite',
    -directory => $temp_dir,
    -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
    -namesfile => test_input_file('taxdump', 'names.dmp'),
    -force     => 1,
);

my $id;

# taxid data in the nodes.dmp file should be unique, we ignore repeated values
# if seen

is $db->get_num_taxa, 188;

lives_ok {$id = $db->get_taxonid('Homo sapiens')};

is $id, 9606;

## easy test on human, try out the main Taxon methods
my $n;
ok $n = $db->get_taxon(9606);
is $n->id, 9606;
is $n->object_id, $n->id;
is $n->ncbi_taxid, $n->id;
is $n->parent_id, 9605;
is $n->rank, 'species';

is $n->node_name, 'Homo sapiens';
is $n->scientific_name, $n->node_name;
is ${$n->name('scientific')}[0], $n->node_name;

my %common_names = map { $_ => 1 } $n->common_names;
is keys %common_names, 3, ref($db).": common names";
ok exists $common_names{human};
ok exists $common_names{man};

is $n->division, 'Primates';
is $n->genetic_code, 1;
is $n->mitochondrial_genetic_code, 2;

# these are entrez-only, data not available in dmp files
#if ($db eq $db_entrez) {
#    ok defined $n->pub_date;
#    ok defined $n->create_date;
#    ok defined $n->update_date;
#}

# briefly test some Bio::Tree::NodeI methods
ok my $ancestor = $n->ancestor;
is $ancestor->scientific_name, 'Homo';
# unless set explicitly, Bio::Taxon doesn't return anything for
# each_Descendent; must ask the database directly
ok my @children = $ancestor->db_handle->each_Descendent($ancestor);
is @children, 1;

#sleep(3) if $db eq $db_entrez;
#
## do some trickier things...
ok my $n2 = $db->get_Taxonomy_Node('89593');
is $n2->scientific_name, 'Craniata';

# briefly check we can use some Tree methods
my $tree = Bio::Tree::Tree->new();
is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';

# get lineage_nodes
my @nodes = $tree->get_nodes;
is scalar(@nodes), 0;
my @lineage_nodes;
@lineage_nodes = $tree->get_lineage_nodes($n->id); # read ID, only works if nodes have been added to tree
is scalar @lineage_nodes, 0;
@lineage_nodes = $tree->get_lineage_nodes($n);     # node object always works
cmp_ok(scalar @lineage_nodes, '>', 20);

# get lineage string
like($tree->get_lineage_string($n), qr/cellular organisms;Eukaryota/);
like($tree->get_lineage_string($n,'-'), qr/cellular organisms-Eukaryota/);
like($tree->get_lineage_string($n2), qr/cellular organisms;Eukaryota/);

# can we actually form a Tree and use other Tree methods?
ok $tree = Bio::Tree::Tree->new(-node => $n);
cmp_ok($tree->number_nodes, '>', 20);
cmp_ok(scalar($tree->get_nodes), '>', 20);
is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';

# check that getting the ancestor still works now we have explitly set the
# ancestor by making a Tree
is $n->ancestor->scientific_name, 'Homo';

ok $n = $db->get_Taxonomy_Node('1760');
is $n->scientific_name, 'Actinobacteria (class)';

# entrez isn't as good at searching as flatfile, so we have to special-case
my @ids = sort $db->get_taxonids('Chloroflexi');
is scalar @ids, 1;
is_deeply \@ids, [200795];

# lowercase
@ids = sort $db->get_taxonids('chloroflexi');
is scalar @ids, 1;
is_deeply \@ids, [200795];

# fuzzy match using SQL syntax to match any 'Chloroflexi'
@ids = sort $db->get_taxonids('Chloroflexi%');
is scalar @ids, 2;
is_deeply \@ids, [200795, 32061];

$id = $db->get_taxonids('Chloroflexi (class)');
is($id, 32061);

@ids = $db->get_taxonids('Rhodotorula');
is @ids, 8;
@ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
is @ids, 1;
is $ids[0], 231509;

# get_lca should work on nodes from different databases
SKIP: {
    test_skip(-tests => 9, -requires_networking => 1);

    # check that the result is the same as if we are retrieving from the same DB
    # flatfile
    my $h_flat = $db_flatfile->get_taxon(-name => 'Homo');
    my $h_flat2 = $db_flatfile->get_taxon(-name => 'Homo sapiens');
    ok my $tree_functions = Bio::Tree::Tree->new();
    is $tree_functions->get_lca($h_flat, $h_flat2)->scientific_name, 'Homo', 'get_lca() within flatfile db';

    # entrez
    #my $h_entrez;
    #eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
    #skip "Unable to connect to entrez database; no network or server busy?", 7 if $@;
    #my $h_entrez2;
    #eval { $h_entrez2 = $db_entrez->get_taxon(-name => 'Homo');};
    #skip "Unable to connect to entrez database; no network or server busy?", 7 if $@;
    #ok $tree_functions = Bio::Tree::Tree->new();
    #is $tree_functions->get_lca($h_entrez, $h_entrez2)->scientific_name, 'Homo', 'get_lca() within entrez db';

    #ok $tree_functions = Bio::Tree::Tree->new();
    # mixing entrez and flatfile
    #TODO:{
    #    local $TODO = 'Mixing databases for get_lca() not working, see bug #3416';
    #    is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo', 'get_lca() mixing flatfile and remote db';
    #}
    # even though the species taxa for Homo sapiens from list and flat databases
    # have the same internal id, get_lca won't work because they have different
    # roots and descendents
    #$h_list = $db_list->get_taxon(-name => 'Homo sapiens');
    #is $h_list->ancestor->internal_id, $h_flat->internal_id;
    #ok ! $tree_functions->get_lca($h_flat, $h_list);

    # but we can form a tree with the flat node then remove all the ranks we're
    # not interested in and try again
    #$tree = Bio::Tree::Tree->new(-node => $h_flat);
    #$tree->splice(-keep_rank => \@ranks);
    #is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
}

# Some tests carried over from flatfile and others that would be nice to pass

# ideas from taxonomy2tree.PLS that let us make nice tree, using
# Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
# because our test flatfile database only has the full lineage of one species
undef $tree;
for my $name ('Human', 'Hominidae') {
  my $ncbi_id = $db_flatfile->get_taxonid($name);
  if ($ncbi_id) {
    my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);

    if ($tree) {
        ok $tree->merge_lineage($node);
    }
    else {
        ok $tree = Bio::Tree::Tree->new(-node => $node);
    }
  }
}
is $tree->get_nodes, 30;
$tree->contract_linear_paths;
my $ids = join(",", map { $_->id } $tree->get_nodes);
is $ids, '131567,9606';

END {
    unlink 'taxonomy.sqlite' if (-e 'taxonomy.sqlite');
}

done_testing();