#!/usr/local/bin/perl -w
use lib '.';
use constant NUMTESTS => 12*3;
# Load the Test harness and announce the plan at compile time.
# "use Test" is resolved while this block is being compiled, so by the time
# the block body runs Test is either loaded or compilation has already
# aborted; a separate eval/require around it can never have any effect.
BEGIN {
    use Test;
    plan tests => NUMTESTS;
}
# All tests must be run from the software directory;
# the "use lib '.'" near the top of this file makes sure the modules are loaded from here.
use strict;
use GO::Parser;
# XML::Parser::PerlSAX is an optional prerequisite (presumably needed for the
# obo_xml conversion steps). If it cannot be loaded, mark every planned test
# as skipped and leave with a success status instead of failing the suite.
eval { require "XML/Parser/PerlSAX.pm" };
if ($@) {
    skip("XML::Parser::PerlSAX not installed", 1) foreach 1 .. NUMTESTS;
    exit 0;
}
# ----- REQUIREMENTS -----
# make sure synonyms etc. are parsed
# ------------------------
# Driver: check the input file as given, then round-trip it
# obo_text -> obo_xml -> obo_text, re-running the same checks on the output
# of every conversion. The input defaults to the bundled truncated GO file
# when no path is supplied on the command line.
my $file = shift(@ARGV) || "./t/data/go-truncated.obo";
check_file($file);

# Each pair is (source format, target format) for one conversion step;
# every step reads the file written by the previous one.
for my $formats (['obo_text', 'obo_xml'], ['obo_xml', 'obo_text']) {
    $file = cvt($file, @$formats);
    check_file($file);
}
exit 0;
# check_file($f)
#
# Parses the ontology file $f into an object graph and compares summary
# counts (definitions, xrefs, comments, synonyms, obsolete terms, alt ids,
# relationships) against hard-coded expected values -- presumably those of
# the default ./t/data/go-truncated.obo fixture.
#
# Emits exactly 12 Test results per call (2 progress markers + 10 counts);
# the plan of 12*3 in NUMTESTS depends on that number staying fixed.
# A trace of roots, synonyms, xrefs, comments and edges is printed to STDOUT.
sub check_file {
    my $f = shift;

    # No explicit format is passed (format=>'obo_text' was deliberately left
    # out), so the parser has to determine the format of $f by itself.
    my $parser = GO::Parser->new({ handler => 'obj' });
    $parser->handler->add_root;
    ok(1);

    print "Parsing: $f\n";
    $parser->parse($f);
    ok(1);

    my $graph = $parser->handler->g;
    my $roots = $graph->find_roots;
    foreach my $root (@$roots) {
        printf "ROOT: %s\n", $root->name;
    }

    my $terms = $graph->get_all_nodes;
    my $t            = 0;    # parent relationships, summed over all terms
    my $t2           = 0;    # relationships returned by get_relationships, summed
    my $n_obs        = 0;
    my $n_syns       = 0;
    my $n_exact_syns = 0;
    my $n_alt_ids    = 0;
    my $n_xrefs      = 0;
    my $n_def_xrefs  = 0;
    my $n_defs       = 0;
    my $n_comments   = 0;
    printf "TERMS: %d\n", scalar(@$terms);
    #ok(@$terms == 97);

    foreach my $term (@$terms) {
        # Every list accessor is guarded with "|| []" so that a term lacking
        # the attribute contributes zero instead of dying under strict refs.
        my $syns = $term->synonym_list || [];
        if (@$syns) {
            printf "SYNS:%s\n", join('|', @$syns);
            $n_syns += @$syns;
        }

        my $exact_syns = $term->synonyms_by_type('exact') || [];
        if (@$exact_syns) {
            printf "EXACT SYNS:%s\n", join('|', @$exact_syns);
            $n_exact_syns += @$exact_syns;
        }

        my $alt_ids = $term->alt_id_list || [];
        $n_alt_ids += @$alt_ids;

        my $xrefs = $term->dbxref_list || [];
        if (@$xrefs) {
            printf "XREFS:%s\n", join('|', map { $_->as_str } @$xrefs);
            $n_xrefs += @$xrefs;
        }

        my $comment = $term->comment;
        if ($comment) {
            printf "COMMENT:%s\n", $comment;
            $n_comments++;
        }

        my $def = $term->definition;
        if ($def) {
            # The DEFXREFS line is printed for every defined term, even when
            # the definition carries no xrefs.
            my $def_xrefs = $term->definition_dbxref_list || [];
            printf "DEFXREFS:%s\n", join('|', map { $_->as_str } @$def_xrefs);
            $n_def_xrefs += @$def_xrefs;
            $n_defs++;
        }

        $n_obs++ if $term->is_obsolete;

        my $rels = $graph->get_relationships($term->acc);
        $t2 += @$rels;
        $t  += @{ $graph->get_parent_relationships($term->acc) };
        foreach my $rel (@$rels) {
            printf "EDGE|%s|%s|%s\n",
                $rel->subject_acc,
                $rel->object_acc,
                $rel->type;
        }
    }

    printf "total defs:%s\n",         $n_defs;
    printf "total def xrefs:%s\n",    $n_def_xrefs;
    printf "total xrefs:%s\n",        $n_xrefs;
    printf "total comments:%s\n",     $n_comments;
    printf "total syns:%s\n",         $n_syns;
    printf "total EXACT syns:%s\n",   $n_exact_syns;
    printf "total obs:%s\n",          $n_obs;
    printf "total parent rels:%s\n",  $t;
    printf "total (both ways):%s\n",  $t2;

    # Test.pm's two-argument ok() compares the value obtained with the
    # value expected.
    ok($n_defs, 27);
    ok($n_xrefs, 2);
    ok($n_def_xrefs, 42);
    ok($n_comments, 2);
    ok($n_syns, 12);
    ok($n_exact_syns, 1);
    ok($n_obs, 1);
    ok($n_alt_ids, 1);
    ok($t, 37);
    ok($t2, 64); # trailing rels counted only once
}
# cvt($f, $from, $to)
#
# Converts the file $f from format $from to format $to by parsing it with a
# GO::Parser whose handler is named by $to and whose output file is "$f.$to".
# An existing regular file at that path is removed first.
#
# Returns the output path "$f.$to".
#
# NOTE(review): nothing here explicitly closes or flushes the handler's
# output; presumably that happens when $parser goes out of scope on return --
# confirm before keeping the parser alive any longer than this sub.
sub cvt {
    my ($f, $from, $to) = @_;
    print "$f from:$from to:$to\n";

    my $parser = GO::Parser->new({ format => $from, handler => $to });

    my $outf = "$f.$to";
    if (-f $outf) {
        # Report (rather than silently ignore) a stale file that cannot be
        # removed; carry on regardless, as the conversion may still succeed.
        unlink($outf) or warn "Could not remove stale $outf: $!\n";
    }

    $parser->handler->file($outf);
    $parser->parse($f);
    return $outf;
}