The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
use Test::More;

BEGIN {
    eval { require XML::Twig };
    if ( not $ENV{'NEXML_ROOT'} ) {
        plan 'skip_all' => 'env var NEXML_ROOT not set';
    }
    elsif ($@) {
        plan 'skip_all' => 'XML::Twig not installed';
    }
    else {
        Test::More->import('no_plan');
    }
}
use strict;
use warnings;
use Bio::Phylo::IO qw'parse unparse';
use Bio::Phylo::Util::Logger;
use Data::Dumper;
use Bio::Phylo::Factory;

my $fac = Bio::Phylo::Factory->new;
my $XML_PATH = $ENV{'NEXML_ROOT'} . '/examples';

# here we just parse a file with only taxon elements
my $taxa = parse( '-format' => 'nexml', '-file' => "$XML_PATH/taxa.xml" )->[0];

# check the ids for the children
my @ids      = qw(t1 t2 t3 t4 t5);
my @children = @{ $taxa->get_entities };
for my $i ( 0 .. $#children ) {
    ok( $ids[$i] eq $children[$i]->get_xml_id, "$ids[$i]" );
}
ok( unparse( '-format' => 'nexml', '-phylo' => $taxa ), "serialized taxa" );

# here we parse a file with taxon elements and a trees element
my $blocks    = parse( '-format' => 'nexml', '-file' => "$XML_PATH/trees.xml" );
my $forest    = $blocks->[1];
my %internals = map { $_ => 1 } qw(n1 n3 n4 n7);
my %terminals = map { $_ => 1 } qw(n2 n5 n6 n8 n9);
my %parent_of = (
    'n3' => 'n1',
    'n4' => 'n3',
    'n7' => 'n3',
    'n2' => 'n1',
    'n5' => 'n4',
    'n6' => 'n4',
    'n8' => 'n7',
    'n9' => 'n7',
);
my %taxon_of = (
    'n2' => 't1',
    'n5' => 't3',
    'n6' => 't2',
    'n8' => 't5',
    'n9' => 't4',
);
for my $tree ( @{ $forest->get_entities } ) {

    for my $node ( @{ $tree->get_entities } ) {
        my $id = $node->get_name;
        if ( $node->is_internal ) {
            ok( exists $internals{$id}, "$id is an internal node" );
        }
        else {
            ok( exists $terminals{$id}, "$id is a terminal node" );
            ok( $node->get_taxon->get_xml_id eq $taxon_of{$id},
                "taxon if $id is $taxon_of{$id}" );
        }
        if ( my $parent = $node->get_parent ) {
            my $parent_id = $parent->get_name;
            ok( $parent_of{$id} eq $parent_id, "$parent_id is parent of $id" );
        }
    }
}
ok( $forest->first->calc_symdiff( $forest->last ) == 0,
    "identical topologies, symdiff == 0" );
ok( unparse( '-format' => 'nexml', '-phylo' => $forest ), "serialized forest" );

# here we parse a file with two character matrices, one continuous, one standard
$blocks = parse( '-format' => 'nexml', '-file' => "$XML_PATH/characters.xml" );
my $raw_matrices = {
    'CONTINUOUS' => [
        [
            -1.545414144070023,  -2.3905621575431044,
            -2.9610221833467265, 0.7868662069161243,
            0.22968509237534918
        ],
        [
            -1.6259836379710066, 3.649352410850134,
            1.778885099660406,   -1.2580877968480846,
            0.22335354995610862
        ],
        [
            -1.5798979984134964, 2.9548251411133157,
            1.522005675256233,   -0.8642016921755289,
            -0.938129801832388
        ],
        [
            2.7436692306788086, -0.7151148143399818,
            4.592207937774776,  -0.6898841440534845,
            0.5769509574453064
        ],
        [
            3.1060827493657683, -1.0453787389160105,
            2.67416332763427,   -1.4045634106692808,
            0.019890469925520196
        ],
    ],
    'STANDARD' => [ [ 1, 2 ], [ 2, 2 ], [ 3, 4 ], [ 2, 3 ], [ 4, 1 ], ],
    'DNA' => [
        [
            'a', 'c', 'g', 'c', 't', 'c', 'g', 'c', 'a', 't',
            'c', 'g', 'c', 'a', 't', 'c', 'g', 'c', 'g', 'a'
        ],
        [
            'a', 'c', 'g', 'c', 't', 'c', 'g', 'c', 'a', 't',
            'c', 'g', 'c', 'a', 't', 'c', 'g', 'c', 'g', 'a'
        ],
        [
            'a', 'c', 'g', 'c', 't', 'c', 'g', 'c', 'a', 't',
            'c', 'g', 'c', 'a', 't', 'c', 'g', 'c', 'g', 'a'
        ],
    ],
    'RNA' => [
        [
            'a', 'c', 'g', 'c', 'u', 'c', 'g', 'c', 'a', 'u',
            'c', 'g', 'c', 'a', 'u', 'c', 'g', 'c', 'g', 'a'
        ],
        [
            'a', 'c', 'g', 'c', 'u', 'c', 'g', 'c', 'a', 'u',
            'c', 'g', 'c', 'a', 'u', 'c', 'g', 'c', 'g', 'a'
        ],
        [
            'a', 'c', 'g', 'c', 'u', 'c', 'g', 'c', 'a', 'u',
            'c', 'g', 'c', 'a', 'u', 'c', 'g', 'c', 'g', 'a'
        ],
    ]
};
for my $block (@$blocks) {
    if ( $block->isa('Bio::Phylo::Taxa') ) {
        ok( $block, "got taxa block" );
        next;
    }
    my $rows = $block->get_entities;
    my $type = uc $block->get_type;
    for my $i ( 0 .. $#{$rows} ) {
        ok( $rows->[$i]->get_taxon->get_xml_id eq 't' . ( $i + 1 ),
            "found linked taxon" );
        if ( exists $raw_matrices->{$type} ) {
            my @chars = $rows->[$i]->get_char;
            for my $j ( 0 .. $#chars ) {
                my $value_in_matrix_object = $chars[$j];
                my $value_in_raw_array     = $raw_matrices->{$type}->[$i]->[$j];
                if ( ( $type eq 'CONTINUOUS' ) || ( $type eq 'STANDARD' ) ) {
                    ok(
                        $value_in_matrix_object == $value_in_raw_array,
"value in numerical cell (obj: $value_in_matrix_object raw: $value_in_raw_array)"
                    );
                }
                else {
                    ok(
                        lc($value_in_matrix_object) eq lc($value_in_raw_array),
"value in symbol cell (obj: $value_in_matrix_object raw: $value_in_raw_array)"
                    );
                }
            }
        }
    }
    ok( unparse( '-format' => 'nexml', '-phylo' => $block ),
        "serialized $block" );
}
{
    my $matrix = Bio::Phylo::Matrices::Matrix->new(
        '-type'   => 'dna',
        '-matrix' => [ [qw'taxon1 A C G T C G'], [qw'taxon2 A C G T C G'], ],
        '-charlabels' => [qw'c1 c2 c3 c4 c5 c6']
    );

    # testing character annotations
    my $char = $matrix->get_characters->get_entities->[0];
    $char->add_meta(
        $fac->create_meta(
            '-namespaces' =>
              { 'dcterms' => 'http://purl.org/dc/elements/1.1/' },
            '-triple' =>
              { 'dcterms:description' => 'this is a character annotation' }
        )
    );
    my ($meta) = @{ $char->get_meta };
    ok( $meta->get_object    eq 'this is a character annotation' );
    ok( $meta->get_predicate eq 'dcterms:description' );

    # testing annotation for unambiguous state
    my $to = $matrix->get_type_object;
    $to->add_meta_for_state(
        $fac->create_meta(
            '-namespaces' => { 'dc' => 'http://purl.org/dc/terms/description' },
            '-triple' => { 'dc:description' => 'this is an unambiguous state' }
        ),
        'A'
    );
    my ($umeta) = @{ $to->get_meta_for_state('A') };
    ok( $umeta->get_object    eq 'this is an unambiguous state' );
    ok( $umeta->get_predicate eq 'dc:description' );

    # testing annotation for ambiguous state
    $matrix->get_type_object->add_meta_for_state(
        $fac->create_meta(
            '-namespaces' => { 'dc' => 'http://purl.org/dc/terms/description' },
            '-triple' => { 'dc:description' => 'this is an ambiguous state' }
        ),
        'X'
    );
    my ($ameta) = @{ $to->get_meta_for_state('X') };
    ok( $ameta->get_object    eq 'this is an ambiguous state' );
    ok( $ameta->get_predicate eq 'dc:description' );
}