The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Phylo::Parsers::Ubiometa;
use base 'Bio::Phylo::Parsers::Abstract';
use Bio::Phylo::Util::Dependency 'XML::Twig';
use Bio::Phylo::NeXML::Entities '/entities/';
use strict;

=head1 NAME

Bio::Phylo::Parsers::Ubiometa - Parser used by Bio::Phylo::IO, no serviceable
parts inside

=head1 DESCRIPTION

This module parses RDF metadata for uBio namebank records. An example of such a
record is here: L<http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:2481730>

The parser creates a single L<Bio::Phylo::Taxa::Taxon> object to which all
metadata are attached as L<Bio::Phylo::NeXML::Meta> objects. This taxon is
embedded in a taxa block, or optionally in a L<Bio::Phylo::Project> object
if the C<-as_project> flag was provided to the call to C<parse()>.

=cut

# prettify the output, make sure there is a name and description
sub _rss_prettify {
    my ( $self, $taxon ) = @_;
    $taxon->set_name( $taxon->get_meta_object('dc:subject') );
    $taxon->set_desc(
        $taxon->get_meta_object('dc:type')
        . ', Rank: '
        . $taxon->get_meta_object('gla:rank')
        . ', Status: '
        . $taxon->get_meta_object('ubio:lexicalStatus')
    );     
}

# copy the local part of the LSID to the guid field and dc:identifier
sub _copy_identifiers {
    my ( $self, $elt, $obj ) = @_;
    if ( $elt->att('rdf:about') =~ /(\d+)$/ ) {
        my $namebankID = $1;
        $obj->set_guid($namebankID);
        $obj->add_meta( $self->_factory->create_meta(
            '-triple' => { 'dc:identifier' => $namebankID }                    
        ) );
    }    
}

# attach namespaces from element to object
sub _copy_namespaces {
    my ( $self, $elt, $obj ) = @_;
    for my $att_name ( $elt->att_names ) {
        if ( $att_name =~ /xmlns:(\S+)/ ) {
            my $prefix = $1;
            my $ns = $elt->att($att_name);
            $obj->set_namespaces( $prefix => $ns );
        }
    }    
}

sub _parse {
    my $self = shift;
    my $fac  = $self->_factory;
    my $taxa = $fac->create_taxa;
    XML::Twig->new(
        'twig_handlers' => {
            'rdf:RDF' => sub {
                my ( $twig, $elt ) = @_;
                my $taxon = $fac->create_taxon;
                
                # attach namespaces from root element to taxon
                $self->_copy_namespaces($elt,$taxon);
                
                # attach metadata
                my ($child) = $elt->children('rdf:Description');
                for my $meta_elt ( $child->children ) {
                    my $val = encode_entities( $meta_elt->att('rdf:resource') || $meta_elt->text );
                    my $key = $meta_elt->tag;
                    if ( $key eq 'dc:identifier' ) {
                        $key = 'ubio:namebankIdentifier';
                    }
                    $taxon->add_meta( $fac->create_meta(
                        '-triple' => { $key => $val }                    
                    ) );
                }
                
                # parse the rdf:about lsid, use the numerical part as
                # guid and dc:identifier                
                $self->_copy_identifiers($child,$taxon);
                
                # prettify the output, make sure there is a name and description
                $self->_rss_prettify($taxon);
                
                $taxa->insert($taxon);
            }
        }
    )->parse($self->_string);
    return $taxa;
}

# podinherit_insert_token

=head1 SEE ALSO

There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> 
for any user or developer questions and discussions.

=over

=item L<Bio::Phylo::IO>

The uBio metadata parser is called by the L<Bio::Phylo::IO> object.
Look there to learn more about parsing.

=item L<Bio::Phylo::Manual>

Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>

=back

=head1 CITATION

If you use Bio::Phylo in published research, please cite it:

B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
I<BMC Bioinformatics> B<12>:63.
L<http://dx.doi.org/10.1186/1471-2105-12-63>



=cut

1;