The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package PomBase::Chobo::OntologyData;

=head1 NAME

PomBase::Chobo::OntologyData - An in memory representation of an Ontology

=head1 SYNOPSIS

Objects of this class represent the part of an ontology that can be stored in
a Chado database.

=head1 AUTHOR

Kim Rutherford C<< <kmr44@cam.ac.uk> >>

=head1 BUGS

Please report any bugs or feature requests to C<kmr44@cam.ac.uk>.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc PomBase::Chobo::OntologyData

=over 4

=back

=head1 COPYRIGHT & LICENSE

Copyright 2012 Kim Rutherford, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 FUNCTIONS

=cut

our $VERSION = '0.016'; # VERSION

use Mouse;

use Clone qw(clone);
use Try::Tiny;
use Carp;
use List::Util qw(uniq);

use PomBase::Chobo::OntologyTerm;


has terms_by_id => (is => 'rw', init_arg => undef, isa => 'HashRef',
                    default => sub { {} });
has terms_by_name => (is => 'rw', init_arg => undef, isa => 'HashRef',
                      default => sub { {} });
has terms_by_cv_name => (is => 'rw', init_arg => undef, isa => 'HashRef',
                         default => sub { {} });
has relationship_terms_by_cv_name => (is => 'rw', init_arg => undef, isa => 'HashRef',
                                 default => sub { {} });
has terms_by_db_name => (is => 'rw', init_arg => undef, isa => 'HashRef',
                         default => sub { {} });
has metadata_by_namespace => (is => 'rw', init_arg => undef, isa => 'HashRef',
                              default => sub { {} });
has _term_relationships => (is => 'rw', init_arg => undef, isa => 'HashRef',
                           default => sub { {} });

=head2 add

 Usage   : $ontology_data->add(metadata => {..}, terms => [...]);
 Function: Add some terms, often all terms from one OBO file
 Args    : metadata - the metadata for the terms
           terms - an array of OntologyTerm objects
 Return  : Nothing, dies on error

=cut

sub add
{
  my $self = shift;

  my %args = @_;

  my $metadata = $args{metadata};
  my $terms = $args{terms};

  my $terms_by_id = $self->terms_by_id();
  my $terms_by_name = $self->terms_by_name();
  my $terms_by_cv_name = $self->terms_by_cv_name();
  my $relationship_terms_by_cv_name = $self->relationship_terms_by_cv_name();

  my $metadata_by_namespace = $self->metadata_by_namespace();

  for my $term (@$terms) {
    my @new_term_ids = ($term->{id});

    push @new_term_ids, map { $_->{id}; } $term->alt_ids();

    my @found_existing_terms = ();

    for my $id (@new_term_ids) {
      my $existing_term = $terms_by_id->{$id};

      if (defined $existing_term) {
        if (!grep { $_ == $existing_term } @found_existing_terms) {
          push @found_existing_terms, $existing_term;
        }
      }
    }

    if (@found_existing_terms > 1) {
      die "two previously read terms match an alt_id field from:\n" .
        $term->to_string() . "\n\nmatching term 1:\n" .
        $found_existing_terms[0]->to_string() . "\n\nmatching term 2:\n" .
        $found_existing_terms[1]->to_string() . "\n";
    } else {
      if (@found_existing_terms == 1) {
        my $existing_term = $found_existing_terms[0];

        my $old_namespace = $existing_term->namespace();

        $existing_term->merge($term);

        if ($old_namespace ne $existing_term->namespace()) {
          delete $self->terms_by_cv_name()->{$old_namespace}->{$existing_term->name()};
        }

        $term = $existing_term;
      }
    }

    for my $id_details ($term->alt_ids(),
                        { id => $term->{id},
                          db_name => $term->{db_name},
                          accession => $term->{accession},
                         } ) {
      $terms_by_id->{$id_details->{id}} = $term;

      $self->terms_by_db_name()->{$id_details->{db_name}}->{$id_details->{accession}} = $term;

      my $def = $term->def();

      map {
        my $def_dbxref = $_;
        if ($def_dbxref =~ /^(.+?):(.*)/) {
          my ($def_db_name, $def_accession) = ($1, $2);
          $self->terms_by_db_name()->{$def_db_name}->{$def_accession} = $term;
        } else {
          die qq(can't parse dbxref from "def:" line: $def_dbxref);
        }
      } @{$def->{dbxrefs}};
    }

    my $name = $term->{name};

    if (defined $name) {
      if (!exists $terms_by_name->{$name} ||
          !grep { $_ == $term } @{$terms_by_name->{$name}}) {
        push @{$terms_by_name->{$name}}, $term;
      }
    } else {
      warn "term without a name tag ignored:\n", $term->to_string(), "\n\n";
      next;
    }

    my $term_namespace = $term->namespace();

    if (defined $term_namespace) {
      my $existing_term_by_name = $terms_by_cv_name->{$term_namespace}->{$name};
      if ($existing_term_by_name && $existing_term_by_name != $term) {
        warn qq(more than one Term with the name "$name" in namespace "$term_namespace" -\n) .
          "existing:\n" . $term->to_string() . "\n\nand:\n" .
          $terms_by_cv_name->{$term_namespace}->{$name}->to_string() . "\n\n";
      } else {
        $terms_by_cv_name->{$term_namespace}->{$name} = $term;
      }

      if ($term->{is_relationshiptype}) {
        $relationship_terms_by_cv_name->{$term_namespace}->{$name} = $term;
      }

      if (!exists $metadata_by_namespace->{$term_namespace}) {
        $metadata_by_namespace->{$term_namespace} = clone $metadata;
      }
    }

    if ($term->{relationship}) {
      for my $rel (@{$term->{relationship}}) {
        my $key = $term->{id} . '<' . $rel->{relationship_name} .
          '>' . $rel->{other_term};
        $self->_term_relationships()->{$key} = 1;
      }
    }
  }
}

sub get_terms_by_name
{
  my $self = shift;
  my $name = shift;

  return @{$self->terms_by_name()->{$name} // []};
}

sub get_term_by_id
{
  my $self = shift;
  my $id = shift;

  return $self->terms_by_id()->{$id};
}

sub get_cv_names
{
  my $self = shift;

  return keys %{$self->terms_by_cv_name()};
}

sub get_terms_by_cv_name
{
  my $self = shift;
  my $cv_name = shift;

  return values %{$self->terms_by_cv_name()->{$cv_name}};
}

sub get_db_names
{
  my $self = shift;

  return keys %{$self->terms_by_db_name()};
}

sub accessions_by_db_name
{
  my $self = shift;
  my $db_name = shift;

  return sort keys %{$self->terms_by_db_name()->{$db_name}};
}

sub get_terms
{
  my $self = shift;

  return map { $self->get_terms_by_cv_name($_); } $self->get_cv_names();
}

sub get_namespaces
{
  my $self = shift;

  return keys %{$self->metadata_by_namespace()};
}

sub get_metadata_by_namespace
{
  my $self = shift;
  my $namespace = shift;

  return $self->metadata_by_namespace()->{$namespace};
}

sub relationships
{
  my $self = shift;

  if ($self->{_relationships}) {
    return @{$self->{_relationships}}
  }

  $self->{_relationships} = [map {
    my ($subject_id, $rel_name, $object_id) = /(.*)<(.*)>(.*)/;

    my $object_term = $self->get_term_by_id($object_id);

    if (!$object_term) {
      my $subject_term = $self->get_term_by_id($subject_id);
      warn qq(ignoring relation where object isn't defined: "$object_id" line ) .
        $subject_term->{source_file_line_number} . ' of ' .
        $subject_term->{source_file} . "\n";
      ();
    } else {
      [$subject_id, $rel_name, $object_id];
    }
  } sort keys %{$self->_term_relationships()}];

  return @{$self->{_relationships}};
}

=head2 finish

 Usage   : $self->finish();
 Function: remove namespaces that are empty due to merging and check that
           objects and subjects of relationships exist

=cut

sub finish
{
  my $self = shift;

  my @relationships = $self->relationships();

  if (@relationships == 0) {
    warn "note: no relationships read\n";
  }

  # find and remove namespaces that are empty due to merging
  my @empty_namespaces =
    map {
      if (scalar(keys %{$self->terms_by_cv_name()->{$_}}) == 0) {
        $_;
      } else {
        ();
      }
    } keys %{$self->terms_by_cv_name()};

  map {
    delete $self->terms_by_cv_name()->{$_};
  } @empty_namespaces;
}

1;