The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright 2008-2010 Tim Rayner
# 
# This file is part of Bio::MAGETAB.
# 
# Bio::MAGETAB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# Bio::MAGETAB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with Bio::MAGETAB.  If not, see <http://www.gnu.org/licenses/>.
#
# $Id: ADF.pm 340 2010-07-23 13:19:27Z tfrayner $

package Bio::MAGETAB::Util::Writer::ADF;

use Moose;
use MooseX::FollowPBP;

use Carp;

use MooseX::Types::Moose qw( Bool );

BEGIN { extends 'Bio::MAGETAB::Util::Writer::Tabfile' };

has 'magetab_object'       => ( is         => 'ro',
                                isa        => 'Bio::MAGETAB::ArrayDesign',
                                required   => 1 );

has '_cached_mapping_flag' => ( is         => 'rw',
                                isa        => Bool,
                                predicate  => '_has_cached_mapping_flag',
                                required   => 0 );

sub _write_header {

    my ( $self ) = @_;

    my $array = $self->get_magetab_object();

    # Term Sources are a bit ugly, because they're normally attached
    # to Investigation. We currently cheat and go via any Bio::MAGETAB
    # container that's available (this means that *all* in-memory term
    # sources are dumped into the ADF):
    my ( @termsources, $num_cols );
    if ( my $magetab = $array->get_ClassContainer() ) {
        @termsources = $magetab->get_termSources();
        if ( my $num_ts = scalar @termsources ) {
            $num_cols = $num_ts + 1;
        }
    }

    # Just two columns is standard for the header section if there are
    # no Term Sources; main and mapping sections will differ (FIXME
    # check this against the spec; is this valid?).
    $num_cols ||= 2;
    $self->set_num_columns( $num_cols );
    $self->_write_line( '[header]' );

    my %single = (
        'Array Design Name'   => 'name',
        'Version'             => 'version',
        'Provider'            => 'provider',
        'Printing Protocol'   => 'printingProtocol',
    );

    # Single elements are straightforward.
    while ( my ( $field, $value ) = each %single ) {
        my $getter = "get_$value";
        $self->_write_line( $field, $array->$getter );
    }

    # Elements pointing to objects need a bit more work.
    my %multi = (
        
        'technologyType' => [
            sub { return ( [ 'Technology Type',
                             map { $_->get_value()     } @_ ] ) },
            sub { return ( [ 'Technology Type Term Accession Number',
                             map { $self->_get_thing_accession($_) } @_ ] ) },
            sub { return ( [ 'Technology Type Term Source REF',
                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
        ],
        'surfaceType' => [
            sub { return ( [ 'Surface Type',
                             map { $_->get_value()     } @_ ] ) },
            sub { return ( [ 'Surface Type Term Accession Number',
                             map { $self->_get_thing_accession($_) } @_ ] ) },
            sub { return ( [ 'Surface Type Term Source REF',
                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
        ],
        'substrateType' => [
            sub { return ( [ 'Substrate Type',
                             map { $_->get_value()     } @_ ] ) },
            sub { return ( [ 'Substrate Type Term Accession Number',
                             map { $self->_get_thing_accession($_) } @_ ] ) },
            sub { return ( [ 'Substrate Type Term Source REF',
                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
        ],
        'sequencePolymerType' => [
            sub { return ( [ 'Sequence Polymer Type',
                             map { $_->get_value()     } @_ ] ) },
            sub { return ( [ 'Sequence Polymer Type Term Accession Number',
                             map { $self->_get_thing_accession($_) } @_ ] ) },
            sub { return ( [ 'Sequence Polymer Type Term Source REF',
                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
        ],
    );

    # All the complicated stuff gets handled by the dispatch methods
    # in %multi.
    ATTR:
    while ( my ( $field, $subs ) = each %multi ) {
        my $getter = "get_$field";
        my @attrs = $array->$getter;
        next ATTR if ( scalar @attrs == 1 && ! defined $attrs[0] );
        foreach my $sub ( @$subs ) {
            foreach my $lineref ( $sub->( @attrs ) ) {

                # Don't write the line if there's nothing to write but the tag.
                if ( scalar grep { defined $_ && $_ ne q{} } @{ $lineref }[1..$#$lineref] ) {
                    $self->_write_line( @{ $lineref } );
                }
            }
        }
    }

    # Dump out our Term Source info.
    if ( scalar @termsources ) {
        $self->_write_line( 'Term Source Name',
                            map { $_->get_name() } @termsources );
        $self->_write_line( 'Term Source Version',
                            map { $_->get_version() } @termsources );
        $self->_write_line( 'Term Source File',
                            map { $_->get_uri() } @termsources );
    }

    # Attach all comments to the ArrayDesign.
    foreach my $comment ( $array->get_comments() ) {
        my $field = sprintf("Comment[%s]", $comment->get_name());
        $self->_write_line( $field, $comment->get_value() );
    }

    return;
}

sub _get_reporter_tag_lists {

    my ( $self ) = @_;

    my $array = $self->get_magetab_object();

    my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) }
                       $array->get_designElements();

    my (%db_name, %group_name);
    foreach my $rep ( @reporters ) {
        foreach my $db_entry ( $rep->get_databaseEntries() ) {
            my $ts = $db_entry->get_termSource();
            $db_name{ $ts->get_name() }++ if $ts;
        }
        foreach my $group ( $rep->get_groups() ) {
            $group_name{ $group->get_category() }++;
        }
    }
    my @dbs    = sort keys %db_name;
    my @groups = sort keys %group_name;

    return \@dbs, \@groups;
}

sub _get_composite_tag_lists {

    my ( $self ) = @_;

    my $array = $self->get_magetab_object();

    my @composites = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::CompositeElement' ) }
                       $array->get_designElements();

    my %db_name;
    foreach my $elem ( @composites ) {
        foreach my $db_entry ( $elem->get_databaseEntries() ) {
            my $ts = $db_entry->get_termSource();
            $db_name{ $ts->get_name() }++ if $ts;
        }
    }
    my @dbs = sort keys %db_name;

    return \@dbs;
}

sub _generate_main_header_line {

    my ( $self, $reporter_dbs, $groups, $composite_dbs ) = @_;

    my @header = (
        'Block Column',
        'Block Row',
        'Column',
        'Row',
        'Reporter Name',
        'Reporter Sequence',
        ( map { "Reporter Database Entry [$_]" } @$reporter_dbs    ),
        ( map { "Reporter Group [$_]" }          @$groups ),
    );
    if ( scalar @$groups ) {
        push @header, 'Reporter Group Term Source REF';

        if ( $self->get_export_version ne '1.0' ) {
            push @header, 'Reporter Group Term Accession Number';
        }
    }
    push @header, (
        'Control Type',
        'Control Type Term Source REF',
    );
    if ( $self->get_export_version ne '1.0' ) {
        push @header, 'Control Type Term Accession Number';
    }

    # CompositeElement.
    unless ( $self->_must_generate_mapping() ) {
        push @header,
            'Composite Element Name',
            ( map { "Composite Element Database Entry [$_]" } @$composite_dbs ),
            'Composite Element Comment';
    }

    return \@header;
}

sub _get_feature_coords {

    my ( $self, $feature ) = @_;

    my @coords = map { $feature->$_ }
        qw( get_blockCol get_blockRow get_col get_row );

    return @coords;
}

sub _get_element_dbentries {

    my ( $self, $element, $dbs ) = @_;

    my @accessions;

    my %accession = map {
        my $ts = $_->get_termSource();
        ( $ts ? $ts->get_name() : q{} ) => $_->get_accession();
    } $element->get_databaseEntries();
    foreach my $db ( @$dbs ) {
        my $acc = $accession{ $db };
        push @accessions, ( defined $acc ? $acc : q{} );
    }

    return @accessions;
}

sub _get_reporter_groups {

    my ( $self, $reporter, $groups ) = @_;

    my @groups;
    my %group = map {
        $_->get_category() => $_->get_value()
    } $reporter->get_groups();
    foreach my $name ( @$groups ) {
        my $gr = $group{ $name };
        push @groups, ( defined $gr ? $gr : q{} );
    }

    return @groups;
}

sub _get_reporter_group_source {

    my ( $self, $reporter, $groups ) = @_;

    my @sources;

    # Group Term Source and Accession, where needed.
    if ( scalar @$groups ) {
        my @rep_groups = $reporter->get_groups();
        if ( scalar @rep_groups > 1 ) {
            carp(qq{Warning: Multiple Reporter Group Term Sources/Accessions not supported. }
               . qq{ADF output only contains these values for "}
               . $rep_groups[0]->get_category() . qq{"\n})
        }
        push @sources, $self->_get_type_termsource_name( $rep_groups[0] );

        if ( $self->get_export_version() ne '1.0' ) {
            my $acc = $rep_groups[0]->get_accession();
            push @sources, ( defined $acc ? $acc : q{} );
        }
    }

    return @sources;
}

sub _get_reporter_control_type {

    my ( $self, $reporter ) = @_;

    my @typeinfo;
    if ( my $ctype = $reporter->get_controlType() ) {
        push @typeinfo, $ctype->get_value();
        push @typeinfo, $self->_get_type_termsource_name( $ctype );

        if ( $self->get_export_version() ne '1.0' ) {
            my $acc = $ctype->get_accession();
            push @typeinfo, ( defined $acc ? $acc : q{} );
        }
    }
    else {
        push @typeinfo, (q{}) x 2;
        if ( $self->get_export_version() ne '1.0' ) {
            push @typeinfo, q{};
        }
    }

    return @typeinfo;
}

sub _generate_reporter_data {

    my ( $self, $reporter, $dbs, $groups ) = @_;

    my @data;
    push @data, $reporter->get_name(), $reporter->get_sequence();

    # Get the database entries, in order.
    push @data, $self->_get_element_dbentries( $reporter, $dbs );

    # Get the group names, in order.
    push @data, $self->_get_reporter_groups(       $reporter, $groups );
    push @data, $self->_get_reporter_group_source( $reporter, $groups );

    # Control Type.
    push @data, $self->_get_reporter_control_type( $reporter );

    return @data;
}

sub _generate_composite_data {

    my ( $self, $composite, $dbs ) = @_;

    my @data = $composite->get_name();

    # Get the database entries, in order.
    push @data, $self->_get_element_dbentries( $composite, $dbs );

    if ( my $comm = $composite->get_comment() ) {
        push @data, $comm->get_value();
    }
    else {
        push @data, q{};
    }

    return @data;
}

sub _must_generate_mapping {

    my ( $self ) = @_;

    unless ( $self->_has_cached_mapping_flag() ) {

        # Check all reporters; if any map to more than one CE, we need
        # a mapping section. The result is cached so we only check
        # this once.
        my $array = $self->get_magetab_object();
        my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) }
            $array->get_designElements();

        REPORTER:
        foreach my $rep ( @reporters ) {
            if ( scalar @{ [ $rep->get_compositeElements() ] } > 1 ) {
                $self->_set_cached_mapping_flag(1);
                last REPORTER;
            }
        }

        $self->_set_cached_mapping_flag(0)
            unless $self->_has_cached_mapping_flag();
    }

    return $self->_get_cached_mapping_flag();
}

sub _write_main {

    my ( $self ) = @_;

    my $array = $self->get_magetab_object();

    # Figure out which databases are represented.
    my ( $reporter_dbs, $groups ) = $self->_get_reporter_tag_lists();
    my $composite_dbs             = $self->_get_composite_tag_lists();

    # FIXME beware memory issues here; consider creating an iterator
    # to access some of these objects? This would probably need to be
    # in the actual Bio::MAGETAB model, possibly with a file- or
    # db-based backend.
    my @features = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Feature' ) }
                       $array->get_designElements();
    
    # Print out the column headings.
    my $header = $self->_generate_main_header_line( $reporter_dbs,
                                                    $groups,
                                                    $composite_dbs );
    $self->set_num_columns( scalar @$header );
    $self->_write_line( '[main]' );
    $self->_write_line( @$header );

    # Loop through all the features, writing out the info.
    foreach my $feature ( @features ) {

        # Sort out the basics;
        my @line = $self->_get_feature_coords( $feature );

        # Simple reporter info.
        my $reporter = $feature->get_reporter();
        push @line, $self->_generate_reporter_data( $reporter, $reporter_dbs, $groups );

        unless ( $self->_must_generate_mapping() ) {

            # There will be only one (or zero) CompositeElements in
            # such cases.
            my $composite = $reporter->get_compositeElements();
            push @line, $self->_generate_composite_data( $composite, $composite_dbs ) if $composite;
        }

        # Write out the line.
        $self->_write_line( @line );
    }

    # These may be needed for the mapping section.
    return $composite_dbs;
}

sub _write_mapping {

    my ( $self, $dbs ) = @_;

    my $array = $self->get_magetab_object();
    my @header = (
        'Composite Element Name',
        'Map2Reporters',
        ( map { "Composite Element Database Entry [$_]" } @$dbs ),
        'Composite Element Comment',
    );

    # Print out the column headings.
    $self->set_num_columns( scalar @header );
    $self->_write_line( '[mapping]' );
    $self->_write_line( @header );

    # Build a compositeElement to reporter mapping.
    my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) }
                       $array->get_designElements();
    my %map2reporters;
    foreach my $rep ( @reporters ) {
        foreach my $comp ( $rep->get_compositeElements() ) {
            push @{ $map2reporters{ $comp->get_name() } }, $rep->get_name();
        }
    }

    # Build our mapping lines and write them out.
    my @compelems = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::CompositeElement' ) }
                       $array->get_designElements();
    foreach my $element ( @compelems ) {
        my $name = $element->get_name();
        my @line = (
            $name,
            join(';', @{ $map2reporters{ $name } } ),
            $self->_get_element_dbentries( $element, $dbs ),
            join('; ', map { $_->get_value() } $element->get_comments()),
        );
        $self->_write_line( @line );
    }

    return;
}

sub write {

    my ( $self ) = @_;

    # First, the header section.
    $self->_write_header();

    $self->_write_line( q{} );    # spacer line

    # The main body of the ADF.
    my $comp_dbs = $self->_write_main();

    $self->_write_line( q{} );    # spacer line

    # Where necessary, the ADF mapping section.
    if ( $self->_must_generate_mapping() ) {
        $self->_write_mapping( $comp_dbs );
    }

    return;
}

# Make the classes immutable. In theory this speeds up object
# instantiation for a small compilation time cost.
__PACKAGE__->meta->make_immutable();

no Moose;

=head1 NAME

Bio::MAGETAB::Util::Writer::ADF - Export of MAGE-TAB ArrayDesign
objects.

=head1 SYNOPSIS

 use Bio::MAGETAB::Util::Writer::ADF;
 my $writer = Bio::MAGETAB::Util::Writer::ADF->new({
    magetab_object => $array_design,
    filehandle     => $adf_fh,
 });
 
 $writer->write();

=head1 DESCRIPTION

Export of ArrayDesigns to ADF files.

=head1 ATTRIBUTES

See the L<Tabfile|Bio::MAGETAB::Util::Writer::Tabfile> class for superclass attributes.

=over 2

=item magetab_object

The Bio::MAGETAB::ArrayDesign to export. This is a required
attribute.

=back

=head1 METHODS

=over 2

=item write

Exports the ArrayDesign to ADF.

=back

=head1 SEE ALSO

L<Bio::MAGETAB::Util::Writer>
L<Bio::MAGETAB::Util::Writer::Tabfile>

=head1 AUTHOR

Tim F. Rayner <tfrayner@gmail.com>

=head1 LICENSE

This library is released under version 2 of the GNU General Public
License (GPL).

=cut

1;