The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
#
# Copyright 2008-2010 Tim Rayner
# 
# This file is part of Bio::MAGETAB.
# 
# Bio::MAGETAB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# Bio::MAGETAB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with Bio::MAGETAB.  If not, see <http://www.gnu.org/licenses/>.
#
# $Id: 013_sdrf.t 384 2014-04-04 16:15:47Z tfrayner $

use strict;
use warnings;

use Test::More qw(no_plan);
use Test::Exception;
use File::Temp qw(tempfile);

use lib 't/testlib';
use CommonTests qw( test_parse check_term );

BEGIN {
    use_ok( 'Bio::MAGETAB::Util::Reader::SDRF' );
    use_ok( 'Bio::MAGETAB::Util::Writer::SDRF' );
}

sub add_dummy_objects {

    my ( $builder ) = @_;

    foreach my $tsname ( 'NCI META', 'MO', 'ArrayExpress' ) {
        $builder->find_or_create_term_source({ name => $tsname });
    }
    foreach my $adname ( 'A-TEST-1' ) {
        $builder->find_or_create_array_design({ name => $adname });
    }
    foreach my $efname ( 'EF1', 'EF2' ) {
        $builder->find_or_create_factor({ name => $efname });
    }
    my %proto = (
        'EXTPRTCL10654'  => 'Extracted Product',
        'TRANPRTCL10656' => '',
    );
    while ( my ( $protoname, $param ) = each %proto ) {
        my $p = $builder->find_or_create_protocol({ name => $protoname });
        $builder->find_or_create_protocol_parameter({ name => $param, protocol => $p }) if $param;
    }

    return;
}

my $sdrf_reader;

# Instantiate with none of the required attributes.
dies_ok( sub{ $sdrf_reader = Bio::MAGETAB::Util::Reader::SDRF->new() },
         'instantiation without attributes' );

# Populate our temporary test SDRF file.
my ( $fh, $filename ) = tempfile( UNLINK => 1 );
while ( my $line = <DATA> ) {
    print $fh $line;
}

# Close the filehandle, since we'll be using the filename only.
close( $fh ) or die("Error closing filehandle: $!");

# Test parsing.
lives_ok( sub{ $sdrf_reader = Bio::MAGETAB::Util::Reader::SDRF->new( uri => $filename ) },
          'instantiation with uri attribute' );
my $builder;
lives_ok( sub { $builder = $sdrf_reader->get_builder(); }, 'SDRF parser returns a Builder object' );
is( ref $builder, 'Bio::MAGETAB::Util::Builder', 'of the correct class' );

add_dummy_objects( $builder );

my $sdrf;
lives_ok( sub { $sdrf = test_parse( $sdrf_reader ) }, 'parsing completes without exceptions' );

# Check that FV->measurement creation is behaving more or less correctly.
my @fvs = grep { $_->get_factor()->get_name() eq 'EF2' } $sdrf->get_ClassContainer->get_factorValues();
is( scalar @fvs, 3, 'recognising the correct number of Measurement FVs');

my @fv_vals = sort map { int($_->get_measurement()->get_value()) } @fvs;
is_deeply(\@fv_vals, [0, 10, 10],  'with the correct values');

my @fv_units = sort map { $_->get_measurement()->get_unit()->get_value() } @fvs;
is_deeply(\@fv_units, ['mM', 'mg_per_mL', 'mg_per_mL'],  'and the correct units');

# Test parsing into a supplied magetab_object.
use Bio::MAGETAB::SDRF;
my $sdrf2 = Bio::MAGETAB::SDRF->new( uri => $filename );

lives_ok( sub{ $sdrf_reader = Bio::MAGETAB::Util::Reader::SDRF->new( uri            => $filename,
                                                                     magetab_object => $sdrf2, ) },
          'parser instantiates with uri and magetab_object attributes' );

# Brief test of the export code; this is nowhere near as thorough as it should be FIXME.
( $fh, $filename ) = tempfile( UNLINK => 1 );
my $sdrf_writer;

dies_ok( sub{ $sdrf_writer = Bio::MAGETAB::Util::Writer::SDRF->new( filehandle     => $fh,
                                                                    magetab_object => $sdrf,
                                                                    export_version => '1.2' ) },
         'writer fails to instantiate with an invalid export version' );

foreach my $version ( qw( 1.0 1.1 ) ) {
    lives_ok( sub{ $sdrf_writer = Bio::MAGETAB::Util::Writer::SDRF->new( filehandle     => $fh,
                                                                         magetab_object => $sdrf,
                                                                         export_version => $version ) },
              "writer instantiates with export version $version" );
}

lives_ok( sub{ $sdrf_writer->write() }, 'writer outputs SDRF data without crashing' );


#########
# These tests take a long time to run and don't really contribute much.
#add_dummy_objects( $sdrf_reader->get_builder() );
#test_parse( $sdrf_reader );

# These two sets of parse results really ought to look identical.
#is_deeply( $sdrf, $sdrf2, 'SDRF objects agree' );
#########

# FIXME (IMPORTANT!) check the output against what we expect!

# FIXME test with bad SDRF input (unrecognized headers etc.)

# Check that assay comments are being processed correctly.
( $fh, $filename ) = tempfile( UNLINK => 1 );
print $fh join("\t", ('Assay Name', 'Comment[com3]','Technology Type', 'Comment[com1]', 'Comment[com2]')) . "\n";
print $fh join("\t", ('Assay 1', 'com text 3','high throughput seq', 'com text 1', 'com text 2')) . "\n";
close( $fh ) or die("Error closing filehandle: $!");

$sdrf_reader = Bio::MAGETAB::Util::Reader::SDRF->new( uri => $filename );
$sdrf = test_parse( $sdrf_reader );

my $tt = join(";", map { $_->get_value }
                  $sdrf_reader->get_builder()->get_assay({ name => 'Assay 1' })->get_comments());
is( $tt, 'com text 3;com text 1;com text 2', 'Assay comments parsing correctly');

# Check that Array Design REF can be attached to Assay Name.
( $fh, $filename ) = tempfile( UNLINK => 1 );
print $fh join("\t", ('Assay Name', 'Array Design REF','Comment[com1]','Technology Type')) . "\n";
print $fh join("\t", ('Assay 1', 'Design 1','Comment 1', 'tt')) . "\n";
close( $fh ) or die("Error closing filehandle: $!");

$sdrf_reader = Bio::MAGETAB::Util::Reader::SDRF->new( uri => $filename );
$sdrf_reader->get_builder->create_array_design({name => 'Design 1'});
$sdrf = test_parse( $sdrf_reader );

my $assay = $sdrf_reader->get_builder()->get_assay({ name => 'Assay 1' });
my $ad = $assay->get_arrayDesign();
is( $ad->get_name(), 'Design 1', 'Assay Array Design correctly linked' );
is( join(";", map { $_->get_value } $ad->get_comments()), 'Comment 1',
    'Array Design commented correctly');

__DATA__
Source Name	Provider	Characteristics[ OrganismPart ]	Characteristics[DiseaseState]	Term Source REF:test namespace	Term Accession Number	Material Type	Description	Comment[MyNVT]	Sample Name	Characteristics[Age]	Unit[TimeUnit]	Term Source REF	Material Type	Comment[sample comment]	Protocol REF	Performer	Parameter Value[Extracted Product]	Date	Comment[P_COMM]	Extract Name	Material Type	LabeledExtract Name	MaterialType	Term Source REF	Label	Term Source REF	Protocol REF	Term Source REF	Hybridization Name	Comment[some comment about the hyb]	Array Design REF	Comment[some comment about the array]	Protocol REF:made-up namespace:	Scan Name	Image File	Comment [scan comment here]	Array Data File	Comment[raw data comment]	Protocol REF	Normalization Name	Comment[data smoothness]	Derived Array Data File	Factor Value [EF1](Prognosis)	Term Source REF	Term Accession Number	Factor Value [EF2]	Unit[ConcentrationUnit]	Term Source REF
my source	the guy in the next room	root	hemophilia	NCI META	CL:111111	organism_part	description_text	mycomment	my sample	6	hours	MO	cell	sample comment value	EXTPRTCL10654	the guy in the next room	total RNA	2007-02-21	This did not happen. I was not here.	my extract	not_a_MO_term	my LE1	total_RNA	MO	Cy3	MO	P-XMPL-7	ArrayExpress	my hybridization	hyb conditions were suboptimal	A-TEST-1	My favourite array design	scanning protocol	my scan	imagefile1.TIFF	this was a great picture	Data1.txt		TRANPRTCL10656	my norm	high	NormData1.txt	ill	NCI META	CL:0123345	10	mg_per_mL	MO
my source	the guy in the next room	root	hemophilia	NCI META	CL:111111	organism_part	description_text	mycomment	my sample	6	hours	MO	cell	sample comment value	EXTPRTCL10654	the guy in the next room	total RNA	2007-02-21	This did not happen. I was not here.	my extract	not_a_MO_term	my LE2	total_RNA	MO	Cy5	MO	P-XMPL-7	ArrayExpress	my hybridization	hyb conditions were suboptimal	A-TEST-1	My favourite array design	scanning protocol	my scan	imagefile1.TIFF	this was a great picture	Data2.txt	not as good as the picture	TRANPRTCL10656	my norm	low	NormData2.txt	healthy	NCI META	CL:2347689	0	mg_per_mL	MO
sparse source 1			normal		blah blah ignore me										EXTPRTCL10654		polyA RNA					sparse LE Cy5			Cy5		P-XMPL-11	ArrayExpress	sparse hyb		A-TEST-1			sparse scan1	testing.jpg		Data3.txt		TRANPRTCL10656	norm 3		NormData3.txt	pained expression			10	mM	MO
sparse source 2			normal												EXTPRTCL10654		polyA RNA					sparse LE Cy3			Cy3		P-XMPL-11	ArrayExpress	sparse hyb		A-TEST-1			sparse scan2			Data4.txt		TRANPRTCL10656	norm 3		NormData3.txt	pregnant pause					
sparse source 3			normal												EXTPRTCL10654		polyA RNA					sparse LE biotin			biotin		P-XMPL-11	ArrayExpress	sparse hyb b		A-TEST-1		scanning protocol	sparse scan3	imagefile2.TIFF	a bit blurry			TRANPRTCL10656	norm 4		NormData4.txt	preternatural calm