# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Microarray::File::Data;

use 5.006;
use strict;
use warnings;
our $VERSION = '2.22';

require Microarray::File;
require Microarray::Spot;

{ package data_file;

	our @ISA = qw( delimited_file );

	# Constructor.
	#
	#   data_file->new()                  - returns an empty object
	#   data_file->new($file_name)        - stores the name and imports the data
	#   data_file->new($file_name, $fh)   - as above, also passing $fh to
	#                                       set_filehandle()
	#
	# When called on the generic 'data_file' class, guess_class() is asked
	# for a more specific class; if it names one, the object is rebuilt in
	# that class, carrying over only the file name and the source.
	sub new {
		my $class = shift;
		my $self = bless {}, $class;
		return $self unless @_;					# nothing to load

		$self->{ _file_name } = shift;			# shift in file name
		$self->set_filehandle(shift) if (@_);	# Fh is passed from CGI

		if ($class eq 'data_file') {
			# try and guess which file type we're dealing with
			my $better_class = $self->guess_class;
			if ($better_class) {
				# we've found a better match, recreate ourself
				my $name   = scalar $self->file_name;
				my $source = scalar $self->get_source;
				$self = bless {
					_file_name => $name,
					_source    => $source,
				}, $better_class;
			}
		}

		$self->import_data;
		return $self;
	}
	# Try to work out which plug-in class can handle this file.
	#
	# Every name that Module::List reports below Microarray::File::Data::
	# is loaded, and each one implementing the class method
	# auto_data_file() is asked for a (class name, search term) pair.
	# The first plug-in whose search term matches the file source
	# (case-insensitively) wins.
	#
	# Returns the class name of the matching plug-in, or nothing (after
	# emitting a warning) when no plug-in recognises the file.
	sub guess_class {
		use Module::List qw(list_modules);
		my $self = shift;
		
		my $source = $self->get_source;
		
		my $hModules = list_modules('Microarray::File::Data::',{list_modules=>1,list_prefixes=>1});
		# visit the names in sorted order so that the result does not depend
		# on hash ordering when more than one plug-in recognises the file
		for my $module (sort keys %$hModules){
			# load the module without a string eval; a failure to load is
			# deliberately ignored (prefix entries are not loadable) and the
			# can() check below decides whether the name is usable
			(my $module_file = "$module.pm") =~ s{::}{/}g;
			eval { require $module_file; 1 };
			next unless ($module->can('auto_data_file'));
			my ($class,$search_term) = $module->auto_data_file;
			# an undefined or empty search term would become an empty
			# pattern, which Perl treats as "reuse the last successful
			# pattern" - never a meaningful test, so skip such plug-ins
			next unless (defined $class && defined $search_term && length $search_term);
			return $class if ($source =~ /$search_term/i);
		}
		warn "Microarray::File::Data ERROR: Could not deduce the type of file from '".$self->file_name."'\n";
		return;
	}
	# Load the file contents with load_file_data() and hand them to
	# sort_data(). Returns whatever sort_data() returns.
	sub import_data {
		my ($self) = @_;
		my $rows = $self->load_file_data;
		return $self->sort_data($rows);
	}

	# _spot_data 	= scanner data imported as array of arrays
	# _data_fields	= column field names and indices
	# _header_info	= scanner set up information from the data file header
	# _spots 		= spot objects	
	sub sort_data {
		my ($self, $rows) = @_;
		# the first row is removed and passed to set_data_fields()
		# (method in class delimited_file)
		my $header_row = shift @$rows;
		$self->set_data_fields($header_row);
		# what is left is the data itself, stored together with its row count
		$self->{ _spot_data }  = $rows;
		$self->{ _spot_count } = scalar @$rows;
	}
	# not anything to do with the above, but instead is the data 
	# sorted on any specific field other than spot_index - 
	# used to discriminate processed_data from other data
	sub comes_sorted { return }		# always returns nothing (false) in this class
	# the size of the data array after removing data fields
	# for the number of spot OBJECTS created, use number_spots()
	sub spot_count {
		my ($self) = @_;
		return $self->{ _spot_count };		# value stored by sort_data()
	}
	# Store a hash ref containing header information; returns the stored value.
	sub import_header_info {
		my ($self, $header_info) = @_;
		return $self->{ _header_info } = $header_info;
	}
	# sets spot objects for all data in one go
	sub set_spot_objects {
		my ($self) = @_;
		my $rows   = $self->spot_data;
		my $fields = $self->data_file_fields;	# required data fields from any data file format
		my $row = 0;							# current row; ends up as the number of objects created
		while ($row < @$rows) {
			my $spot = array_spot->new();		# new spot object
			# fill the spot object with this row's value for each field name
			for my $field (@$fields) {
				$spot->$field($self->$field($row));
			}
			$self->add_spot($spot);				# add spot object to data_file
			$row++;
		}
		$self->number_spots($row);
	}
	# the data array
	sub spot_data {
		my ($self) = @_;
		return $self->{ _spot_data };		# array ref stored by sort_data()
	}
	# return a spot object for a specific index
	sub spot_object {
		my ($self, $index) = @_;

		# a (true) value already held for this index is returned as it is
		my $stored = $self->get_spots($index);
		return $stored if $stored;

		# otherwise build a spot from the data row at $index-1;
		# note that the new object is returned but not stored
		my $fields = $self->data_file_fields;	# required data fields from any data file format
		my $spot = array_spot->new();
		for my $field (@$fields) {
			$spot->$field($self->$field($index-1));
		}
		return $spot;
	}
	
	# adds spot objects to the arrayref held in { _spots }
	# NOTE: the zero index in this arrayref is a count of the total number of spots, 
	# and spots are placed in the array index corresponding to their "spot index"
	sub add_spot {
		my ($self, $oSpot) = @_;
		my $slots = $self->get_spots;
		my $position = $oSpot->spot_index;		# the spot's own index decides its slot
		return $slots->[$position] = $oSpot;
	}
	# the total number of spot OBJECTS in the array
	# for number of spots counted from data array length, use spot_count() 
	sub number_spots {
		my ($self, @new_count) = @_;
		my $slots = $self->get_spots;
		# the count lives in the first element of the spots array
		return $slots->[0] unless @new_count;
		return $slots->[0] = $new_count[0];		# set from set_spot_objects()
	}
	# the spots objects are stored as an array ref
	# each spot is placed at the array index that matches the spot index
	# THEREFORE THE INDEX[0] DOES NOT CONTAIN A SPOT!
	sub get_spots {
		my ($self, @wanted) = @_;
		# create the store on first use
		$self->{ _spots } = [] if !defined $self->{ _spots };
		my $slots = $self->{ _spots };

		return $slots unless @wanted;			# no index: return all objects

		my $index = $wanted[0];					# passed a spot index
		return unless (defined $slots->[$index]);
		return $slots->[$index];
	}
	# minimum required fields; returns a new array ref of field names
	sub data_file_fields {
		return [ qw(
			spot_index feature_id synonym_id
			channel1_signal channel2_signal
			channel1_quality channel2_quality
			block_row block_col
			spot_diameter flag_id
			spot_row spot_col x_pos y_pos
			ch1_mean_f ch1_median_b ch2_mean_f ch2_median_b
			channel1_snr channel2_snr
			log2_ratio
		) ];
	}
	# Return one cell of the data array: return_data($row, $column).
	sub return_data {
		my ($self, $row, $column) = @_;
		my $grid = $self->spot_data;
		return $grid->[$row][$column];
	}
	# Returns (channel1_image_file, channel2_image_file) as a list.
	sub image_file_names {
		my ($self) = @_;
		return (
			$self->channel1_image_file,
			$self->channel2_image_file
		);
	}
	# Returns (channel1_name, channel2_name) as a list.
	sub fluor_names {
		my ($self) = @_;
		return (
			$self->channel1_name,
			$self->channel2_name
		);
	}
	# Returns (channel1_laser, channel2_laser) as a list.
	sub laser_powers {
		my ($self) = @_;
		return (
			$self->channel1_laser,
			$self->channel2_laser
		);
	}
	# Returns (channel1_pmt, channel2_pmt) as a list.
	sub pmt_voltages {
		my ($self) = @_;
		return (
			$self->channel1_pmt,
			$self->channel2_pmt
		);
	}

	### calculated fields ###
	# Channel 1 signal for a row: ch1_mean_f minus ch1_median_b.
	sub channel1_signal {
		my ($self, $index) = @_;
		my $foreground = $self->ch1_mean_f($index);
		my $background = $self->ch1_median_b($index);
		return $foreground - $background;
	}
	# Channel 2 signal for a row: ch2_mean_f minus ch2_median_b.
	sub channel2_signal {
		my ($self, $index) = @_;
		my $foreground = $self->ch2_mean_f($index);
		my $background = $self->ch2_median_b($index);
		return $foreground - $background;
	}
	# Log2 ratio of the two channel signals for a row.
	# Returns nothing when either signal is zero or negative.
	# With flip_flop() == 1 the ratio is ch1/ch2, otherwise ch2/ch1.
	sub log2_ratio {
		my ($self, $index) = @_;
		my $ch1 = $self->channel1_signal($index);
		my $ch2 = $self->channel2_signal($index);
		return unless ($ch1 > 0 && $ch2 > 0);

		my ($numerator, $denominator) = ($self->flip_flop == 1)
			? ($ch1, $ch2)
			: ($ch2, $ch1);
		return log($numerator/$denominator)/log(2);
	}
	# Current orientation flag: the stored value, or 1 when none has been set.
	sub flip_flop {
		my ($self) = @_;
		return defined $self->{ _flip_flop } ? $self->{ _flip_flop } : 1;
	}
	# Set the orientation flag to -1 (log2_ratio() then uses ch2/ch1).
	sub flip {
		my ($self) = @_;
		return $self->{ _flip_flop } = -1;
	}
	# Set the orientation flag to 1 (log2_ratio() then uses ch1/ch2).
	sub flop {
		my ($self) = @_;
		return $self->{ _flip_flop } = 1;
	}
	# Dispatch to the per-channel signal method.
	#
	#   $index - passed through unchanged to the per-channel method
	#   $ch    - channel identifier; selects the method "channel<$ch>_signal"
	#
	# Dies when no channel is given: an empty channel would build the name
	# 'channel_signal', making this method call itself without end.
	sub channel_signal {
		my ($self, $index, $ch) = @_;
		die "Microarray::File::Data ERROR: channel_signal() requires a channel number\n"
			unless (defined $ch && length $ch);
		my $method = 'channel'.$ch.'_signal';
		$self->$method($index);
	}
	# Channel 1 signal-to-noise ratio for a row: ch1_median_f / ch1_sd_b.
	#
	# Returns nothing when ch1_sd_b is undefined or zero. Dividing would
	# otherwise raise "Illegal division by zero" and abort any caller that
	# walks every row, such as set_spot_objects(). This mirrors the way
	# log2_ratio() returns nothing for values it cannot compute.
	sub channel1_snr {
		my $self = shift;
		my $index = shift;
		my $signal = $self->ch1_median_f($index);
		my $noise  = $self->ch1_sd_b($index);
		return unless (defined $noise && $noise != 0);
		return $signal / $noise;
	}
	# Channel 2 signal-to-noise ratio for a row: ch2_median_f / ch2_sd_b.
	#
	# Returns nothing when ch2_sd_b is undefined or zero. Dividing would
	# otherwise raise "Illegal division by zero" and abort any caller that
	# walks every row, such as set_spot_objects(). This mirrors the way
	# log2_ratio() returns nothing for values it cannot compute.
	sub channel2_snr {
		my $self = shift;
		my $index = shift;
		my $signal = $self->ch2_median_f($index);
		my $noise  = $self->ch2_sd_b($index);
		return unless (defined $noise && $noise != 0);
		return $signal / $noise;
	}
	# Dispatch to the per-channel signal-to-noise method.
	#
	#   $index - passed through unchanged to the per-channel method
	#   $ch    - channel identifier; selects the method "channel<$ch>_snr"
	#
	# Dies when no channel is given: an empty channel would build the name
	# 'channel_snr', making this method call itself without end.
	sub channel_snr {
		my ($self, $index, $ch) = @_;
		die "Microarray::File::Data ERROR: channel_snr() requires a channel number\n"
			unless (defined $ch && length $ch);
		my $method = 'channel'.$ch.'_snr';
		$self->$method($index);
	}
	# Dispatch to the per-channel quality method.
	#
	#   $index - passed through unchanged to the per-channel method
	#   $ch    - channel identifier; selects the method "channel<$ch>_quality"
	#
	# Dies when no channel is given: an empty channel would build the name
	# 'channel_quality', making this method call itself without end.
	sub channel_quality {
		my ($self, $index, $ch) = @_;
		die "Microarray::File::Data ERROR: channel_quality() requires a channel number\n"
			unless (defined $ch && length $ch);
		my $method = 'channel'.$ch.'_quality';
		$self->$method($index);
	}
	# Dispatch to the per-channel saturation method.
	#
	#   $index - passed through unchanged to the per-channel method
	#   $ch    - channel identifier; selects the method "channel<$ch>_sat"
	#
	# Dies when no channel is given: an empty channel would build the name
	# 'channel_sat', making this method call itself without end.
	sub channel_sat {
		my ($self, $index, $ch) = @_;
		die "Microarray::File::Data ERROR: channel_sat() requires a channel number\n"
			unless (defined $ch && length $ch);
		my $method = 'channel'.$ch.'_sat';
		$self->$method($index);
	}
	# Guess a barcode from the file name: the part of the base name that
	# comes before the first hyphen, underscore or space.
	sub guess_barcode {
		use File::Basename;
		my ($self) = @_;
		my $base_name = basename($self->file_name);
		my @name_parts = split(/-|_| /,$base_name);
		return $name_parts[0];
	}
	# Dispatch to the per-channel image file method.
	#
	#   $ch - channel identifier; selects the method "channel<$ch>_image_file"
	#
	# Dies when no channel is given: an empty channel would build the name
	# 'channel_image_file', making this method call itself without end.
	sub channel_image_file {
		my ($self, $ch) = @_;
		die "Microarray::File::Data ERROR: channel_image_file() requires a channel number\n"
			unless (defined $ch && length $ch);
		my $method = "channel".$ch."_image_file";
		$self->$method;
	}
	sub num_channels { return 2 }		# this class always reports two channels
	
}


1;


__END__

=head1 NAME

Microarray::File::Data - An object oriented Perl module describing microarray data files

=head1 SYNOPSIS

	use Microarray::File::Data;

	my $oFile = data_file->new('/results.txt');

=head1 DESCRIPTION

Microarray::File::Data provides methods for retrieving data from microarray data file objects. 

=head1 METHODS

=head2 Object creation

If you know the type of data file you are dealing with, then you should use the appropriate file module. However, if for some reason you don't know you can create a C<data_file> object and the module will attempt to create a file object of the correct type for you. This depends on the correct Plug-In module being present. If you write your own Plug-In module, you'll need to include a class-level method 'auto_data_file' in your module, which returns the package name of the file object and a search term that will identify an unknown file as the correct type.  

=head2 Spot object methods

=head3 Spot object creation

The module can create individual L<Microarray::Spot|Microarray::Spot> objects for you, either en masse or individually as you want them. The overhead for doing this is not huge, so if you have replicates that you want to handle using the L<Microarray::Reporter|Microarray::Reporter> module, this is a handy way to fill the reporter container. 

	$oFile->spot_object(123);				# returns the object for spot index 123, creating it if necessary

	$oFile->set_spot_objects;				# sets all spot objects
	my $oSpot = $oFile->spot_object(1234);	# spot object for spot index 1234

=over 4

=item B<set_spot_objects>

Creates spot objects for all spots.

=item B<spot_object>

Pass a spot index to this method to return the relevant spot object. If C<set_spot_objects> has not been called, this will create and return only this object. 

=item B<number_spots>

Returns the total number of spot objects created by C<set_spot_objects()>. 

=item B<get_spots>

Returns the spot objects as an array, where each index of the array matches that of the spot. (Therefore there is not a spot at index[0], but instead this element holds the number of spots!)

=back

=head2 Other methods

=over

=item B<image_file_names>, B<fluor_names>, B<laser_powers>, B<pmt_voltages>

Returns the relevant values for each analysed channel as a list. Will only work for file types that return the relevant information (for instance, BlueFuse does not return laser/PMT information).

=item B<guess_barcode>

In the event that a barcode is not present in the data file, will parse the file name and assume that the first portion of the name (using a hyphen, underscore or space as a delimiter) is the barcode. 

=item B<num_channels>

Defaults to two in the event that a file type is used which does not return the number of channels. 

=back

And many other methods that need no explanation:

=over

=item B<analysis_software, pixel_size, channel1_name, channel2_name, channel1_signal, channel2_signal, channel1_snr, channel2_snr, channel_quality, channel_sat, bad_flags>

Once again, not all file types will return the relevant information (BlueFuse does not return channel saturation or SNR). 

=back

=head1 TESTING

This distribution has been extensively tested, but does not include data files for testing purposes since they are very large. If you would like to run the full test you can download the files at L<http://www.instituteforwomenshealth.ucl.ac.uk/trl/pipeline/download.html>. 

=head1 SEE ALSO

L<Microarray|Microarray>, L<Microarray::File|Microarray::File>, L<Microarray::Spot|Microarray::Spot>

=head1 AUTHOR

Christopher Jones, Gynaecological Cancer Research Laboratories, Institute for Women's Health, University College London.

L<http://www.instituteforwomenshealth.ucl.ac.uk/AcademicResearch/Cancer/trl>

c.jones@ucl.ac.uk

=head1 COPYRIGHT AND LICENSE

Copyright 2008 by Christopher Jones, University College London

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. 

=cut