The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*
 * MaxClassPol.cpp
 *
 *  Created on: 26 feb 2012
 *      Author: Wolfcastle
 *
 *  Description
 *		This library implements the independent t-test
 */

#include "feature_selection.h"

/*
Description: Feature selection method that uses the independent t-test to select a feature
Parameters: (1) 2D expression data matrix, (2) number of matrix rows, (3) number of matrix columns (4) list of column names,
            (5) first group symbol and (6) second group symbol
Return value: Index of the top t-value feature (row in data matrix)
*/
int indTTest(double** expData, int numFeatures, int numSamples, char** sampleNames, char* NORMAL, char* MALIGN){
	
	//Determine class sizes
	int sample;
	int numNormal = 0;
	int numMalign = 0;
	for(sample = 0; sample < numSamples; sample++){
		if(strcmp(sampleNames[sample], NORMAL) == 0)
			numNormal++;
		else if(strcmp(sampleNames[sample], MALIGN) == 0)
			numMalign++;
	}
	
	//Establish class index arrays
	int normalIndexes[numNormal];
	int malignIndexes[numMalign];
	int normalCounter = 0;
	int malignCounter = 0;
	for(sample = 0; sample < numSamples; sample++){
		if(strcmp(sampleNames[sample], NORMAL) == 0){
			normalIndexes[normalCounter] = sample;
			normalCounter++;
		}
		else if(strcmp(sampleNames[sample], MALIGN) == 0){
			malignIndexes[malignCounter] = sample;
			malignCounter++;
		}
	}
	
	//Perform t-test
	int feature;
	double currentT = 0;
	double topT = 0;
	int bestFeature;
	for(feature = 0; feature < numFeatures; feature++){

		//Calculate sum of normal samples
		double sumNormal = 0;
		int normalBuffer;
		for(sample = 0; sample < numNormal; sample++){
			normalBuffer = normalIndexes[sample];
			sumNormal += expData[feature][normalBuffer];
		}
		//Calculate sum of malign samples
		double sumMalign = 0;
		int malignBuffer;
		for(sample = 0; sample < numMalign; sample++){
			malignBuffer = malignIndexes[sample];
			sumMalign += expData[feature][malignBuffer];
		}

		//Calculate mean
		double normalMean = sumNormal / numNormal;
		double malignMean = sumMalign / numMalign;

		//Calculate variance of normal samples
		double normalVar = 0;
		for(sample = 0; sample < numNormal; sample++){
			normalBuffer = normalIndexes[sample];
			normalVar += pow(expData[feature][normalBuffer] - normalMean, 2);
		}

		//Calculate variance of malign samples
		double malignVar = 0;
		for(sample = 0; sample < numMalign; sample++){
			malignBuffer = malignIndexes[sample];
			malignVar += pow(expData[feature][malignBuffer] - malignMean, 2);
		}
		
		//Calculate t and compare to previous best
		if(normalVar == 0 && malignVar == 0){
			normalVar = 0.000001;
		}
		currentT = fabs(normalMean - malignMean) / sqrt ((normalVar / numNormal) + (malignVar / numMalign));
		if(currentT > topT){
			topT = currentT;
			bestFeature = feature;
		}
	}
	
	//Return index of best feature
	return bestFeature;
}