The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* File : correlations.h : contains all function prototypes, structs, #includes, #defines etc. ****************/

/********************************************************************** # includes *********************/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <stdarg.h>
#include <time.h>

/********************************************************************** # defines **********************/

/* NOTE(review): presumably the sentinel value stored in place of a missing
 * measurement; its use is not visible in this header -- confirm against the
 * code that reads and consumes the data.  The constant has type double. */
#define NODATA 99999.00

/************************************************ structs that are used to store data structures *****/

/* The nameRec structure identifies a list of correlations associated with
 * either a gene or a node.  The clusterRec struct holds a dynamically
 * allocated array of nameRec structs.  Enough space is allocated to store
 * 2*numLines-1 nameRecs, where numLines is the number of genes for which
 * there is data.  The nameRecs at indices 0 to numLines-1 correspond to genes,
 * and the remaining numLines-1 nameRecs (indices numLines to 2*numLines-2)
 * correspond to nodes.  This same array is reused to store the correlations
 * that are used when experiments are clustered.  In this case
 * 2*numExperiments-1 nameRecs are used: the first numExperiments are used
 * for experiments, and the rest for compound nodes.
 */

/* One labelled entry (gene, experiment, or node) together with pointers to
 * the two ends of its list of correlationRec records.  struct correlationRec
 * is only referenced through pointers here; its full definition appears
 * later in this header. */
typedef struct{
  char *orf; /* NOTE(review): presumably the ORF identifier string; ownership is not visible here */
  char *name; /* NOTE(review): presumably a descriptive name for the entry; confirm against the reader code */
  float rowWeight; /* NOTE(review): presumably a per-row weight taken from the input; confirm */
  int joined; /* to check whether a gene/experiment or node has been joined to another gene/experiment or node yet */
  struct correlationRec *first; /* NOTE(review): presumably the head of this entry's correlation list */
  struct correlationRec *last; /* NOTE(review): presumably the tail of this entry's correlation list */
  int numCorrelations; /* NOTE(review): presumably the number of records currently on the list; confirm */
} nameRec;

/* Container for the array of nameRec entries plus an integer count. */
typedef struct{
  nameRec *genes; /* pointer to the nameRec array; NOTE(review): allocation and release happen outside this header */
  int numGenes; /* NOTE(review): presumably the number of genes (not the allocated array length); confirm */
} clusterRec;

/* Node of a singly linked list (linked through `next`) that pairs a
 * correlation value with an integer ORFnumber. */
typedef struct correlationRec{
  int ORFnumber; /* NOTE(review): presumably the index of the correlated entry in the nameRec array; confirm */
  float corr; /* the correlation value stored for this record */
  struct correlationRec *next; /* next record in the list; NOTE(review): presumably NULL at the end */
} correlationRec;

/************************************************************** Global Variables ******************************/

/* Program-wide option variables and their default values.
 *
 * NOTE(review): these are definitions (most carry initializers), not extern
 * declarations, and no include guard is visible in this header.  Including
 * the header from more than one translation unit would therefore define each
 * variable more than once -- confirm it is only ever included by one .c file.
 */
int		gLogData=0; /* whether to log transform the data */
int		gCentered=0; /* keeps track of whether to use a centered metric for the genes */
char*	gPrefix; /* if they want to pass in a unique identifier, as opposed to using the filename; no initializer, so it starts out as a null pointer */
int 	gUID=0; /* whether they passed in a unique identifier */
float	gCutOff=0.8; /* the cut off below which they don't see correlations */
int		gMaxNumCorrelations=20; /* number of correlations to save */
int     gShowCorrelations = 1; /* whether to show the correlations or not */

/************************************************************** Function Prototypes ***************************/

/************************************************************** General Functions *****************************/

/* Program entry point and command-line handling.  The definitions are not in
 * this header.  NOTE(review): the notes on the individual functions are
 * inferred from names and signatures only -- confirm against the
 * implementations. */
int     main(int argc, char *argv[]);
/* NOTE(review): presumably interprets the argc/argv options; the role of ifile (input vs. output buffer) is not visible here */
void    ParseOptions(char *ifile, int argc, char **argv);
/* NOTE(review): presumably prints a usage message; takes no arguments */
void 	Usage(void);
/* NOTE(review): presumably derives a prefix string from ifile; who owns the returned pointer is not visible here */
char	*GetFilePrefix(char *ifile);

/* Interactive option-gathering routines.  The definitions are not in this
 * header.  NOTE(review): the notes on the individual functions are inferred
 * from names and signatures only -- confirm against the implementations.
 *
 * GetUserInput is declared with an explicit (void) parameter list, like its
 * siblings, so that it is a real prototype: an empty () in C (before C23)
 * leaves the parameters unspecified and disables argument checking at call
 * sites. */
void	GetUserInput(void);
/* NOTE(review): presumably asks whether to log-transform the data (see gLogData) */
void	GetTransformationOptions(void);
/* NOTE(review): presumably asks whether to use a centered metric (see gCentered) */
void	GetGeneMetric(void);
/* NOTE(review): presumably asks for the correlation cut-off (see gCutOff) */
void	GetCutOff(void);
/* NOTE(review): presumably asks how many correlations to keep (see gMaxNumCorrelations) */
void	GetNumCorrelations(void);
/* NOTE(review): presumably validates a yes/no answer held in inputLine; whether it modifies the buffer is not visible here */
void	CheckYesOrNo(char *inputLine);

/* File-name, allocation, and data-reading routines.  The definitions are not
 * in this header.  NOTE(review): the notes on the individual functions are
 * inferred from names and signatures only -- confirm against the
 * implementations. */

/* NOTE(review): presumably builds a file name from ifile and hands it back through *fileName */
void	MakeFileName(char *ifile, char **fileName);

/* NOTE(review): presumably scans istream and reports the experiment and line counts through the two out-pointers */
void	GetDataSize(FILE *istream, int *numExperiments, int *numLines);
/* NOTE(review): presumably allocates the weights, experiment names, cluster array, and data matrix, returning them through the pointer arguments */
void	DoMemoryAllocation(float **eWeights, int numExperiments, char ***experimentNames,
			   clusterRec *cluster, float **dataMatrix);
/* NOTE(review): presumably releases a correlationRec list; takes the address of the list pointer */
void 	FreeCorrelations(correlationRec **node);
/* NOTE(review): presumably fills dataMatrix, eWeights, and experimentNames from istream; the meaning of the returned float pointer is not visible here */
float	*ReadInData(FILE *istream, clusterRec *cluster, int numExperiments, float *eWeights, 
		    char **experimentNames, float *dataMatrix);
/* NOTE(review): presumably resets the nameRec entries; no element count is passed, so the length must come from elsewhere */
void	InitializeArray(nameRec *names);
/* NOTE(review): presumably reads the data row for line currLine into dataMatrix and names */
void	ReadOneLine(FILE *istream, float *dataMatrix, int numExperiments, int currLine, nameRec *names);
/* NOTE(review): presumably converts the text in s to a number; returns double */
double	StringToReal(char *s);
/* NOTE(review): the three openers presumably open the named file for reading, writing, and appending respectively; behaviour on failure is not visible here */
FILE	*OpenInFile(char *ifile);
FILE	*OpenOutFile(char *ofile);
FILE    *OpenForAppend(char *ofile);
/* NOTE(review): presumably log-transforms the numGenes x numExperiments values in dataMatrix in place (see gLogData) */
void	LogTransformData(float *dataMatrix, int numGenes, int numExperiments);

/*************************************************************** Functions for Hierarchically Clustering ********/

/* Correlation computation and sorted-list maintenance.  The definitions are
 * not in this header.  NOTE(review): the notes on the individual functions
 * are inferred from names and signatures only -- confirm against the
 * implementations. */

/* NOTE(review): presumably releases the storage owned by cluster */
void	FreeCluster(clusterRec *cluster);
/* NOTE(review): presumably computes the correlations over dataMatrix and records them in cluster; the use of ifile is not visible here */
void	MakeCorrelations(clusterRec *cluster, float *dataMatrix, int numExperiments, float *eWeights, char *ifile);
/* NOTE(review): presumably returns the weighted correlation between the two rows of numExperiments values */
float	CalculateCorrelation(float *genePtr, float *cmpPtr, int numExperiments,
			     float *eWeights);
/* NOTE(review): presumably decides whether pearsonCorrelation should be added to the list for geneCounter (see gCutOff, gMaxNumCorrelations) */
void	CheckToInsert(clusterRec *cluster, int geneCounter, int comparedToCounter, float pearsonCorrelation);
/* NOTE(review): presumably inserts newOne into *list in sorted order; the sort direction is not visible here */
void	InsertSorted(correlationRec **list, correlationRec *newOne);
/* NOTE(review): presumably replaces the last record of *list with newOne; the meaning of the return value is not visible here */
correlationRec *SwitchLast(correlationRec **list, correlationRec* newOne);
/* NOTE(review): presumably unlinks the last record of *list and returns it */
correlationRec	*DeleteLast(correlationRec **list);
/* Takes the correlation as a double although correlationRec.corr is a float.
 * NOTE(review): presumably allocates a record holding the two values */
correlationRec	*MakeNewRecord(double correlation, int geneNumber);

/* Variadic error reporter: msg is followed by a variable argument list.
 * NOTE(review): presumably msg is a printf-style format and the function
 * terminates the program; neither is visible in this header -- confirm. */
void 	Error(char *msg, ...);
/* NOTE(review): presumably frees the numExperiments name strings and the array that holds them */
void 	FreeExperimentNames(char **experimentNames, int numExperiments);

/**************************************************** Functions for debugging purposes *****************************************/

/* NOTE(review): presumably writes the records of list to outfile, using
 * cluster to look up names; the definition is not in this header -- confirm. */
void 	PrintOneGene(correlationRec *list, FILE *outfile, clusterRec *cluster);