The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# Last changed Time-stamp: <2014-12-10 13:05:43 mtw>
# AUTHOR: Joerg Fallmann <joerg.fallmann@univie.ac.at>

###############
###Use stuff
###############
use strict;
use warnings;
use Data::Dumper;
use Getopt::Long qw( :config posix_default bundling no_ignore_case );
use Pod::Usage;
use PerlIO::gzip;
use Cwd;
use List::Util qw(sum);
use File::Path qw(make_path remove_tree);
use Bio::ViennaNGS;

###############
###Variables
###############

my $VERBOSE = 0;
my ( $dir, $odir, $file, $klength, $type );

###############
###Command Line Options
###############

pod2usage(-verbose => 0)
	unless GetOptions(
	"dir|d=s"	=> \$dir,
	"odir|o=s"	=> \$odir,
	"file|f=s"	=> \$file,
	"kmer|k=s"      => \$klength,
	"type|t=s"      => \$type,
	"help|h"	=> sub{pod2usage(-verbose => 1)},
	"man|m"		=> sub{pod2usage(-verbose => 2)},
	"verbose"	=> sub{ $VERBOSE++ }
	);

$dir = cwd() unless ($dir);
$odir = "$dir"."/Kmer_Analysis" unless $odir;
$dir =~ s/ //g;
$odir =~ s/ //g;

###############
###MAIN
###############

if (!-d $odir){
make_path($odir) or die "Error creating directory: $odir";
}

chdir($dir) or die "Could not find directory $dir, $!\n";

unless (-f $file) {
	warn "Could not find input file $file given via -f option";
	pod2usage(-verbose => 0);
}

unless ($type || $file =~ /.fa$/ || $file =~ /.fastq/ || $file =~ /.fasta/) {
	warn "Could not determine file type, please define using the -t option (fasta or fastq)";
	pod2usage(-verbose => 0);
}

open(IN,"<:gzip(autopop)","$file") || die ("Could not open $file!\n@!\n");

my (%embeddings, %kmer, @seqs);
my $x=1; ## Line counter for fastq file
while(<IN>){
    if ($file =~ /\.fastq/ || $type eq "fastq"){
#	print STDERR "Processing fastq file!\n";
        if( (++$x)%2 && $x != 5){
	    chomp (my $raw = $_);
	    push @seqs , $raw;
	}
	else{
#	    print STDERR $x,"\n";
	    $x = 1 if ($x >= 5);
	}
#	$embeddings{"$line[1] $line[2]"}= $line[5];
    }
    elsif ($file =~ /\.fa$/ || $file =~ /\.fasta$/ || $type eq "fasta"){
#	print STDERR "Processing fasta file!\n";
	next if ($_=~m/^>/);
	chomp (my $raw = $_);
	push @seqs, $raw;
#	$embeddings{"seq"}= $raw;
    }
    else{
	warn "Could not determine file type, please define using the -t option (fasta or fastq)";
	pod2usage(-verbose => 0);
    }
}
close(IN);

###Investigate kmer_enrichment
%kmer = %{kmer_enrichment(\@seqs, $klength)};
#foreach my $key (keys %embeddings){
#  %kmer = %{kmer_enrichment($embeddings{$key}, $klength)};
#  %kmer = %{kmer_enrichment(\@seqs, $klength)};
#}

my $total = sum values %kmer;
chdir($odir) or die "Could not find directory $odir, $!\n";

### Print Output
open(KMER,">","$klength\_mers") or die "Could not open file $!\n";
print KMER "$klength\-mer\tCount\tRatio\n";
print KMER "TOTAL\t$total\t1\n";
foreach my $key  (sort {$kmer{$b} <=> $kmer{$a} } keys %kmer) {
    my $ratio = $kmer{$key}/$total;
    print KMER "$key\t$kmer{$key}\t$ratio\n";
}
close(KMER);

chdir($dir) or die "Could not find directory $dir, $!\n";
###############
###POD
###############

__END__

=head1 NAME

kmer_analysis.pl - Simple k-mer count analysis of fasta or fastq files

=head1 SYNOPSIS

kmer_analysis.pl [-f I<FILE>] [-d I<STRING>] [-o I<STRING>] [-k
I<INTEGER>] [-t I<STRING>] [options]

=head1 DESCRIPTION

This program counts k-mers of user defined length in fasta or fastq files.

=head1 OPTIONS

=over 

=item B<-f>

File for processing

=item B<-d>

Working directory

=item B<-o>

Output directory

=item B<-k>

Kmer length to search

=item B<-t>

File type, can either be fasta or fastq

=item B<--help -h>

Print short help

=item B<--man>

Prints the manual page and exits

=back


=head1 AUTHOR

Joerg Fallmann E<lt>joerg.fallmann@univie.ac.atE<gt>

=cut


##################################END################################