The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# -*-CPerl-*-
# Last changed Time-stamp: <2014-12-13 01:03:41 mtw>

###############
###Use stuff
###############
use strict;
use warnings;
use PerlIO::gzip;
use Cwd;
use File::Path qw (make_path);
use Pod::Usage;
use Getopt::Long qw( :config posix_default bundling no_ignore_case );
use Bio::ViennaNGS::Util qw(unique_array);

###############
###Variables
###############

my $VERBOSE=0;
my ( $file, $type, $peak, $chromsize, $track, $conv, $anno );

###############
###Command Line Options
###############

pod2usage(-verbose => 0)
    unless GetOptions(
	"file|f=s"	=> \$file,
	"type|t=s"	=> \$type,
	"chrom|c=s"	=> \$chromsize,
	"anno|a=s"	=> \$anno,
	"help|h"	=> sub{pod2usage(-verbose => 1)},
	"man|m"		=> sub{pod2usage(-verbose => 2)},
	"verbose"	=> sub{ $VERBOSE++ }
    );


my $dir = cwd();

my $file = file("Bedgraph", $file);

make_path($file) unless (-d $file);

die "You must provide filename and cell-type, and file with chrom-sizes\n"
  unless ($file && $type && $chromsize);

###############
###MAIN
###############

my $pid = $$;
(my $job = `cat /proc/$pid/cmdline`)=~ s/\0/ /g;
print STDERR $job,"\n";
print STDERR "Adding annotation information\n" if ($anno && $anno eq "extended");
my %sizes=();

open (SIZE, "<:gzip(autopop)", $chromsize);
while (<SIZE>){
    chomp (my $raw = $_);
    my @line = split("\t",$raw);
    $sizes{"$line[0]"}=$line[1];
}
close (SIZE);

open (IN, "<:gzip(autopop)" , $file);
chdir("Bedgraph/$file") or die "Could not move to Bedgraph $!\n";

my %covplus  = ();
my %covminus = ();
my %anno     = ();
my %annop    = ();
my %annom    = ();

while (<IN>){
    chomp (my $raw = $_);
    push my @line , split (/\t/,$raw);
    push @line, "\." if ( !$line[5] ); 
    my $chrom  = $line[0];#)=~ s/chr//g;
    my $start  = $line[1];
    my $end    = $line[2];
    my $strand = $line[5];
    my $annotation = join("\t",$line[3],$line[4]);
    for (5..$#line){
	$annotation .= "\t".$line[$_];
    }
    for (my $i=$start;$i<=$end;$i++){
	my $a=$i+1;
	next if ($a > ($sizes{"$chrom"}));
	if ($strand eq "+" or $strand eq "1"){
	    $covplus{"$chrom\t$i\t$a"}+=1;
	    push @{$annop{"$chrom\t$i\t$a"}}, $annotation if ($anno && $anno eq "extended");
	}
	elsif($strand eq "-" or $strand eq "-1"){
	    $covminus{"$chrom\t$i\t$a"}+=1;
	    push @{$annom{"$chrom\t$i\t$a"}}, $annotation if ($anno && $anno eq "extended");
	}
    }
}
close (IN);

foreach my $key (keys %covplus){
    my $cov=$covplus{$key};
    my @an = unique_array(\@{$annop{$key}});
    my $annotation = join("\t",@an);
    my @tmp=split(/\t/,$key);
    my $chrom=$tmp[0];
    open (OUT1, ">>:gzip", "$chrom\.$type\.fw.gz");
    if ($anno && $anno eq "extended"){
	print OUT1 "$key\t$cov\t$annotation\n";
    }
    else{
	print OUT1 "$key\t$cov\n";
    }
    close (OUT1);
}

foreach my $key (keys %covminus){
    my $cov=$covminus{$key}; # at least one occurence
    my @an = unique_array(\@{$annom{$key}});
    my $annotation = join("\t",@an);
    my @tmp=split(/\t/,$key);
    my $chrom=$tmp[0];
    open (OUT2, ">>:gzip", "$chrom\.$type\.re.gz");
    if ($anno && $anno eq "extended"){
	print OUT2 "$key\t-$cov\t$annotation\n";
    }
    else{
	print OUT2 "$key\t-$cov\n";
    }
    close (OUT2);
}

`for i in \`ls chr\*\.fw.gz\`;do zcat \$i|sort -k1,1 -k2,2 -n|gzip > \$i\.bedg.gz;done`;
`for i in \`ls chr\*\.re.gz\`;do zcat \$i|sort -k1,1 -k2,2 -n|gzip > \$i\.bedg.gz;done`;

chdir ($dir) or die "Could not move to $dir $!\n";


###############
###POD
###############

__END__

=head1 NAME

bed2bedGraph.pl - Convert BED or extended BED files to
bedGraph format

=head1 SYNOPSIS

bed2bedGraph.pl [-f I<FILE>] [-c I<FILE>] [-t I<STRING>] [-a
I<STRING>] [options]

=head1 DESCRIPTION

This program converts BED files to strand specific bedGraph files,
allowing additional annotation and automatic generation of bedGraph
files which can easily be converted to big-type files for UCSC
visualization.

=head1 OPTIONS

=over 

=item B<-f>

BED file for conversion.

=item B<-c>

File containing chromosome sizes

=item B<-t>

Type of bed file (e.g. sample name, replicate name, condition, ...)

=item B<-a>

Whether file is a standard bed or extended bed, 'extended' for extended bed

=item B<--help -h>

Print short help

=item B<--man>

Prints the manual page and exits

=back



=head1 AUTHOR

Joerg Fallmann E<lt>joerg.fallmann@univie.ac.atE<gt>

=cut


##################################END################################