The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Graphics::Browser2::DataLoader::sam;

# $Id: bam.pm 22336 2009-12-07 22:18:43Z lstein $

use strict;
use base 'Bio::Graphics::Browser2::DataLoader::bam';
use Bio::DB::Sam;

# go back to line-at-a-time loading
sub load {
    my $self                = shift;
    $self->Bio::Graphics::Browser2::DataLoader::load(@_);
}

# We do nothing here; the base class
# makes a copy of the data file in SOURCES
sub load_line { }

sub finish_load {
    my $self = shift;
    my $source_file = File::Spec->catfile($self->sources_path,$self->track_name);

    # sort out the file names
    my $track_name = $self->track_name;
    $track_name    =~ s/\..+$//;  # get rid of any extension

    my $base       = File::Spec->catfile($self->data_path,$track_name);
    my $bam        = $base . '.bam';
    my $sorted     = $base . '_sorted';

    # turn SAM file into a BAM file
    $self->set_status('converting SAM to BAM');
    $self->do_strip_prefix($source_file);

    $self->sam2bam($source_file,$bam);

    # sort it
    $self->set_status('sorting BAM file');
    Bio::DB::Bam->sort_core(0,$bam,$sorted,250*1e6); # use 200 meg core maximum

    # no need to keep sorted and unsorted copies
    rename "$sorted.bam",$bam;

    $self->set_status('indexing BAM file');

    Bio::DB::Bam->index_build($bam);

    my $bigwig_exists = 0;

    if ($self->has_bigwig) {
	$self->set_status('creating BigWig coverage file');
	$bigwig_exists = $self->create_big_wig();
    }

    $self->set_status('creating conf file');
    $self->create_conf_file($bam,$bigwig_exists);

    $self->set_status('recompressing source');
    system ("gzip $source_file");
}

sub do_strip_prefix {
    my $self = shift;
    my $source_file = shift;
    my $prefix = $self->strip_prefix or return;
    my $dest   = "$source_file.new";
    open my $i,'<',$source_file  or die "Can't open $source_file: $!";
    open my $o,">",$dest         or die "Can't open $dest: $!";
    while (<$i>) {
	next if /^\@/;
	s/^([^\t]+)\t([^\t]+)\t$prefix([^\t]+)/$1\t$2\t$3/;
    } continue {
	print $o $_;
    }
    close $o;
    close $i;
    rename $dest,$source_file;
}

sub sam2bam {
    my $self = shift;
    my ($sampath,$bampath) = @_;

    my $tam = Bio::DB::Tam->open($sampath)
	or die "Could not open SAM file for reading: $!";

    # get chromosome names
    my $chroms = $self->chromosome_list;

    my $header = eval {$tam->header_read};
    unless ($header) {
	warn "Bio::DB::Sam version 1.20 or greater required for SAM conversion to work properly";
    }

    unless ($header && $header->n_targets > 0) {
	my $fasta      = $self->get_fasta_file;
	$fasta 
	    or die "Could not find a suitable reference FASTA file for indexing this SAM file";

	my $fai = Bio::DB::Sam::Fai->load($fasta)
	    or die "Could not load reference FASTA file for indexing this SAM file: $!";

	$header = $tam->header_read2($fasta.".fai");
    }

    my $targets = $header->target_name;

    my $bam = Bio::DB::Bam->open($bampath,'w')
	or die "Could not open BAM file for writing: $!";

    $bam->header_write($header);
    my $alignment = Bio::DB::Bam::Alignment->new();
    my $lines = 0;

    while ($tam->read1($header,$alignment) > 0) {
	next if $chroms && !$chroms->{$targets->[$alignment->tid]};
	$bam->write1($alignment);
	$self->set_status("converted $lines lines...") if $lines++%1000 == 0;
    }
    
    undef $tam;
    undef $bam;
}

1;