The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package Bio::Graphics::Browser2::DataLoader::generic;

# $Id$
use strict;
use Bio::DB::SeqFeature::Store;
use Carp 'croak';
use File::Basename 'basename';
use base 'Bio::Graphics::Browser2::DataLoader';

use constant TOO_SMALL_FOR_SUMMARY_MODE => 10000;  # don't go into summary mode

my @COLORS = qw(blue red orange brown mauve peach green cyan 
                black yellow cyan papayawhip coral);

my $DUMPING_FIXED;  # flag that we patched Bio::SeqFeature::Lite

sub start_load {
    my $self = shift;
    my $track_name = $self->track_name;
    my $data_path  = $self->data_path;

    my $db     = $self->create_database($data_path);
    my $loader_class = $self->Loader;
    my $fast         = $self->do_fast;
    eval "require $loader_class" unless $loader_class->can('new');
    my $loader = $loader_class->new(-store=> $db,
				    -fast => $fast,
				    -summary_stats    => 1,
				    -verbose          => 0,
				    -index_subfeatures=> 0,
	);
    $loader->start_load();
    $self->{loader}    = $loader;
    $self->{conflines} = [];
    $self->state('starting');
}

sub do_fast { 1 };

sub Loader {
    croak "The Loader() class method must be implemented in a subclass";
}

sub finish_load {
    my $self = shift;
    my $line_count = shift;

    $self->set_status('creating database');
    $self->loader->finish_load();
    my $db        = $self->loader->store;
    my $conf      = $self->conf_fh;
    my $trackname = $self->track_name;
    my $dsn       = $self->dsn;
    my $backend   = $self->backend;

    $self->write_gff3($db,$dsn) if $backend eq 'memory';

    my $trackno   = 0;
    my $loadid    = $self->loadid;
    my $inhibit_summary = $line_count < TOO_SMALL_FOR_SUMMARY_MODE;
    eval {
	$self->set_status('calculating summary statistics');
	$self->loader->build_summary;
    } unless $inhibit_summary;
    warn $@ if $@;

    $self->set_status('creating configuration');
    
    print $conf <<END;
[$loadid:database]
db_adaptor = Bio::DB::SeqFeature::Store
db_args    = -adaptor $backend
             -dsn     $dsn
search options = default +wildcard +stem

#>>>>>>>>>> cut here <<<<<<<<
END

    if (my @lines = @{$self->{conflines}}) {  # good! user has provided some config hints
	my ($old_trackname,$seen_feature);
	my $category = $self->category;
	for my $line (@lines) {
	    chomp $line;
	    if ($line =~ /^\s*database/) {
		next;   # disallowed
	    }
	    elsif ($line =~ /^\[([^\]]+)\]/) { # overwrite track names to avoid collisions
		$old_trackname = $1;
		undef $seen_feature;
		my $trackname = $self->new_track_label;
		print $conf "[$trackname]\n";
		print $conf "database = $loadid\n" ;
		print $conf "category = $category\n";
		print $conf "show summary = 0\n" if $inhibit_summary;
	    } elsif ($line =~ /^feature/) {
		$seen_feature++;
		print $conf $line,"\n";
	    } elsif ($line !~ /\S/ && $old_trackname && !$seen_feature) {
		print $conf "feature = $old_trackname\n\n";
		undef $old_trackname;
		undef $seen_feature;
	    } else {
		print $conf $line,"\n";
	    }
	}
	if ($old_trackname && !$seen_feature) {
	    print $conf "feature = $old_trackname\n\n";
	}
    } else {  # make something up
	my @types = eval {$db->toplevel_types};
	@types    = $db->types unless @types;

	my $filename = $self->track_name;
	for my $t (@types) {
	    my $trackname = $self->new_track_label($t);

	    my ($glyph,$stranded);
	    # start of a big heuristic section
	    if ($t =~ /gene|cds|exon|mRNA|transcript/i) {
		$glyph    = 'gene';
		$stranded = 0;
	    } else {
		$glyph    = 'segments';
		$stranded = 1;
	    }

	    my $color = $COLORS[rand @COLORS];
	    my $category  = $self->category;
	    my $summary   = $inhibit_summary ? 'show summary = 0' : '';
	    print $conf <<END;
[$trackname]
database = $loadid
feature   = $t
glyph     = $glyph
bgcolor   = $color
fgcolor   = $color
label     = 1
stranded  = $stranded
connector = solid
balloon hover = \$description
category    = $category
key         = $t
$summary

END
	}
    }
}

sub loader {shift->{loader}}
sub state {
    my $self = shift;
    my $d    = $self->{state};
    $self->{state} = shift if @_;
    $d;
}
sub load_line {
    my $self = shift;
    my $line = shift;

    my $old_state = $self->state;
    my $state     = $self->_state_transition($old_state,$line);
    my $prefix    = $self->strip_prefix;

    if ($state eq 'data') {
	$line =~ s/^$prefix// if $prefix;
	$self->loader->load_line($line);
    } elsif ($state eq 'config') {
	push @{$self->{conflines}},$line;
    } else {
	# ignore it
    }
    $self->state($state) if $state ne $old_state;
}

# This is called to save out the feature file in GFF3 format,
# which is needed when using the memory backend.
sub write_gff3 {
    my $self = shift;
    my ($db,$path) = @_;
    $self->fix_memory_dumping;
    $self->set_status('writing GFF3 file');
    $path .= "/data.gff3";
    open my $f,">",$path or die "Can't open $path: $!";
    print $f "##gff-version 3\n";
    print $f "##Index-subfeatures 0\n\n";
    my $i = $db->get_seq_stream;
    while (my $feature = $i->next_seq) {
	print $f $feature->gff3_string(1),"\n";
    }
    close $f;
}

# This is a hack to patch errors in bioperl feature file loading
sub fix_memory_dumping {
    my $self = shift;
    return if $DUMPING_FIXED++;
    *Bio::SeqFeature::Lite::gff3_string = eval <<'END';
sub {
    my ($self,$recurse,$parent_tree,$seenit,$force_id) = @_;
    $parent_tree ||= {};
    $seenit      ||= {};
    my @rsf      =   ();
    my @parent_ids;

    if ($recurse) {
	$self->_traverse($parent_tree) unless %$parent_tree;  # this will record parents of all children
	my $primary_id = defined $force_id ? $force_id : $self->_real_or_dummy_id;

	@rsf = $self->get_SeqFeatures;
	return if $seenit->{$primary_id}++;
	@parent_ids = keys %{$parent_tree->{$primary_id}};
    }

    my $group      = $self->format_attributes(\@parent_ids,$force_id);
    my $name       = $self->name;

    my $class = $self->class;
    my $strand = ('-','.','+')[$self->strand+1];
    my $p = join("\t",
		 $self->seq_id||'.',
		 $self->source||'.',
		 $self->method||'.',
		 $self->start||'.',
		 $self->stop||'.',
		 defined($self->score) ? $self->score : '.',
		 $strand||'.',
		 defined($self->phase) ? $self->phase : '.',
		 $group||'');
    return join("\n",
		$p,
		map {$_->gff3_string(1,$parent_tree,$seenit)} @rsf);
}
END
}

# shamelessly copied from Bio::Graphics::FeatureFile.
sub _state_transition {
    my $self = shift;
    my ($current_state,$line) = @_;

    if ($current_state eq 'starting') {
	return $current_state unless /\S/;
	$current_state = 'config';
    }

    if ($current_state eq 'data') {
	return 'config' if $line =~ m/^\s*\[([^\]]+)\]/;  # start of a configuration section
    } 
    elsif ($current_state eq 'config') {
	return 'data'   if $line =~ /^\#\#(\w+)/;     # GFF3 meta instruction
	return 'data'   if $line =~ /^reference\s*=/; # feature-file reference sequence directive
	
	return 'config' if $line =~ /^\s*$/;                             #empty line
	return 'config' if $line =~ m/^\[([^\]]+)\]/;                    # section beginning
	return 'config' if $line =~ m/^[\w\s]+=/;                        # arg = value
	return 'config' if $line =~ m/^\s+(.+)/;                         # continuation line
	return 'config' if $line =~ /^\#/;                               # comment -not a meta
	return 'data';
    }
    return $current_state;
}
1;