#########
# Author: Andreas Kahari, andreas.kahari@ebi.ac.uk
# Maintainer: $Author: zerojinx $
# Created: ?
# Last Modified: $Date: 2010-11-02 11:37:11 +0000 (Tue, 02 Nov 2010) $
# Id: $Id: bioseqio.pm 687 2010-11-02 11:37:11Z zerojinx $
# Source: $Source: /nfs/team117/rmp/tmp/Bio-Das-ProServer/Bio-Das-ProServer/lib/Bio/Das/ProServer/SourceAdaptor/Transport/bioseqio.pm,v $
# $HeadURL: https://proserver.svn.sourceforge.net/svnroot/proserver/trunk/lib/Bio/Das/ProServer/SourceAdaptor/Transport/bioseqio.pm $
#
package Bio::Das::ProServer::SourceAdaptor::Transport::bioseqio;
use strict;
use warnings;
use base qw(Bio::Das::ProServer::SourceAdaptor::Transport::generic);
use Bio::SeqIO;
use Bio::DB::Flat;
use Carp;
use English qw(-no_match_vars);
# Module version: the first run of digits found in the Revision keyword string.
our $VERSION = do {
  my @digit_runs = q$Revision: 687 $ =~ /\d+/mxsg;
  $digit_runs[0];
};
#########
# Clears the cached sequence and, when an 'index' configuration
# entry is defined, builds the Bio::DB::Flat index unless the
# index's config.dat file is already present under dbroot/dbname.
# Always returns nothing.
#
sub init {
  my $self = shift;

  $self->{_data} = undef; # Slot for the most recently fetched sequence object

  my $config = $self->config;
  if (!defined $config->{index}) {
    return; # No index requested, so there is nothing to prepare
  }

  my $index_marker = sprintf q(%s/%s/config.dat),
                             $config->{dbroot},
                             $config->{dbname};
  if (-f $index_marker) {
    return; # The index files are already on disk
  }

  # Open the database writable so that the index can be created.
  my $flat_db = Bio::DB::Flat->new(
    -directory  => $config->{dbroot},
    -dbname     => $config->{dbname},
    -format     => $config->{format},
    -index      => $config->{index},
    -write_flag => 1,
  );

  carp sprintf qq(Building %s index for DB %s in %s\n),
               $config->{index},
               $config->{dbname},
               $config->{dbroot};

  $flat_db->build_index($config->{filename});

  return;
}
#########
# Returns the sequence object for $query. The most recently
# fetched object is handed back directly when its display_name
# equals $query; otherwise the lookup goes to _query_indexed if
# an 'index' configuration entry is defined, and to
# _query_sequentially if not.
#
sub query {
  my ($self, $query) = @_;

  my $cached = $self->{_data};
  if (defined $cached && $cached->display_name eq $query) {
    return $cached;
  }

  return defined $self->config->{index}
       ? $self->_query_indexed($query)
       : $self->_query_sequentially($query);
}
#########
# Opens the flat file named by the configuration and reads it
# record by record until a sequence whose display_name equals
# $query is found. The result is cached in $self->{_data} and
# returned. When no record matches, a placeholder Bio::Seq
# created with display_id 'notfound' is cached and returned.
#
# A true 'filename' or 'format' entry on the object itself takes
# precedence over the corresponding configuration entry.
#
sub _query_sequentially {
  my ($self, $query) = @_;

  # This file only loads Bio::SeqIO and Bio::DB::Flat. Load Bio::Seq
  # here so that building the placeholder below does not depend on
  # some other module having pulled it in already.
  require Bio::Seq;

  my $fname  = $self->{filename} || $self->config->{filename};
  my $format = $self->{format}   || $self->config->{format};

  # Cache the placeholder up front; it is only replaced on a match.
  $self->{_data} = Bio::Seq->new( -display_id => 'notfound' );

  my $seqio = Bio::SeqIO->new(
    -file   => $fname,
    -format => $format,
  );

  while (defined (my $seq = $seqio->next_seq())) {
    if ($seq->display_name eq $query) {
      $self->{_data} = $seq;
      last;
    }
  }

  return $self->{_data};
}
#########
# Opens the Bio::DB::Flat database described by the dbroot,
# dbname, format and index configuration entries and asks it
# for the sequence whose id is $query. Whatever get_Seq_by_id
# returns is cached in $self->{_data} and returned.
#
sub _query_indexed {
  my ($self, $query) = @_;

  my $config = $self->config;
  my %flat_args = (
    -directory => $config->{dbroot},
    -dbname    => $config->{dbname},
    -format    => $config->{format},
    -index     => $config->{index},
  );

  my $flat_db = Bio::DB::Flat->new(%flat_args);
  my $found   = $flat_db->get_Seq_by_id($query);

  $self->{_data} = $found;

  return $found;
}
1;
__END__
=head1 NAME
Bio::Das::ProServer::SourceAdaptor::Transport::bioseqio - A ProServer
transport module that works off any flat file that Bio::SeqIO
supports.
=head1 VERSION
$Revision: 687 $
=head1 SYNOPSIS
=head1 DESCRIPTION
NB: This is *not* what you want to use if your files are large. As an
example, a single query for "Z261_HUMAN" on the complete Swissprot
file "sprot42.dat" takes several minutes.
=head1 SUBROUTINES/METHODS
=head2 init
=head2 query
=head1 DIAGNOSTICS
=head1 CONFIGURATION AND ENVIRONMENT
For sequential querying of flat files, the following
configuration entries are needed:
filename The name of the flat file to search.
format The format of the flat file.
If using Bio::DB::Flat (this is dependent on the existence of
the 'index' configuration entry), the following additional
configuration entries are needed:
index The type of index to create and/or use ('bdb' or
'binarysearch'). Corresponds to the '-index'
option of Bio::DB::Flat::new().
dbname The name of the database to create and/or
use. Corresponds to the '-dbname' option of
Bio::DB::Flat::new().
dbroot The directory where the database index is
or will be located. Corresponds to the
'-directory' option of Bio::DB::Flat::new().
=head1 DEPENDENCIES
=head1 INCOMPATIBILITIES
=head1 BUGS AND LIMITATIONS
=head1 AUTHOR
Andreas Kahari, andreas.kahari@ebi.ac.uk
=head1 LICENSE AND COPYRIGHT