The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Das adaptor for GUS

=head1 NAME

DAS::GUS - DAS-style accession to a gus database

=head1 SYNOPSIS

	# Open up a feature database
	$db = DAS::GUS->new(
			-dsn  => 'dbi:Oracle:sid=GUSDEV;host=localhost;port=1521',
						-user => 'user',
						-pass => 'pass', );
	
	@segments = $db->segment ( -name  => 'AAEE01000001',
							   -start => 1,
							   -end   => 1000000 );
	
	# segments are Bio::Das::SegmentI - compliant objects
	
	# fetch a list of features
	@features = $db->features( -type=>['type1', 'type2', 'type3'] );

	# invoke a callback over features
	$db->features( -type=>['type1', 'type2', 'type3'], 
				   -callback => sub { ... }
				 );

	# get all featur types
	@types = $db->types

	# count types
	%types = $db->types( -enumerate=>1 );

	@feature = $db->get_feature_by_name( $class=>$name );
	@feature = $db->get_feature_by_target( $target_name );
	@feature = $db->get_feature_by_attribute( $att1=>$value1, 
											  $att2=>$value2 );
	$feature = $db->get_feature_by_id( $id );

	$error = $db->error;

=cut

=head1 AUTHOR 

Name:  Haiming Wang
Email: hwang@uga.edu

=cut

#'

package DAS::GUS;

use strict;
use DAS::GUS::Segment;
use DAS::Util::SqlParser;
use Bio::Root::Root;
use Bio::DasI;
use Bio::PrimarySeq;
use DBI;
use Carp qw(longmess);
use vars qw($VERSION @ISA);

use constant DEBUG => 0;

$VERSION = 0.11;
@ISA = qw(Bio::Root::Root Bio::DasI);

=head2 new

  Title		: new
  Usage		: $db = DAS::GUS->new (
						-dsn  => 'dbi:Oracle:sid=GUSDEV;host=localhost;port=1521',
						-user => 'user',
						-pass => 'pass', );
  Function	: Open up a Bio::DB::DasI interface to a GUS database
  Returns	: a new DAS::GUS object

=cut

sub new {
	my $proto = shift;
	my $self = bless {}, ref($proto) || $proto;
	my %arg = @_;

	my $dsn      = $arg{-dsn};
	my $username = $arg{-user};
	my $password = $arg{-pass};
	my $sqlfile  = $arg{-sqlfile};
	my $dbh = DBI->connect( $dsn, $username, $password )
			or $self->throw("unable to open db handle");
	
	# solve oracle clob problem
	$dbh->{LongTruncOk} = 0;
	$dbh->{LongReadLen} = 10000000;

	$self->dbh($dbh);
	$self->parser(DAS::Util::SqlParser->new($sqlfile));
	return $self;
}

=head2 dbh

  Title		: dbh
  Usage		: $obj->dbh($newval)
  Function  : get a database handle
  Returns	: value of dbh (a scalar)
  Args		: on set, new value (a scalar or undef, optional)

=cut

sub dbh {
	my $self = shift;

	return $self->{'dbh'} = shift if @_;
	return $self->{'dbh'};
}

=head2 parser

  Title		: parser
  Usage		: $obj->parser($parserObj)
  Function  : get a sql parser object
  Returns	: a sql parser object
  Args		: 

=cut

sub parser { 
	my $self = shift; 

	return $self->{'parser'} = shift if @_; 
	return $self->{'parser'}; 
}

=head2 segment

  Title		: segment
  Usage		: $db->segment(@args)
  Function	: create a segment object
  Returns	: segment object(s)
  Args		: see below

This method generates a Bio::Das::SegmentI object 
(see L<Bio::Das::SegmentII>).  The segment can be used to find 
overlapping features and the raw sequence.  

When making the segment() call, you specify the ID of a sequence 
landmark (e.g. an accession number, a clone or contig), and a 
positional range relative to the landmark.  If no range is specified, 
then the entire region spanned by the landmark is used to generate 
the segment.

Arguments are -option=E<gt>value pairs as follows:

  -name		ID of the landmark sequence.

  -class	A namespace qualifier. It is not necessary for the 
  		    database to honor namespace qualifiers, but if it does, 
			this is where the qualifier is indicated.

  -version	Version number of the landmark. It is not necessary 
  			for the database to honor version, but if it does, 
			this is where the version is indicated.

  -start	Start of the segment relative to landmark. Positions 
  			follow standard 1-based sequence rules.  If not 
			specified, defaults to the beginning of the landmark.
	
  -end 		End of the segment relative to the landmark. If not 
  		    specified, defaults to the end of the landmark.

  -atts     Attribute of reference sequence

The return value is a list of Bio::Das::SegmentI objects. If the 
method is called in a scalar context and there are no more than 
one segments that satisfy the request, then it is allowed to return 
the segment. Otherwise, the method must throw a "multiple segment 
exception".

=cut

sub segment {
  my $self = shift;
  my ( $name,$base_start,$stop,$end,$class,$version,$atts ) =
    $self->_rearrange([qw(NAME
			  START
			  STOP 
			  END 
			  CLASS 
			  VERSION 
			  ATTS
			 )], @_);

  $end ||= $stop;

  return DAS::GUS::Segment->new($name,
					     $self,
					     $base_start,
					     $end,
					     $atts);
}

=head2 features

  Title		: fetures
  Usage		: $db->features(@args)
  Function	: get all features, possibly filtered by type
  Returns	: a list of DAS::GUS::Segment::Feature objects
  Args		: see below
  Status	: public

This routine will retrieve features in the database regardless of 
position. It can be used to return all features, or a subset based 
on their method and source.

Arguments are -option=E<gt>value pairs as follows:

  -type		List of feature types to return. Argument is an array of i
  			reference containing strings of the format "method:source"

  -callback	A callback to invoke on each feature. The subroutine will
  			be passed each Bio::SeqFeatureI object in turn.

  -attributes	A has reference containing attributes to match.

  -iterator Whether to return an iterator across the features

Types are indicated using the nomenclature "method:source". Either of 
these fields can be omitted, in which case a wildcard is used for the
missing field. Type names without the colon (e.g. "exon") are 
interpreted as the method name and a source wild card. Regular 
expression are allowed in either field, as in: "similarity:BLAST.*".

The -attributes argument is a hashref containing one or more 
attributes to match against:

  -attributes => { Gene => 'abc-1',
  				   Note => 'confirmed' }

Attribute matching is simple exact string match, and multiple 
attributes are ANDed together.

If one provides a callback, it will be invoked on each feature in 
turn. If the callback returns a false value, iteration will be 
interrupted. When a callback is provided, the method returns undef.

=cut

sub features {
	my $self = shift;
	my ( $type, $types, $callback, $attributes, $iterator ) = 
								$self->_rearrange([qw(TYPE
													  TYPES
													  CALLBACK
													  ATTRIBUTES
													  ITERATOR)], @_ );
				
	$type ||= $types;

	my @features = DAS::GUS::Segment->features(
											-type	   => $type, 
											-attributes => $attributes,
											-callback   => $callback, 
											-iterator   => $iterator, 
											-factory    => $self, );
	return @features;
}

=head2 get_feature_by_name

	Title	: get_feature_by_name
	Usage	: $db->get_feature_by_name($class => $name)
	Function: fetch features by their name
	Returns : a list of DAS::GUS::Segment::Feature objects
	Args	: the class and the name of the desired feature
	Status  : public

        Note    : You need to modify _feature_get() in Browser.pm currently. 
	  Find "return unless @segments;" and change it to "return @segments;"	  
	  Debug it later. Also see multiple_choice() in gbrowse cgi script for
	  getting features\'s attributes

=cut

sub get_feature_by_name {

	my $self = shift;
	my ( $name, $class, $ref, $base_start, $stop ) 
			= $self->_rearrange([qw(NAME CLASS REF START END)], @_);

	my ( @segs, @features, $sth, $segment, $seg_name );

	if( $name ) {  
		$name =~ s/[?*]\s*$/%/; # replace * with % in the sql

		# get features by locus name, genbank accession number or 
		# protein function keywords

		my $query = $self->parser->getSQL("GUS.pm", "get_feature_by_name");
		$query =~ s/(\$\w+)/eval $1/eg;
		$query =~ s/\*/\%/g;
		my $un = uc($name);
		$query =~ s/\?/\'\%$un\%\'/g;
		$sth = $self->dbh->prepare($query);
		$sth->execute();

		while(my $hashref = $sth->fetchrow_hashref) {
			$seg_name = $$hashref{'CTG_NAME'};

			# if this is a segment, return a segment object
			return $self->segment($seg_name) if($seg_name =~ /$name/i);
			$segment = $self->segment($seg_name);

			my $feat = DAS::GUS::Segment::Feature->new(
							$self,
							$segment,					# parent
							$seg_name, 		            # the source sequence
							$$hashref{'STARTM'},  		# start
							$$hashref{'END'}, 		    # end
							$$hashref{'TYPE'}.':'.$$hashref{'SOURCE'},
							$$hashref{'SCORE'}, 		# score
							$$hashref{'STRAND'},  		# strand
							$$hashref{'PHASE'},			# phase 
							$$hashref{'NAME'}, 			# group
							$$hashref{'ATTS'}, 			# attributes
							$$hashref{'NAME'}, 			# unique_name 
							$$hashref{'FEATURE_ID'} 	# feature_id
						);

			push @features, $feat;
		}
	}
	if (@features) { @features; } else { (); }
}

sub default_class { return 'Sequence' }

# Compatible with other gbrowse related scripts - e.g. das
sub aggregators { return }

sub absolute { return }

package DAS::GUSIterator;

sub new {

	my $package = shift;
	my $features = shift;
	return bless $features, $package;
}

sub next_seq {

	my $self = shift;
	return unless @$self;

	my $next_feature = shift @$self;
	return $next_feature;
}

1;