The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl


=head1 NAME

bp_bioflat_index.pl - index sequence files using Bio::DB::Flat

=head1 DESCRIPTION

 Create or update a biological sequence database indexed with the
 Bio::DB::Flat indexing scheme.  The arguments are a list of flat files
 containing the sequence information to be indexed.

=head1 USAGE

 bp_bioflat_index.pl <options> file1 file2 file3...

 Options:

     --create              Create or reinitialize the index.  If not specified,
                           the index must already exist.

     --format   <format>   The format of the sequence files.  Must be one
                           of "genbank", "swissprot", "embl" or "fasta".

     --location <path>     Path to the directory in which the index files
                           are stored.

     --dbname <name>       The symbolic name of the database to be created.

     --indextype <type>    Type of index to create.  Either "bdb" or "flat".
                           "binarysearch" is the same as "flat".

Options can be abbreviated.  For example, use -i for --indextype.

The following environment variables will be used as defaults if the 
corresponding options are not provided:

     OBDA_FORMAT      format of sequence file
     OBDA_LOCATION    path to directory in which index files are stored
     OBDA_DBNAME      name of database
     OBDA_INDEXTYPE   type of index to create

=cut

use strict;
use warnings;
use Bio::Root::Root;
use Bio::Root::IO;
use Bio::DB::Flat;
use Getopt::Long;
use File::Path qw(mkpath rmtree);

# Option values.  Anything not supplied on the command line falls back to
# an OBDA_* environment variable further down.
my ($CREATE,$FORMAT,$LOCATION,$DBNAME,$INDEXTYPE);

# Bio::Root::Root and Bio::Root::IO are only ever called through their
# class names in this script (throw/warn and catfile); no objects are
# constructed.
my $root = 'Bio::Root::Root';
my $io   = 'Bio::Root::IO';

# GetOptions() returns false after warning about an unknown or malformed
# option; stop there instead of indexing with a half-parsed command line.
GetOptions( 'create'      => \$CREATE,
            'format:s'    => \$FORMAT,
            'location:s'  => \$LOCATION,
            'dbname:s'    => \$DBNAME,
            'indextype:s' => \$INDEXTYPE )
  or $root->throw("unrecognized or malformed command-line options; see 'perldoc bp_bioflat_index.pl' for usage");

# Environment defaults.  A command-line value always wins, including an
# explicitly empty one (hence "defined" rather than a truth test).
$FORMAT    = $ENV{OBDA_FORMAT}    unless defined $FORMAT;
$LOCATION  = $ENV{OBDA_LOCATION}  unless defined $LOCATION;
$DBNAME    = $ENV{OBDA_DBNAME}    unless defined $DBNAME;
# OBDA_INDEXTYPE is the name this script has always read; OBDA_INDEX is
# accepted as a lower-priority alias for the same setting.
$INDEXTYPE = $ENV{OBDA_INDEXTYPE} unless defined $INDEXTYPE;
$INDEXTYPE = $ENV{OBDA_INDEX}     unless defined $INDEXTYPE;

# confirm that database directory is there
defined $LOCATION or
  $root->throw("please provide a base directory with the --location option");

unless (-d $LOCATION) {
  # The message below points the user at --create, so --create must be
  # able to get past a missing base directory: create it in that case.
  $CREATE or
    $root->throw("$LOCATION is not a valid directory; use --create to create a new index");

  # mkpath() in its legacy calling form croaks on failure; trap that and
  # re-check with -d so every failure is reported through throw().
  my $created = eval { mkpath($LOCATION); 1 };
  ($created and -d $LOCATION) or
    $root->throw("unable to create directory $LOCATION" . ($@ ? ": $@" : ''));
}

defined $DBNAME or
  $root->throw("please provide a database name with the --dbname option");

defined $FORMAT or
  $root->throw("please specify the format for the input files with the --format option");

# The index type is the only setting with a built-in default.
unless (defined $INDEXTYPE) {
  $INDEXTYPE = 'flat';
  $root->warn('setting index type to "flat", use the --indextype option to override');
}

# Reconcile the --create flag with what is on disk.  The presence of
# <location>/<dbname>/config.dat is what counts as "an index exists".
my $path = $io->catfile($LOCATION,$DBNAME,'config.dat');

if (!-e $path) {
  # Nothing to update, so the only way forward is to create.
  $root->throw("Cannot find database config file at location $path; use --create to create a new index")
    unless $CREATE;
}
elsif ($CREATE) {
  # Recreate from scratch: drop the whole per-database directory.
  $root->warn("existing index detected; deleting.");
  my $index_dir = $io->catfile($LOCATION,$DBNAME);
  # Second and third arguments are File::Path's legacy "verbose" and
  # "safe" flags.
  rmtree($index_dir,1,1);
}
else {
  # Updating in place: clear the index type so that no -index argument
  # is handed to Bio::DB::Flat->new for an already-existing index.
  $root->warn("existing index detected; ignoring --indextype and --format options.");
  undef $INDEXTYPE;
}

# Open the database for writing/updating.  The -index argument is only
# supplied when $INDEXTYPE holds a true value; it is undef when an
# existing index is being updated rather than created.
my $db = Bio::DB::Flat->new(-directory  => $LOCATION,
                            -dbname     => $DBNAME,
                            $INDEXTYPE ? ( -index => $INDEXTYPE ) : (),
                            -write_flag => 1,
                            -format     => $FORMAT) or
  $root->throw("can't create Bio::DB::Flat object");

# Whatever is left in @ARGV after option parsing is the list of sequence
# files to index; the return value is reported below as the entry count.
my $entries = $db->build_index(@ARGV);

# Status goes to STDERR.  The message ends exactly at the newline so no
# stray space is left at the start of the next terminal line.
print STDERR "(Re)indexed $entries entries.\n";

__END__