Changes 015
META.json 0273
META.yml 0194
Makefile.PL 772
README.1st 50 041
dist.ini 08
examples/ 12
lib/Bio/ASN1/EntrezGene/ 116173
lib/Bio/ASN1/ 236280
lib/Bio/ASN1/Sequence/ 107164
lib/Bio/ASN1/ 223267
t/00-compile.t 047
t/input.asn 00
t/input1.asn 00
t/release-eol.t 016
t/release-mojibake.t 020
t/release-no-tabs.t 016
t/release-pod-coverage.t 021
t/release-pod-syntax.t 015
t/seq.asn 00
t/testindexer.t 6283
t/testparser.t 24
26 files changed (This is a version diff) 8012108
@@ -1,3 +1,18 @@
+Revision history for Bio-ASN1-Entrezgene
+1.70      2013-09-14 14:39:54 America/Chicago
+  * Bio::ASN1::EntrezGene is now able to parse an Entrezgene-Set, in which
+    case next_seq() returns the next set of sequences as an array ref with
+    one element per sequence instead of an array ref with a single element.
+version 1.10: Important update if you see segmentation fault when running the
+              parser - so far I only saw it happen on Perl 5.8 (Perl 5.10 is 
+              fine) due to an exceedingly long (and invalid) URL in one 
+              Arabidopsis entry. The Perl regex engine dumps core when
+              matching the long string exhausts the stack. I changed the
+              particular regex in the parser modules to solve the
+              issue. The overall parsing runs 2-3% faster after the change.
 version 1.09: Added parser/indexer for NCBI's ASN.1-formatted
                 sequence files (like Genbank records).
               Updated test, example scripts and documentation
@@ -1,10 +1,75 @@
-require 5.004;
 use strict;
-use ExtUtils::MakeMaker;
+use warnings;
+use ExtUtils::MakeMaker 6.30;
-  NAME         => 'Bio::ASN1::EntrezGene',
-  VERSION_FROM => 'lib/Bio/ASN1/',
-  AUTHOR       => 'Mingyi Liu (mingyi.liu[at]',
-  PREREQ_PM    => {'Bio::Index::AbstractSeq' => '0'}
+my %WriteMakefileArgs = (
+  "ABSTRACT" => "Regular expression-based Perl Parser for NCBI Entrez Gene.",
+  "AUTHOR" => "Mingyi Liu <mingyiliu\>",
+  "BUILD_REQUIRES" => {},
+    "ExtUtils::MakeMaker" => "6.30"
+  },
+  "DISTNAME" => "Bio-ASN1-EntrezGene",
+  "EXE_FILES" => [],
+  "LICENSE" => "perl",
+  "NAME" => "Bio::ASN1::EntrezGene",
+  "PREREQ_PM" => {
+    "Bio::Index::AbstractSeq" => 0,
+    "Carp" => 0,
+    "parent" => 0,
+    "strict" => 0,
+    "utf8" => 0,
+    "warnings" => 0
+  },
+    "File::Spec" => 0,
+    "IO::Handle" => 0,
+    "IPC::Open3" => 0,
+    "Test::More" => 0
+  },
+  "VERSION" => "1.70",
+  "test" => {
+    "TESTS" => "t/*.t"
+  }
+unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) {
+  my $tr = delete $WriteMakefileArgs{TEST_REQUIRES};
+  my $br = $WriteMakefileArgs{BUILD_REQUIRES};
+  for my $mod ( keys %$tr ) {
+    if ( exists $br->{$mod} ) {
+      $br->{$mod} = $tr->{$mod} if $tr->{$mod} > $br->{$mod};
+    }
+    else {
+      $br->{$mod} = $tr->{$mod};
+    }
+  }
+unless ( eval { ExtUtils::MakeMaker->VERSION(6.56) } ) {
+  my $br = delete $WriteMakefileArgs{BUILD_REQUIRES};
+  my $pp = $WriteMakefileArgs{PREREQ_PM};
+  for my $mod ( keys %$br ) {
+    if ( exists $pp->{$mod} ) {
+      $pp->{$mod} = $br->{$mod} if $br->{$mod} > $pp->{$mod};
+    }
+    else {
+      $pp->{$mod} = $br->{$mod};
+    }
+  }
+delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
+  unless eval { ExtUtils::MakeMaker->VERSION(6.52) };
@@ -1,34 +0,0 @@
-This is the README file for Bio::ASN1::EntrezGene package, which
-1. XML parser-like parser for the ASN.1-formatted NCBI Entrez Gene files.
-2. Indexer for Entrez Gene files.
-3. XML parser-like parser for the ASN.1-formatted NCBI Sequence files.
-4. Indexer for Sequence files.
-These modules have quite high performance and error reporting capabilities.
-Additionally, one could dump the data structure generated from extracted
-NCBI object records into XML extremely easily using XML::Simple's XMLout().
-Written and maintained by Dr. Mingyi Liu <>.
-Copyright (c) 2005 Mingyi Liu, GPC Biotech, Altana Research Institute.
-All rights reserved.
-This program is free software - you can redistribute it and/or modify
-it under the same terms as Perl itself.
-Bio::ASN1::EntrezGene package can be installed & tested as follows:
-perl Makefile.PL
-make test
-make install
-For documentation, among many other things, please refer to the POD (
-plain old documentation) inside the module.
-It is highly recommended that you check the example scripts out (under
-the examples directory)!
@@ -1,5 +0,0 @@
-Please note that Bio::ASN1::EntrezGene package version 1.091 is exactly the
-same as version 1.09 (the only difference being that this file only exists
-in version 1.091).  The reason for releasing a version 1.091 is that CPAN
-indexing of Bio::ASN1::EntrezGene version 1.09 had some problem that is not
-yet fixed & I'd have to upload a package with different file name.
@@ -0,0 +1,41 @@
+This distribution includes:
+1. XML parser-like parser for the ASN.1-formatted NCBI Entrez Gene files.
+2. Indexer for Entrez Gene files.
+3. XML parser-like parser for the ASN.1-formatted NCBI Sequence files.
+4. Indexer for Sequence files.
+These modules have quite high performance and error reporting capabilities.
+Additionally, one could dump the data structure generated from extracted
+NCBI object records into XML extremely easily using XML::Simple's XMLout().
+Written by Dr. Mingyi Liu <>.
+Copyright (c) 2005 Mingyi Liu, GPC Biotech, Altana Research Institute.
+This program is free software - you can redistribute it and/or modify
+it under the same terms as Perl itself.
+Bio::ASN1::EntrezGene package can be installed & tested as follows:
+    perl Makefile.PL
+    make
+    make test
+    make install
+For documentation, among many other things, please refer to the POD (
+plain old documentation) inside the module.
+It is highly recommended that you check the example scripts out (under
+the examples directory)!
+- - -
+This distribution is part of the [BioPerl](https://bioperl.org/) project.
@@ -0,0 +1,8 @@
+name        = Bio-ASN1-EntrezGene
+version     = 1.70
+author      = Mingyi Liu <>
+license     = Perl_5
+copyright_holder = Mingyi Liu, GPC Biotech AG and Altana Research Institute
+copyright_year   = 2013
@@ -582,8 +582,9 @@ sub safeassign
 # safely extracts a value, another choice is to simply use
 # eval in-line, if it fails, it fails.  Probably faster, but can't
 # give feedback in-line (always has to add a couple lines dealing with
-# $@ for error reporting), might still be worth it though because 
+# $@ for error reporting), might still be worth it though because
 # of the speed.  User can make his/her own choice here.
 sub safeval
   my ($ds, $str) = @_; # data structure and string (we need $ds passed in because we use strict)
@@ -1,105 +1,42 @@
-=head1 NAME
-Bio::ASN1::EntrezGene::Indexer - Indexes NCBI Entrez Gene files.
-=head1 SYNOPSIS
-  use Bio::ASN1::EntrezGene::Indexer;
-  # creating & using the index is just a few lines
-  my $inx = Bio::ASN1::EntrezGene::Indexer->new(
-    -filename => 'entrezgene.idx',
-    -write_flag => 'WRITE'); # needed for make_index call, but if opening 
-                             # existing index file, don't set write flag!
-  $inx->make_index('Homo_sapiens', 'Mus_musculus', 'Rattus_norvegicus');
-  my $seq = $inx->fetch(10); # Bio::Seq obj for Entrez Gene #10
-  # alternatively, if one prefers just a data structure instead of objects
-  $seq = $inx->fetch_hash(10); # a hash produced by Bio::ASN1::EntrezGene
-                            # that contains all data in the Entrez Gene record
-  # note that in case you wonder, you can get the files 'Homo_sapiens'
-  # from NCBI Entrez Gene ftp download, DATA/ASN/Mammalia directory
-Bio::ASN1::EntrezGene, Bioperl version that contains Stefan Kirov's and all dependencies therein.
-Same as Bio::ASN1::EntrezGene
-Bio::ASN1::EntrezGene::Indexer is a Perl Indexer for NCBI Entrez Gene genome
-databases. It processes an ASN.1-formatted Entrez Gene record and stores the
-file position for each record in a way compliant with Bioperl standard (in
-fact its a subclass of Bioperl's index objects).
-Note that this module does not parse record, because it needs to run fast and
-grab only the gene ids.  For parsing record, use Bio::ASN1::EntrezGene, or
-better yet, use Bio::SeqIO, format 'entrezgene'.
-It takes this module (version 1.07) 21 seconds to index the human genome
-Entrez Gene file (Apr. 5/2005 download) on one 2.4 GHz Intel Xeon processor.
-=head1 SEE ALSO
-For details on various parsers I generated for Entrez Gene, example scripts that
-uses/benchmarks the modules, please see L<>.
-Those other parsers etc. are included in V1.05 download.
-=head1 AUTHOR
-Dr. Mingyi Liu <>
-The Bio::ASN1::EntrezGene module and its related modules and scripts
-are copyright (c) 2005 Mingyi Liu, GPC Biotech AG and Altana Research
-Institute. All rights reserved. I created these modules when working
-on a collaboration project between these two companies. Therefore a
-special thanks for the two companies to allow the release of the code
-into public domain.
-You may use and distribute them under the terms of the Perl itself or
-GPL (L<>).
-=head1 CITATION
-Liu, M and Grigoriev, A (2005) "Fast Parsers for Entrez Gene" 
-Bioinformatics. In press
-Any OS that Perl & Bioperl run on.
-=head1 METHODS
 package Bio::ASN1::EntrezGene::Indexer;
+  $Bio::ASN1::EntrezGene::Indexer::AUTHORITY = 'cpan:BIOPERLML';
+  $Bio::ASN1::EntrezGene::Indexer::VERSION = '1.70';
+use utf8;
 use strict;
+use warnings;
 use Carp qw(carp croak);
-use vars qw ($VERSION @ISA);
 use Bio::ASN1::EntrezGene;
 use Bio::Index::AbstractSeq;
+use parent qw(Bio::Index::AbstractSeq);
+# ABSTRACT: Indexes NCBI Entrez Gene files.
+# AUTHOR:   Dr. Mingyi Liu <>
+# OWNER:    2005 Mingyi Liu
+# OWNER:    2005 GPC Biotech AG
+# OWNER:    2005 Altana Research Institute
+# LICENSE:  Perl_5
-@ISA = qw(Bio::Index::AbstractSeq);
-$VERSION = '1.09';
+# TODO: Should this be deprecated?
 sub _version
-  return $VERSION;
+    return $Bio::Index::AbstractSeq::VERSION;
 sub _type_stamp
   return '__EntrezGene_ASN1__';
-sub _index_file 
+sub _index_file
   my($self, $file, $idx) = @_;
   my $position;
@@ -115,22 +52,100 @@ sub _index_file
   return 1;
 sub _file_format
   return 'entrezgene';
+sub fetch_hash
+  my ($self, $geneid) = @_;
+  if (my $gene = $self->db->{$geneid})
+  {
+    my ($fileno, $position) = $self->unpack_record($gene);
+    my $parser = Bio::ASN1::EntrezGene->new('fh' => $self->_file_handle($fileno));
+    seek($parser->fh, $position, 0);
+    return $parser->next_seq;
+  }
+sub _file_handle {
+  my( $self, $i ) = @_;
+  unless ($self->{'_filehandle'}[$i]) {
+    my @rec = $self->unpack_record($self->db->{"__FILE_$i"})
+      or $self->throw("Can't get filename for index : $i");
+    my $file = $rec[0];
+    local *FH;
+    open *FH, $file or $self->throw("Can't read file '$file' : $!");
+    $self->{'_filehandle'}[$i] = *FH; # Cache filehandle
+  }
+  return $self->{'_filehandle'}[$i];
+=encoding utf-8
+=head1 NAME
+Bio::ASN1::EntrezGene::Indexer - Indexes NCBI Entrez Gene files.
+=head1 VERSION
+version 1.70
+=head1 SYNOPSIS
+  use Bio::ASN1::EntrezGene::Indexer;
+  # creating & using the index is just a few lines
+  my $inx = Bio::ASN1::EntrezGene::Indexer->new(
+    -filename => 'entrezgene.idx',
+    -write_flag => 'WRITE'); # needed for make_index call, but if opening
+                             # existing index file, don't set write flag!
+  $inx->make_index('Homo_sapiens', 'Mus_musculus', 'Rattus_norvegicus');
+  my $seq = $inx->fetch(10); # Bio::Seq obj for Entrez Gene #10
+  # alternatively, if one prefers just a data structure instead of objects
+  $seq = $inx->fetch_hash(10); # a hash produced by Bio::ASN1::EntrezGene
+                            # that contains all data in the Entrez Gene record
+  # note that in case you wonder, you can get the files 'Homo_sapiens'
+  # from NCBI Entrez Gene ftp download, DATA/ASN/Mammalia directory
+Bio::ASN1::EntrezGene::Indexer is a Perl Indexer for NCBI Entrez Gene genome
+databases. It processes an ASN.1-formatted Entrez Gene record and stores the
+file position for each record in a way compliant with Bioperl standard (in
+fact it's a subclass of Bioperl's index objects).
+Note that this module does not parse records, because it needs to run fast and
+grab only the gene ids.  For parsing records, use Bio::ASN1::EntrezGene, or
+better yet, use Bio::SeqIO, format 'entrezgene'.
+It takes this module (version 1.07) 21 seconds to index the human genome
+Entrez Gene file (Apr. 5/2005 download) on one 2.4 GHz Intel Xeon processor.
+=head1 METHODS
 =head2 fetch
   Parameters: $geneid - id for the Entrez Gene record to be retrieved
   Example:    my $hash = $indexer->fetch(10); # get Entrez Gene #10
   Function:   fetch the data for the given Entrez Gene id.
   Returns:    A Bio::Seq object produced by Bio::SeqIO::entrezgene
-  Notes:      One needs to have Bio::SeqIO::entrezgene installed before 
+  Notes:      One needs to have Bio::SeqIO::entrezgene installed before
                 calling this function!
 =head2 fetch_hash
   Parameters: $geneid - id for the Entrez Gene record to be retrieved
@@ -141,19 +156,15 @@ sub _file_format
                 Gene record.
   Notes:      Alternative to fetch()
-sub fetch_hash
-  my ($self, $geneid) = @_;
-  if (my $gene = $self->db->{$geneid})
-  {
-    my ($fileno, $position) = $self->unpack_record($gene);
-    my $parser = Bio::ASN1::EntrezGene->new('fh' => $self->_file_handle($fileno));
-    seek($parser->fh, $position, 0);
-    return $parser->next_seq;
-  }
+=head2 _version
+=head2 _type_stamp
+=head2 _index_file
+=head2 _file_format
 =head2 _file_handle
@@ -171,21 +182,67 @@ sub fetch_hash
               changes file handle code like I do below to fit perl 5.005_03, this
               sub would be removed from this module
-sub _file_handle {
-	my( $self, $i ) = @_;
-	unless ($self->{'_filehandle'}[$i]) {
-		my @rec = $self->unpack_record($self->db->{"__FILE_$i"})
-		  or $self->throw("Can't get filename for index : $i");
-		my $file = $rec[0];
-		local *FH;
-		open *FH, $file or $self->throw("Can't read file '$file' : $!");
-		$self->{'_filehandle'}[$i] = *FH; # Cache filehandle
-	}
-	return $self->{'_filehandle'}[$i];
+Bio::ASN1::EntrezGene, Bioperl version that contains Stefan Kirov's Bio::SeqIO::entrezgene module and all dependencies therein.
+Same as Bio::ASN1::EntrezGene
+=head1 SEE ALSO
+For details on various parsers I generated for Entrez Gene, and example scripts that
+use/benchmark the modules, please see L<http://sourceforge.net/projects/egparser/>.
+Those other parsers etc. are included in V1.05 download.
+=head1 CITATION
+Liu, Mingyi, and Andrei Grigoriev. "Fast parsers for Entrez Gene."
+Bioinformatics 21, no. 14 (2005): 3189-3190.
+Any OS that Perl & Bioperl run on.
+=head1 FEEDBACK
+=head2 Mailing lists
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+                  - General discussion
+  - About the mailing lists
+=head2 Support
+Please direct usage questions or support issues to the mailing list:
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+=head2 Reporting bugs
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+=head1 AUTHOR
+Dr. Mingyi Liu <>
+This software is copyright (c) 2005 by Mingyi Liu, 2005 by GPC Biotech AG, and 2005 by Altana Research Institute.
+This software is available under the same terms as the perl 5 programming language system itself.
@@ -1,135 +1,23 @@
-=head1 NAME
-Bio::ASN1::EntrezGene - Regular expression-based Perl Parser for NCBI Entrez Gene.
-=head1 SYNOPSIS
-  use Bio::ASN1::EntrezGene;
-  my $parser = Bio::ASN1::EntrezGene->new('file' => "Homo_sapiens");
-  while(my $result = $parser->next_seq)
-  {
-    # extract data from $result, or Dumpvalue->new->dumpValue($result);
-  }
-  # a new way to get the $result data hash for a particular gene id:
-  use Bio::ASN1::EntrezGene::Indexer;
-  my $inx = Bio::ASN1::EntrezGene::Indexer->new(-filename => 'entrezgene.idx');
-  my $seq = $inx->fetch_hash(10); # returns $result for Entrez Gene record
-                                  # with geneid 10
-  # note that the index file 'entrezgene.idx' can be created as follows
-  my $inx = Bio::ASN1::EntrezGene::Indexer->new(
-    -filename => 'entrezgene.idx',
-    -write_flag => 'WRITE');
-  $inx->make_index('Homo_sapiens', 'Mus_musculus'); # files come from NCBI download
-  # for more detail please refer to Bio::ASN1::EntrezGene::Indexer perldoc
-Bio::ASN1::EntrezGene package can be installed & tested as follows:
-  perl Makefile.PL
-  make
-  make test
-  make install
-Bio::ASN1::EntrezGene is a regular expression-based Perl Parser for NCBI Entrez
-Gene genome databases (L<>).  It
-parses an ASN.1-formatted Entrez Gene record and returns a data structure that
-contains all data items from the gene record.
-The parser will report error & line number if input data does not conform to the
-NCBI Entrez Gene genome annotation file format.
-Note that it is possible to provide reading of all NCBI's ASN.1-formatted
-files through simple variations of the Entrez Gene parser (I need more
-investigation to be sure, but at least the sequence parser is a very simple
-variation on Entrez Gene parser and works well).
-It took the parser version 1.0 11 minutes to parse the human genome Entrez Gene
-file on one 2.4 GHz Intel Xeon processor.  The addition of validation and error
-reporting in 1.03 and handling of new Entrez Gene format slowed the parser down
-about 40%.
-Since V1.07, this package also included an indexer that runs pretty fast (it 
-takes 21 seconds for the indexer to index the human genome on the same 
-processor).  Therefore the combination of the modules would allow user to 
-retrieve and parse arbitrary records.
-=head1 SEE ALSO
-The script included in this package (please 
-see the Bio-ASN1-EntrezGene-x.xx/examples directory) is a very
-important and near-complete demo on using this module to extract all data
-items from Entrez Gene records.  Do check it out because in fact, this 
-script took me about 3-4 times more time to make for my project than the 
-parser V1.0 itself. Note that the example script was edited to leave
-out stuff specific to my internal project.
-For details on various parsers I generated for Entrez Gene, example scripts that 
-uses/benchmarks the modules, please see L<>.
-Those other parsers etc. are included in V1.05 download.
-=head1 AUTHOR
-Dr. Mingyi Liu <>
-The Bio::ASN1::EntrezGene module and its related modules and scripts 
-are copyright (c) 2005 Mingyi Liu, GPC Biotech AG and Altana Research 
-Institute. All rights reserved. I created these modules when working 
-on a collaboration project between these two companies. Therefore a 
-special thanks for the two companies to allow the release of the code 
-into public domain.
-You may use and distribute them under the terms of the Perl itself or
-GPL (L<>).
-=head1 CITATION
-Liu, M and Grigoriev, A (2005) "Fast Parsers for Entrez Gene" 
-Bioinformatics. In press
-Any OS that Perl runs on.
-=head1 METHODS
 package Bio::ASN1::EntrezGene;
+  $Bio::ASN1::EntrezGene::AUTHORITY = 'cpan:BIOPERLML';
+  $Bio::ASN1::EntrezGene::VERSION = '1.70';
+use utf8;
 use strict;
+use warnings;
 use Carp qw(carp croak);
-use vars qw ($VERSION);
-$VERSION = '1.09';
+# ABSTRACT: Regular expression-based Perl Parser for NCBI Entrez Gene.
+# AUTHOR:   Dr. Mingyi Liu <>
+# OWNER:    2005 Mingyi Liu
+# OWNER:    2005 GPC Biotech AG
+# OWNER:    2005 Altana Research Institute
+# LICENSE:  Perl_5
-=head2 new
-  Parameters: maxerrstr => 20 (optional) - maximum number of characters after
-                offending element, used by error reporting, default is 20
-              file or -file => $filename (optional) - name of the file to be 
-                parsed. call next_seq to parse!
-              fh or -fh => $filehandle (optional) - handle of the file to be 
-                parsed. 
-  Example:    my $parser = Bio::ASN1::EntrezGene->new();
-  Function:   Instantiate a parser object
-  Returns:    Object reference
-  Notes:      Setting file or fh will reset line numbers etc. that are used
-                for error reporting purposes, and seeking on file handle would 
-                mess up linenumbers!
 sub new
@@ -142,16 +30,6 @@ sub new
   return $self;
-=head2 maxerrstr
-  Parameters: $maxerrstr (optional) - maximum number of characters after
-                offending element, used by error reporting, default is 20
-  Example:    $parser->maxerrstr(20);
-  Function:   get/set maxerrstr.
-  Returns:    maxerrstr.
-  Notes:
 sub maxerrstr
@@ -161,28 +39,6 @@ sub maxerrstr
-=head2 parse
-  Parameters: $string that contains Entrez Gene record,
-              $trimopt (optional) that specifies how the data structure
-                returned should be trimmed. 2 is recommended and 
-                default
-              $noreset (optional) that species that line number should not
-                be reset
-              DEPRECATED as external function!!! Do not call this function
-                directly!  Call next_seq() instead
-  Example:    my $value = $parser->parse($text); # DEPRECATED as
-                # external function!!! Do not call this function
-                # directly!  Call next_seq() instead
-  Function:   Takes in a string representing Entrez Gene record, parses
-                the record and returns a data structure.
-  Returns:    A data structure containing all data items from the Entrez
-                Gene record.
-  Notes:      DEPRECATED as external function!!! Do not call this function
-                directly!  Call next_seq() instead
-              $string should not contain 'EntrezGene ::=' at beginning!
 sub parse
@@ -212,19 +68,6 @@ sub parse
   return $result;
-=head2 input_file
-  Parameters: $filename for file that contains Entrez Gene record(s)
-  Example:    $parser->input_file($filename);
-  Function:   Takes in name of a file containing Entrez Gene records.
-              opens the file and stores file handle
-  Returns:    none.
-  Notes:      Attemps to open file larger than 2 GB even on Perl that
-                does not support 2 GB file (accomplished by calling
-                "cat" and piping output. On OS that does not have "cat"
-                error message will be displayed)
 sub input_file
@@ -239,27 +82,6 @@ sub input_file
   $self->{linenumber} = 0; # reset line number
-=head2 next_seq
-  Parameters: $trimopt (optional) that specifies how the data structure
-                returned should be trimmed. option 2 is recommended and
-                default
-  Example:    my $value = $parser->next_seq();
-  Function:   Use the file handle generated by input_file, parses the next
-                the record and returns a data structure.
-  Returns:    A data structure containing all data items from the Entrez
-                Gene record.
-  Notes:      Must pass in a filename through new() or input_file() first!
-              For details on how to use the $trimopt data trimming option
-                please see comment for the trimdata method. An option
-                of 2 is recommended and default
-              The acceptable values for $trimopt include:
-                1 - trim as much as possibile
-                2 (or 0, undef) - trim to an easy-to-use structure
-                3 - no trimming (in version 1.06, prior to version
-                    1.06, 0 or undef means no trimming)
 sub next_seq
@@ -270,13 +92,12 @@ sub next_seq
     next unless /\S/;
-    my $tmp = (/^\s*Entrezgene ::= ({.*)/si)? $1 : "{" . $_; # get rid of the 'Entrezgene ::= ' at the beginning of Entrez Gene record
+    my $tmp = (/^\s*Entrezgene(-Set)? ::= ({.*)/si)? $2 : "{" . $_; # get rid of the 'Entrezgene ::= ' at the beginning of Entrez Gene record
     return $self->parse($tmp, $compact, 1); # 1 species no resetting line number
-# NCBI's Apr 05, 2005 format change forced much usage of lookahead, which would for
-# sure slows parser down.  But can't code efficiently without it.
 sub _parse
   my ($self, $flag) = @_;
@@ -310,7 +131,7 @@ sub _parse
       $self->{linenumber} += $lines =~ s/\n//g || $lines =~ s/\r//g; # count by *NIX/Win or Mac
       my $tmp;
       # we put \s* in lookahead for linenumber counting purpose (which slows things down)
-      if(($self->{input} =~ /\G"((?:[^"]|"")*)"(?=\s*[,}])/cg && ++$tmp) ||
+      if(($self->{input} =~ /\G"((?:[^"]+|"")*)"(?=\s*[,}])/cg && ++$tmp) ||
          $self->{input} =~ /\G([\w-]+)(?=\s*[,}])/cg)
         my $value = $1;
@@ -344,7 +165,7 @@ sub _parse
         return $data;
-    elsif($self->{input} =~ /\G[ \t]*"((?:[^"]|"")*)"(?=\s*[,}])/cg)
+    elsif($self->{input} =~ /\G[ \t]*"((?:[^"]+|"")*)"(?=\s*[,}])/cg)
       my $value = $1;
       $value =~ s/""/"/g;
@@ -408,7 +229,7 @@ sub _parse
 # so now  $hash->{comments}->[0]->[0]->[0]->{source}->[0]->[0]->[0]->{src}->[0]->[0]->{tag}->[0]->{id}
 # becomes $hash->{comments}->[0]->{source}->{src}->{tag}->{id}
 # this may create problem as array might suddenly change to hash depending on whether it
-# has multiple elements or not.  So set $flag to 2 or 0/undef would disallow trimming that 
+# has multiple elements or not.  So set $flag to 2 or 0/undef would disallow trimming that
 # would lead to data type change, thus resulting in data structure like:
 #    'comments' => ARRAY(0x88617e8)
 #       0  HASH(0x889d578)
@@ -423,25 +244,6 @@ sub _parse
 #                            'id' => 5
 # still not the safest, but saves some hassle writing code
-=head2 trimdata
-  Parameters: $hashref or $arrayref
-              $trimflag (optional, see Notes)
-  Example:    trimdata($datahash); # using the default flag
-  Function:   recursively process all attributes of a hash/array
-              hybrid and get rid of any arrayref that points to
-              one-element arrays (trims data structure) depending on
-              the optional flag.
-  Returns:    none - trimming happenes in-place
-  Notes:      This function is useful to compact a data structure produced by
-                Bio::ASN1::EntrezGene::parse.
-              The acceptable values for $trimopt include:
-                1 - trim as much as possibile
-                2 (or 0, undef) - trim to an easy-to-use structure
-                3 - no trimming (in version 1.06, prior to version
-                    1.06, 0 or undef means no trimming)
 sub trimdata
@@ -483,17 +285,6 @@ sub trimdata
-=head2 fh
-  Parameters: $filehandle (optional)
-  Example:    trimdata($datahash); # using the default flag
-  Function:   getter/setter for file handle
-  Returns:    file handle for current file being parsed.
-  Notes:      Use with care!
-              Line number report would not be corresponding to file's line 
-                number if seek operation is performed on the file handle!
 sub fh
@@ -506,6 +297,186 @@ sub fh
   return $self->{fh};
+sub rawdata
+  my $self = shift;
+  return "Entrezgene ::= $self->{input}";
+=encoding utf-8
+=head1 NAME
+Bio::ASN1::EntrezGene - Regular expression-based Perl Parser for NCBI Entrez Gene.
+=head1 VERSION
+version 1.70
+=head1 SYNOPSIS
+  use Bio::ASN1::EntrezGene;
+  my $parser = Bio::ASN1::EntrezGene->new('file' => "Homo_sapiens");
+  while(my $result = $parser->next_seq)
+  {
+    # extract data from $result, or Dumpvalue->new->dumpValue($result);
+  }
+  # a new way to get the $result data hash for a particular gene id:
+  use Bio::ASN1::EntrezGene::Indexer;
+  my $inx = Bio::ASN1::EntrezGene::Indexer->new(-filename => 'entrezgene.idx');
+  my $seq = $inx->fetch_hash(10); # returns $result for Entrez Gene record
+                                  # with geneid 10
+  # note that the index file 'entrezgene.idx' can be created as follows
+  my $inx = Bio::ASN1::EntrezGene::Indexer->new(
+    -filename => 'entrezgene.idx',
+    -write_flag => 'WRITE');
+  $inx->make_index('Homo_sapiens', 'Mus_musculus'); # files come from NCBI download
+  # for more detail please refer to Bio::ASN1::EntrezGene::Indexer perldoc
+Bio::ASN1::EntrezGene is a regular expression-based Perl Parser for NCBI Entrez
+Gene genome databases (L<>).  It
+parses an ASN.1-formatted Entrez Gene record and returns a data structure that
+contains all data items from the gene record.
+The parser will report error & line number if input data does not conform to the
+NCBI Entrez Gene genome annotation file format.
+Note that it is possible to provide reading of all NCBI's ASN.1-formatted
+files through simple variations of the Entrez Gene parser (I need more
+investigation to be sure, but at least the sequence parser is a very simple
+variation on Entrez Gene parser and works well).
+It took the parser version 1.0 11 minutes to parse the human genome Entrez Gene
+file on one 2.4 GHz Intel Xeon processor.  The addition of validation and error
+reporting in 1.03 and handling of new Entrez Gene format slowed the parser down
+about 40%.
+Since V1.07, this package also included an indexer that runs pretty fast (it
+takes 21 seconds for the indexer to index the human genome on the same
+processor).  Therefore the combination of the modules would allow user to
+retrieve and parse arbitrary records.
+=head2 maxerrstr
+  Parameters: $maxerrstr (optional) - maximum number of characters after
+                offending element, used by error reporting, default is 20
+  Example:    $parser->maxerrstr(20);
+  Function:   get/set maxerrstr.
+  Returns:    maxerrstr.
+  Notes:
+=head2 input_file
+  Parameters: $filename for file that contains Entrez Gene record(s)
+  Example:    $parser->input_file($filename);
+  Function:   Takes in name of a file containing Entrez Gene records.
+              opens the file and stores file handle
+  Returns:    none.
+  Notes:      Attempts to open file larger than 2 GB even on Perl that
+                does not support 2 GB file (accomplished by calling
+                "cat" and piping output. On OS that does not have "cat"
+                error message will be displayed)
+=head1 METHODS
+=head2 new
+  Parameters: maxerrstr => 20 (optional) - maximum number of characters after
+                offending element, used by error reporting, default is 20
+              file or -file => $filename (optional) - name of the file to be
+                parsed. call next_seq to parse!
+              fh or -fh => $filehandle (optional) - handle of the file to be
+                parsed.
+  Example:    my $parser = Bio::ASN1::EntrezGene->new();
+  Function:   Instantiate a parser object
+  Returns:    Object reference
+  Notes:      Setting file or fh will reset line numbers etc. that are used
+                for error reporting purposes, and seeking on file handle would
+                mess up linenumbers!
+=head2 parse
+  Parameters: $string that contains Entrez Gene record,
+              $trimopt (optional) that specifies how the data structure
+                returned should be trimmed. 2 is recommended and
+                default
+              $noreset (optional) that specifies that line number should not
+                be reset
+              DEPRECATED as external function!!! Do not call this function
+                directly!  Call next_seq() instead
+  Example:    my $value = $parser->parse($text); # DEPRECATED as
+                # external function!!! Do not call this function
+                # directly!  Call next_seq() instead
+  Function:   Takes in a string representing Entrez Gene record, parses
+                the record and returns a data structure.
+  Returns:    A data structure containing all data items from the Entrez
+                Gene record.
+  Notes:      DEPRECATED as external function!!! Do not call this function
+                directly!  Call next_seq() instead
+              $string should not contain 'EntrezGene ::=' at beginning!
+=head2 next_seq
+  Parameters: $trimopt (optional) that specifies how the data structure
+                returned should be trimmed. option 2 is recommended and
+                default
+  Example:    my $value = $parser->next_seq();
+  Function:   Uses the file handle generated by input_file, parses the next
+                record and returns a data structure.
+  Returns:    A data structure containing all data items from the Entrez
+                Gene record.
+  Notes:      Must pass in a filename through new() or input_file() first!
+              For details on how to use the $trimopt data trimming option
+                please see comment for the trimdata method. An option
+                of 2 is recommended and default
+              The acceptable values for $trimopt include:
+                1 - trim as much as possible
+                2 (or 0, undef) - trim to an easy-to-use structure
+                3 - no trimming (in version 1.06, prior to version
+                    1.06, 0 or undef means no trimming)
+=head2 trimdata
+  Parameters: $hashref or $arrayref
+              $trimflag (optional, see Notes)
+  Example:    trimdata($datahash); # using the default flag
+  Function:   recursively process all attributes of a hash/array
+              hybrid and get rid of any arrayref that points to
+              one-element arrays (trims data structure) depending on
+              the optional flag.
+  Returns:    none - trimming happens in-place
+  Notes:      This function is useful to compact a data structure produced by
+                Bio::ASN1::EntrezGene::parse.
+              The acceptable values for $trimopt include:
+                1 - trim as much as possible
+                2 (or 0, undef) - trim to an easy-to-use structure
+                3 - no trimming (in version 1.06, prior to version
+                    1.06, 0 or undef means no trimming)
+=head2 fh
+  Parameters: $filehandle (optional)
+  Example:    my $fh = $parser->fh; # get the current file handle
+  Function:   getter/setter for file handle
+  Returns:    file handle for current file being parsed.
+  Notes:      Use with care!
+              Line number report would not be corresponding to file's line
+                number if seek operation is performed on the file handle!
 =head2 rawdata
   Parameters: none
@@ -514,16 +485,89 @@ sub fh
   Returns:    a string containing the ASN1-formatted Entrez Gene record
   Notes:      Must first parse a record then call this function!
               Could be useful in interpreting line number value in error
-                report (if user did a seek on file handle right before parsing 
+                report (if user did a seek on file handle right before parsing
-sub rawdata
-  my $self = shift;
-  return "Entrezgene ::= $self->{input}";
+=head2 _parse
+NCBI's Apr 05, 2005 format change forced heavy use of lookahead, which
+certainly slows the parser down, but the parser can't be coded efficiently without it.
+Bio::ASN1::EntrezGene package can be installed & tested as follows:
+  perl Makefile.PL
+  make
+  make test
+  make install
+=head1 SEE ALSO
+The script included in this package (please
+see the Bio-ASN1-EntrezGene-x.xx/examples directory) is a very
+important and near-complete demo on using this module to extract all data
+items from Entrez Gene records.  Do check it out because in fact, this
+script took me about 3-4 times more time to make for my project than the
+parser V1.0 itself. Note that the example script was edited to leave
+out stuff specific to my internal project.
+For details on various parsers I generated for Entrez Gene, example scripts that
+use/benchmark the modules, please see L<>.
+Those other parsers etc. are included in V1.05 download.
+=head1 CITATION
+Liu, Mingyi, and Andrei Grigoriev. "Fast parsers for Entrez Gene."
+Bioinformatics 21, no. 14 (2005): 3189-3190.
+Any OS that Perl runs on.
+=head1 FEEDBACK
+=head2 Mailing lists
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+                  - General discussion
+  - About the mailing lists
+=head2 Support
+Please direct usage questions or support issues to the mailing list:
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+=head2 Reporting bugs
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+=head1 AUTHOR
+Dr. Mingyi Liu <>
+This software is copyright (c) 2005 by Mingyi Liu, 2005 by GPC Biotech AG, and 2005 by Altana Research Institute.
+This software is available under the same terms as the perl 5 programming language system itself.
@@ -1,97 +1,41 @@
-=head1 NAME
-Bio::ASN1::Sequence::Indexer - Indexes NCBI Sequence files.
-=head1 SYNOPSIS
-  use Bio::ASN1::Sequence::Indexer;
-  # creating & using the index is just a few lines
-  my $inx = Bio::ASN1::Sequence::Indexer->new(
-    -filename => 'seq.idx',
-    -write_flag => 'WRITE'); # needed for make_index call, but if opening
-                             # existing index file, don't set write flag!
-  $inx->make_index('seq1.asn', 'seq2.asn');
-  my $seq = $inx->fetch('AF093062'); # Bio::Seq obj for Sequence (doesn't work yet)
-  # alternatively, if one prefers just a data structure instead of objects
-  $seq = $inx->fetch_hash('AF093062'); # a hash produced by Bio::ASN1::Sequence
-                            # that contains all data in the Sequence record
-Bio::ASN1::Sequence, Bioperl and all dependencies therein.
-Same as Bio::ASN1::EntrezGene
-Bio::ASN1::Sequence::Indexer is a Perl Indexer for NCBI Sequence genome
-databases. It processes an ASN.1-formatted Sequence record and stores the
-file position for each record in a way compliant with Bioperl standard (in
-fact its a subclass of Bioperl's index objects).
-Note that this module does not parse record, because it needs to run fast and
-grab only the gene ids.  For parsing record, use Bio::ASN1::Sequence.
-As with Bio::ASN1::Sequence, this module is best thought of as beta version -
-it works, but is not fully tested.
-=head1 SEE ALSO
-Please check out perldoc for Bio::ASN1::EntrezGene for more info.
-=head1 AUTHOR
-Dr. Mingyi Liu <>
-The Bio::ASN1::EntrezGene module and its related modules and scripts
-are copyright (c) 2005 Mingyi Liu, GPC Biotech AG and Altana Research
-Institute. All rights reserved. I created these modules when working
-on a collaboration project between these two companies. Therefore a
-special thanks for the two companies to allow the release of the code
-into public domain.
-You may use and distribute them under the terms of the Perl itself or
-GPL (L<>).
-=head1 CITATION
-Liu, M and Grigoriev, A (2005) "Fast Parsers for Entrez Gene" 
-Bioinformatics. In press
-Any OS that Perl & Bioperl run on.
-=head1 METHODS
 package Bio::ASN1::Sequence::Indexer;
+  $Bio::ASN1::Sequence::Indexer::AUTHORITY = 'cpan:BIOPERLML';
+  $Bio::ASN1::Sequence::Indexer::VERSION = '1.70';
+use utf8;
 use strict;
+use warnings;
 use Carp qw(carp croak);
-use vars qw ($VERSION @ISA);
 use Bio::ASN1::Sequence;
 use Bio::Index::AbstractSeq;
+use parent qw(Bio::Index::AbstractSeq);
+# ABSTRACT: Indexes NCBI Sequence files.
+# AUTHOR:   Dr. Mingyi Liu <>
+# OWNER:    2005 Mingyi Liu
+# OWNER:    2005 GPC Biotech AG
+# OWNER:    2005 Altana Research Institute
+# LICENSE:  Perl_5
-@ISA = qw(Bio::Index::AbstractSeq);
-$VERSION = '1.09';
+# TODO: Should this be deprecated?
 sub _version
-  return $VERSION;
+  return $Bio::Index::AbstractSeq::VERSION;
 sub _type_stamp
   return '__Sequence_ASN1__';
 sub _index_file
   my($self, $file, $idx) = @_;
@@ -111,11 +55,87 @@ sub _index_file
   return 1;
 sub _file_format
   return 'sequence';
+sub fetch_hash
+  my ($self, $seqid) = @_;
+  if (my $seq = $self->db->{$seqid})
+  {
+    my ($fileno, $position) = $self->unpack_record($seq);
+    my $parser = Bio::ASN1::Sequence->new('fh' => $self->_file_handle($fileno));
+    seek($parser->fh, $position, 0);
+    return $parser->next_seq;
+  }
+sub _file_handle {
+  my( $self, $i ) = @_;
+  unless ($self->{'_filehandle'}[$i]) {
+    my @rec = $self->unpack_record($self->db->{"__FILE_$i"})
+      or $self->throw("Can't get filename for index : $i");
+    my $file = $rec[0];
+    local *FH;
+    open *FH, $file or $self->throw("Can't read file '$file' : $!");
+    $self->{'_filehandle'}[$i] = *FH; # Cache filehandle
+  }
+  return $self->{'_filehandle'}[$i];
+=encoding utf-8
+=head1 NAME
+Bio::ASN1::Sequence::Indexer - Indexes NCBI Sequence files.
+=head1 VERSION
+version 1.70
+=head1 SYNOPSIS
+  use Bio::ASN1::Sequence::Indexer;
+  # creating & using the index is just a few lines
+  my $inx = Bio::ASN1::Sequence::Indexer->new(
+    -filename => 'seq.idx',
+    -write_flag => 'WRITE'); # needed for make_index call, but if opening
+                             # existing index file, don't set write flag!
+  $inx->make_index('seq1.asn', 'seq2.asn');
+  my $seq = $inx->fetch('AF093062'); # Bio::Seq obj for Sequence (doesn't work yet)
+  # alternatively, if one prefers just a data structure instead of objects
+  $seq = $inx->fetch_hash('AF093062'); # a hash produced by Bio::ASN1::Sequence
+                            # that contains all data in the Sequence record
+Bio::ASN1::Sequence::Indexer is a Perl Indexer for NCBI Sequence genome
+databases. It processes an ASN.1-formatted Sequence record and stores the
+file position for each record in a way compliant with Bioperl standard (in
+fact it's a subclass of Bioperl's index objects).
+Note that this module does not parse records, because it needs to run fast and
+grab only the gene ids.  For parsing records, use Bio::ASN1::Sequence.
+As with Bio::ASN1::Sequence, this module is best thought of as beta version -
+it works, but is not fully tested.
+=head1 METHODS
 =head2 fetch
   Parameters: $geneid - id for the Sequence record to be retrieved
@@ -125,8 +145,6 @@ sub _file_format
   Notes:      Bio::SeqIO::sequence does not exist and probably won't
                 exist for a while!  So call fetch_hash instead
 =head2 fetch_hash
   Parameters: $seqid - id for the Sequence record to be retrieved
@@ -136,19 +154,15 @@ sub _file_format
   Notes:      Alternative to fetch()
-sub fetch_hash
-  my ($self, $seqid) = @_;
-  if (my $seq = $self->db->{$seqid})
-  {
-    my ($fileno, $position) = $self->unpack_record($seq);
-    my $parser = Bio::ASN1::Sequence->new('fh' => $self->_file_handle($fileno));
-    seek($parser->fh, $position, 0);
-    return $parser->next_seq;
-  }
+=head2 _version
+=head2 _type_stamp
+=head2 _index_file
+=head2 _file_format
 =head2 _file_handle
@@ -166,21 +180,64 @@ sub fetch_hash
               changes file handle code like I do below to fit perl 5.005_03, this
               sub would be removed from this module
-sub _file_handle {
-	my( $self, $i ) = @_;
-	unless ($self->{'_filehandle'}[$i]) {
-		my @rec = $self->unpack_record($self->db->{"__FILE_$i"})
-		  or $self->throw("Can't get filename for index : $i");
-		my $file = $rec[0];
-		local *FH;
-		open *FH, $file or $self->throw("Can't read file '$file' : $!");
-		$self->{'_filehandle'}[$i] = *FH; # Cache filehandle
-	}
-	return $self->{'_filehandle'}[$i];
+Bio::ASN1::Sequence, Bioperl and all dependencies therein.
+Same as Bio::ASN1::EntrezGene
+=head1 SEE ALSO
+Please check out perldoc for Bio::ASN1::EntrezGene for more info.
+=head1 CITATION
+Liu, Mingyi, and Andrei Grigoriev. "Fast parsers for Entrez Gene."
+Bioinformatics 21, no. 14 (2005): 3189-3190.
+Any OS that Perl & Bioperl run on.
+=head1 FEEDBACK
+=head2 Mailing lists
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+                  - General discussion
+  - About the mailing lists
+=head2 Support
+Please direct usage questions or support issues to the mailing list:
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+=head2 Reporting bugs
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+=head1 AUTHOR
+Dr. Mingyi Liu <>
+This software is copyright (c) 2005 by Mingyi Liu, 2005 by GPC Biotech AG, and 2005 by Altana Research Institute.
+This software is available under the same terms as the perl 5 programming language system itself.
@@ -1,121 +1,23 @@
-=head1 NAME
-Bio::ASN1::Sequence - Regular expression-based Perl Parser for ASN.1-formatted NCBI Sequences.
-=head1 SYNOPSIS
-  use Bio::ASN1::Sequence;
-  my $parser = Bio::ASN1::Sequence->new('file' => "downloaded.asn1");
-  while(my $result = $parser->next_seq)
-  {
-    # extract data from $result, or Dumpvalue->new->dumpValue($result);
-  }
-  # a new way to get the $result data hash for a particular sequence id:
-  use Bio::ASN1::Sequence::Indexer;
-  my $inx = Bio::ASN1::Sequence::Indexer->new(-filename => 'seq.idx');
-  my $seq = $inx->fetch_hash('AF093062');
-  # for creation of .idx index files please refer to
-  # Bio::ASN1::Sequence::Indexer perldoc
-Bio::ASN1::Sequence is part of the Bio::ASN1::EntrezGene package.
-Bio::ASN1::EntrezGene package can be installed & tested as follows:
-  perl Makefile.PL
-  make
-  make test
-  make install
-Bio::ASN1::Sequence is a regular expression-based Perl Parser for ASN.1-formatted
-NCBI sequences.  It parses an ASN.1-formatted sequence record and returns a data 
-structure that contains all data items from the sequence record.
-The parser will report error & line number if input data does not conform to the
-NCBI Sequence annotation file format.
-The sequence parser is basically a modified version of the high-performance
-Bio::ASN1::EntrezGene parser.  However, I created a standalone module for sequence
-since it is more efficient to keep Sequence-specific code out of
-In fact it is possible to provide reading of all NCBI's ASN.1-formatted
-files through simple variations of the Entrez Gene parser (I need more
-investigation to be sure, but at least the sequence parser works well).
-Since demand for parsing NCBI ASN.1-formatted sequences is much lower than EntrezGene,
-this module is more like a beta version that works on the examples I checked, but
-I did not check all available records or data definitions.  The error-reporting
-function of this module has to be useful sometimes. :)
-=head1 SEE ALSO
-The script included in this package (please
-see the Bio-ASN1-EntrezGene-x.xx/examples directory) shows the usage.
-Please check out perldoc for Bio::ASN1::EntrezGene for more info.
-=head1 AUTHOR
-Dr. Mingyi Liu <>
-The Bio::ASN1::EntrezGene module and its related modules and scripts
-are copyright (c) 2005 Mingyi Liu, GPC Biotech AG and Altana Research
-Institute. All rights reserved. I created these modules when working
-on a collaboration project between these two companies. Therefore a
-special thanks for the two companies to allow the release of the code
-into public domain.
-You may use and distribute them under the terms of the Perl itself or
-GPL (L<>).
-=head1 CITATION
-Liu, M and Grigoriev, A (2005) "Fast Parsers for Entrez Gene" 
-Bioinformatics. In press
-Any OS that Perl runs on.
-=head1 METHODS
 package Bio::ASN1::Sequence;
+  $Bio::ASN1::Sequence::AUTHORITY = 'cpan:BIOPERLML';
+  $Bio::ASN1::Sequence::VERSION = '1.70';
+use utf8;
 use strict;
+use warnings;
 use Carp qw(carp croak);
-use vars qw ($VERSION);
-$VERSION = '1.09';
+# ABSTRACT: Regular expression-based Perl Parser for ASN.1-formatted NCBI Sequences.
+# AUTHOR:   Dr. Mingyi Liu <>
+# OWNER:    2005 Mingyi Liu
+# OWNER:    2005 GPC Biotech AG
+# OWNER:    2005 Altana Research Institute
+# LICENSE:  Perl_5
-=head2 new
-  Parameters: maxerrstr => 20 (optional) - maximum number of characters after
-                offending element, used by error reporting, default is 20
-              file or -file => $filename (optional) - name of the file to be
-                parsed. call next_seq to parse!
-              fh or -fh => $filehandle (optional) - handle of the file to be
-                parsed.
-  Example:    my $parser = Bio::ASN1::Sequence->new();
-  Function:   Instantiate a parser object
-  Returns:    Object reference
-  Notes:      Setting file or fh will reset line numbers etc. that are used
-                for error reporting purposes, and seeking on file handle would 
-                mess up linenumbers!
 sub new
@@ -128,16 +30,6 @@ sub new
   return $self;
-=head2 maxerrstr
-  Parameters: $maxerrstr (optional) - maximum number of characters after
-                offending element, used by error reporting, default is 20
-  Example:    $parser->maxerrstr(20);
-  Function:   get/set maxerrstr.
-  Returns:    maxerrstr.
-  Notes:
 sub maxerrstr
@@ -147,28 +39,6 @@ sub maxerrstr
-=head2 parse
-  Parameters: $string that contains Sequence record,
-              $trimopt (optional) that specifies how the data structure
-                returned should be trimmed. 2 is recommended and 
-                default
-              $noreset (optional) that species that line number should not
-                be reset
-              DEPRECATED as external function!!! Do not call this function
-                directly!  Call next_seq() instead
-  Example:    my $value = $parser->parse($text); # DEPRECATED as
-                # external function!!! Do not call this function
-                # directly!  Call next_seq() instead
-  Function:   Takes in a string representing Sequence record, parses
-                the record and returns a data structure.
-  Returns:    A data structure containing all data items from the sequence
-                record.
-  Notes:      DEPRECATED as external function!!! Do not call this function
-                directly!  Call next_seq() instead
-              $string should not contain 'Seq-entry ::= set' at beginning!
 sub parse
@@ -198,19 +68,6 @@ sub parse
   return $result;
-=head2 input_file
-  Parameters: $filename for file that contains Sequence record(s)
-  Example:    $parser->input_file($filename);
-  Function:   Takes in name of a file containing Sequence records.
-              opens the file and stores file handle
-  Returns:    none.
-  Notes:      Attemps to open file larger than 2 GB even on Perl that
-                does not support 2 GB file (accomplished by calling
-                "cat" and piping output. On OS that does not have "cat"
-                error message will be displayed)
 sub input_file
@@ -225,27 +82,6 @@ sub input_file
   $self->{linenumber} = 0; # reset line number
-=head2 next_seq
-  Parameters: $trimopt (optional) that specifies how the data structure
-                returned should be trimmed. option 2 is recommended and
-                default
-  Example:    my $value = $parser->next_seq();
-  Function:   Use the file handle generated by input_file, parses the next
-                the record and returns a data structure.
-  Returns:    A data structure containing all data items from the sequence
-                record.
-  Notes:      Must pass in a filename through new() or input_file() first!
-              For details on how to use the $trimopt data trimming option
-                please see comment for the trimdata method. An option
-                of 2 is recommended and default
-              The acceptable values for $trimopt include:
-                1 - trim as much as possibile
-                2 (or 0, undef) - trim to an easy-to-use structure
-                3 - no trimming (in version 1.06, prior to version
-                    1.06, 0 or undef means no trimming)
 sub next_seq
@@ -261,8 +97,7 @@ sub next_seq
-# NCBI's Apr 05, 2005 format change forced much usage of lookahead, which would for
-# sure slows parser down.  But can't code efficiently without it.
 sub _parse
   my ($self, $flag) = @_;
@@ -296,7 +131,7 @@ sub _parse
       $self->{linenumber} += $lines =~ s/\n//g || $lines =~ s/\r//g; # count by *NIX/Win or Mac
       my ($tmp, $tmp1);
       # we put \s* in lookahead for linenumber counting purpose (which slows things down)
-      if(($self->{input} =~ /\G"((?:[^"]|"")*)"(?=\s*[,}])/cg && ++$tmp) ||
+      if(($self->{input} =~ /\G"((?:[^"]+|"")*)"(?=\s*[,}])/cg && ++$tmp) ||
          ($self->{input} =~ /\G'([^']+)'\s*H/icg && ++$tmp1) || # this is the only difference b/w sequence and entrez gene formats so far
          $self->{input} =~ /\G([\w-]+)(?=\s*[,}])/cg)
@@ -337,7 +172,7 @@ sub _parse
         return $data;
-    elsif($self->{input} =~ /\G[ \t]*"((?:[^"]|"")*)"(?=\s*[,}])/cg)
+    elsif($self->{input} =~ /\G[ \t]*"((?:[^"]+|"")*)"(?=\s*[,}])/cg)
       my $value = $1;
       $value =~ s/""/"/g;
@@ -401,7 +236,7 @@ sub _parse
 # so now  $hash->{comments}->[0]->[0]->[0]->{source}->[0]->[0]->[0]->{src}->[0]->[0]->{tag}->[0]->{id}
 # becomes $hash->{comments}->[0]->{source}->{src}->{tag}->{id}
 # this may create problem as array might suddenly change to hash depending on whether it
-# has multiple elements or not.  So set $flag to 2 or 0/undef would disallow trimming that 
+# has multiple elements or not.  So set $flag to 2 or 0/undef would disallow trimming that
 # would lead to data type change, thus resulting in data structure like:
 #    'comments' => ARRAY(0x88617e8)
 #       0  HASH(0x889d578)
@@ -416,27 +251,6 @@ sub _parse
 #                            'id' => 5
 # still not the safest, but saves some hassle writing code
-=head2 trimdata
-  Parameters: $hashref or $arrayref
-              $trimflag (optional, see Notes)
-  Example:    trimdata($datahash); # using the default flag
-  Function:   recursively process all attributes of a hash/array
-              hybrid and get rid of any arrayref that points to
-              one-element arrays (trims data structure) depending on
-              the optional flag.
-  Returns:    none - trimming happenes in-place
-  Notes:      This function is useful to compact a data structure produced by
-                Bio::ASN1::Sequence::parse.
-              The acceptable values for $trimopt include:
-                1 - trim as much as possibile
-                2 (or 0, undef) - trim to an easy-to-use structure
-                3 - no trimming (in version 1.06, prior to version
-                    1.06, 0 or undef means no trimming)
-              This function is duplicate to's and code should
-                be compressed in the future (using util module & subclass).
 sub trimdata
@@ -478,17 +292,6 @@ sub trimdata
-=head2 fh
-  Parameters: $filehandle (optional)
-  Example:    trimdata($datahash); # using the default flag
-  Function:   getter/setter for file handle
-  Returns:    file handle for current file being parsed.
-  Notes:      Use with care!
-              Line number report would not be corresponding to file's line 
-                number if seek operation is performed on the file handle!
 sub fh
@@ -501,6 +304,180 @@ sub fh
   return $self->{fh};
+sub rawdata
+  my $self = shift;
+  return "Seq-entry ::= set $self->{input}";
+=encoding utf-8
+=head1 NAME
+Bio::ASN1::Sequence - Regular expression-based Perl Parser for ASN.1-formatted NCBI Sequences.
+=head1 VERSION
+version 1.70
+=head1 SYNOPSIS
+  use Bio::ASN1::Sequence;
+  my $parser = Bio::ASN1::Sequence->new('file' => "downloaded.asn1");
+  while(my $result = $parser->next_seq)
+  {
+    # extract data from $result, or Dumpvalue->new->dumpValue($result);
+  }
+  # a new way to get the $result data hash for a particular sequence id:
+  use Bio::ASN1::Sequence::Indexer;
+  my $inx = Bio::ASN1::Sequence::Indexer->new(-filename => 'seq.idx');
+  my $seq = $inx->fetch_hash('AF093062');
+  # for creation of .idx index files please refer to
+  # Bio::ASN1::Sequence::Indexer perldoc
+Bio::ASN1::Sequence is a regular expression-based Perl Parser for ASN.1-formatted
+NCBI sequences.  It parses an ASN.1-formatted sequence record and returns a data
+structure that contains all data items from the sequence record.
+The parser will report error & line number if input data does not conform to the
+NCBI Sequence annotation file format.
+The sequence parser is basically a modified version of the high-performance
+Bio::ASN1::EntrezGene parser.  However, I created a standalone module for sequence
+since it is more efficient to keep Sequence-specific code out of the EntrezGene parser.
+In fact it is possible to provide reading of all NCBI's ASN.1-formatted
+files through simple variations of the Entrez Gene parser (I need more
+investigation to be sure, but at least the sequence parser works well).
+Since demand for parsing NCBI ASN.1-formatted sequences is much lower than EntrezGene,
+this module is more like a beta version that works on the examples I checked, but
+I did not check all available records or data definitions.  The error-reporting
+function of this module has to be useful sometimes. :)
+=head2 maxerrstr
+  Parameters: $maxerrstr (optional) - maximum number of characters after
+                offending element, used by error reporting, default is 20
+  Example:    $parser->maxerrstr(20);
+  Function:   get/set maxerrstr.
+  Returns:    maxerrstr.
+  Notes:
+=head2 input_file
+  Parameters: $filename for file that contains Sequence record(s)
+  Example:    $parser->input_file($filename);
+  Function:   Takes in name of a file containing Sequence records.
+              opens the file and stores file handle
+  Returns:    none.
+  Notes:      Attempts to open file larger than 2 GB even on Perl that
+                does not support 2 GB file (accomplished by calling
+                "cat" and piping output. On OS that does not have "cat"
+                error message will be displayed)
+=head1 METHODS
+=head2 new
+  Parameters: maxerrstr => 20 (optional) - maximum number of characters after
+                offending element, used by error reporting, default is 20
+              file or -file => $filename (optional) - name of the file to be
+                parsed. call next_seq to parse!
+              fh or -fh => $filehandle (optional) - handle of the file to be
+                parsed.
+  Example:    my $parser = Bio::ASN1::Sequence->new();
+  Function:   Instantiate a parser object
+  Returns:    Object reference
+  Notes:      Setting file or fh will reset line numbers etc. that are used
+                for error reporting purposes, and seeking on file handle would
+                mess up linenumbers!
+=head2 parse
+  Parameters: $string that contains Sequence record,
+              $trimopt (optional) that specifies how the data structure
+                returned should be trimmed. 2 is recommended and
+                default
+              $noreset (optional) that specifies that line number should not
+                be reset
+              DEPRECATED as external function!!! Do not call this function
+                directly!  Call next_seq() instead
+  Example:    my $value = $parser->parse($text); # DEPRECATED as
+                # external function!!! Do not call this function
+                # directly!  Call next_seq() instead
+  Function:   Takes in a string representing Sequence record, parses
+                the record and returns a data structure.
+  Returns:    A data structure containing all data items from the sequence
+                record.
+  Notes:      DEPRECATED as external function!!! Do not call this function
+                directly!  Call next_seq() instead
+              $string should not contain 'Seq-entry ::= set' at beginning!
+=head2 next_seq
+  Parameters: $trimopt (optional) that specifies how the data structure
+                returned should be trimmed. option 2 is recommended and
+                default
+  Example:    my $value = $parser->next_seq();
+  Function:   Use the file handle generated by input_file, parses the next
+                record and returns a data structure.
+  Returns:    A data structure containing all data items from the sequence
+                record.
+  Notes:      Must pass in a filename through new() or input_file() first!
+              For details on how to use the $trimopt data trimming option
+                please see comment for the trimdata method. An option
+                of 2 is recommended and default
+              The acceptable values for $trimopt include:
+                1 - trim as much as possible
+                2 (or 0, undef) - trim to an easy-to-use structure
+                3 - no trimming (as of version 1.06; prior to version
+                    1.06, 0 or undef meant no trimming)
+=head2 trimdata
+  Parameters: $hashref or $arrayref
+              $trimflag (optional, see Notes)
+  Example:    trimdata($datahash); # using the default flag
+  Function:   recursively process all attributes of a hash/array
+              hybrid and get rid of any arrayref that points to
+              one-element arrays (trims data structure) depending on
+              the optional flag.
+  Returns:    none - trimming happens in-place
+  Notes:      This function is useful to compact a data structure produced by
+                Bio::ASN1::Sequence::parse.
+              The acceptable values for $trimopt include:
+                1 - trim as much as possible
+                2 (or 0, undef) - trim to an easy-to-use structure
+                3 - no trimming (as of version 1.06; prior to version
+                    1.06, 0 or undef meant no trimming)
+              This function duplicates the trimdata method of Bio::ASN1::EntrezGene;
+                the code should be consolidated in the future (using util module & subclass).
+=head2 fh
+  Parameters: $filehandle (optional)
+  Example:    my $fh = $parser->fh; # get the current file handle
+  Function:   getter/setter for file handle
+  Returns:    file handle for current file being parsed.
+  Notes:      Use with care!
+              Line number report would not be corresponding to file's line
+                number if seek operation is performed on the file handle!
 =head2 rawdata
   Parameters: none
@@ -509,16 +486,83 @@ sub fh
   Returns:    a string containing the ASN1-formatted sequence record
   Notes:      Must first parse a record then call this function!
               Could be useful in interpreting line number value in error
-                report (if user did a seek on file handle right before parsing 
+                report (if user did a seek on file handle right before parsing
-sub rawdata
-  my $self = shift;
-  return "Seq-entry ::= set $self->{input}";
+=head2 _parse
+NCBI's Apr 05, 2005 format change forced heavy use of lookahead, which
+certainly slows the parser down, but the parser can't be coded efficiently without it.
+Bio::ASN1::Sequence is part of the Bio::ASN1::EntrezGene package.
+Bio::ASN1::EntrezGene package can be installed & tested as follows:
+  perl Makefile.PL
+  make
+  make test
+  make install
+=head1 SEE ALSO
+The script included in this package (please
+see the Bio-ASN1-EntrezGene-x.xx/examples directory) shows the usage.
+Please check out perldoc for Bio::ASN1::EntrezGene for more info.
+=head1 CITATION
+Liu, Mingyi, and Andrei Grigoriev. "Fast parsers for Entrez Gene."
+Bioinformatics 21, no. 14 (2005): 3189-3190.
+Any OS that Perl runs on.
+=head1 FEEDBACK
+=head2 Mailing lists
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+                  - General discussion
+  - About the mailing lists
+=head2 Support
+Please direct usage questions or support issues to the mailing list:
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+=head2 Reporting bugs
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+=head1 AUTHOR
+Dr. Mingyi Liu <>
+This software is copyright (c) 2005 by Mingyi Liu, 2005 by GPC Biotech AG, and 2005 by Altana Research Institute.
+This software is available under the same terms as the perl 5 programming language system itself.
@@ -0,0 +1,47 @@
+use strict;
+use warnings;
+# this test was generated with Dist::Zilla::Plugin::Test::Compile 2.027
+use Test::More  tests => 4 + ($ENV{AUTHOR_TESTING} ? 1 : 0);
+my @module_files = (
+    'Bio/ASN1/',
+    'Bio/ASN1/EntrezGene/',
+    'Bio/ASN1/',
+    'Bio/ASN1/Sequence/'
+# no fake home requested
+use IPC::Open3;
+use IO::Handle;
+my @warnings;
+for my $lib (@module_files)
+    # see L<perlfaq8/How can I capture STDERR from an external command?>
+    my $stdin = '';     # converted to a gensym by open3
+    my $stderr = IO::Handle->new;
+    binmode $stderr, ':crlf' if $^O eq 'MSWin32';
+    my $pid = open3($stdin, '>&STDERR', $stderr, qq{$^X -Mblib -e"require q[$lib]"});
+    waitpid($pid, 0);
+    is($? >> 8, 0, "$lib loaded ok");
+    if (my @_warnings = <$stderr>)
+    {
+        warn @_warnings;
+        push @warnings, @_warnings;
+    }
+is(scalar(@warnings), 0, 'no warnings found') if $ENV{AUTHOR_TESTING};
diff --git a/var/tmp/source/MINGYILIU/Bio-ASN1-EntrezGene-1.091/Bio-ASN1-EntrezGene-1.09/t/input.asn b/var/tmp/source/CJFIELDS/Bio-ASN1-EntrezGene-1.70/Bio-ASN1-EntrezGene-1.70/t/input.asn
old mode 100644
new mode 100755
diff --git a/var/tmp/source/MINGYILIU/Bio-ASN1-EntrezGene-1.091/Bio-ASN1-EntrezGene-1.09/t/input1.asn b/var/tmp/source/CJFIELDS/Bio-ASN1-EntrezGene-1.70/Bio-ASN1-EntrezGene-1.70/t/input1.asn
old mode 100644
new mode 100755
@@ -0,0 +1,16 @@
+  unless ($ENV{RELEASE_TESTING}) {
+    require Test::More;
+    Test::More::plan(skip_all => 'these tests are for release candidate testing');
+  }
+use strict;
+use warnings;
+use Test::More;
+eval 'use Test::EOL';
+plan skip_all => 'Test::EOL required' if $@;
+all_perl_files_ok({ trailing_whitespace => 1 });
@@ -0,0 +1,20 @@
+  unless ($ENV{RELEASE_TESTING}) {
+    require Test::More;
+    Test::More::plan(skip_all => 'these tests are for release candidate testing');
+  }
+use strict;
+use warnings qw(all);
+use Test::More;
+## no critic (ProhibitStringyEval, RequireCheckingReturnValueOfEval)
+eval q(use Test::Mojibake);
+plan skip_all => q(Test::Mojibake required for source encoding testing) if $@;
@@ -0,0 +1,16 @@
+  unless ($ENV{RELEASE_TESTING}) {
+    require Test::More;
+    Test::More::plan(skip_all => 'these tests are for release candidate testing');
+  }
+use strict;
+use warnings;
+use Test::More;
+eval 'use Test::NoTabs';
+plan skip_all => 'Test::NoTabs required' if $@;
@@ -0,0 +1,21 @@
+  unless ($ENV{RELEASE_TESTING}) {
+    require Test::More;
+    Test::More::plan(skip_all => 'these tests are for release candidate testing');
+  }
+use Test::More;
+eval "use Test::Pod::Coverage 1.08";
+plan skip_all => "Test::Pod::Coverage 1.08 required for testing POD coverage"
+  if $@;
+eval "use Pod::Coverage::TrustPod";
+plan skip_all => "Pod::Coverage::TrustPod required for testing POD coverage"
+  if $@;
+all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' });
@@ -0,0 +1,15 @@
+  unless ($ENV{RELEASE_TESTING}) {
+    require Test::More;
+    Test::More::plan(skip_all => 'these tests are for release candidate testing');
+  }
+use Test::More;
+eval "use Test::Pod 1.41";
+plan skip_all => "Test::Pod 1.41 required for testing POD" if $@;
diff --git a/var/tmp/source/MINGYILIU/Bio-ASN1-EntrezGene-1.091/Bio-ASN1-EntrezGene-1.09/t/seq.asn b/var/tmp/source/CJFIELDS/Bio-ASN1-EntrezGene-1.70/Bio-ASN1-EntrezGene-1.70/t/seq.asn
old mode 100644
new mode 100755
@@ -1,77 +1,98 @@
 #!/usr/bin/env perl -w
 use strict;
-use Test::More tests => 11;
+use File::Spec;
+use Test::More tests => 6;
-my ($noindex, $noabseq, $nogene, $noseq, $noseqindex);
+# check_dependency($class)
+#   Tries to load the named module at run time.
+#   Returns 1 when the module loads, and nothing (empty list / undef) when the
+#   name is not a plain module name or the module fails to load.
+sub check_dependency {
+    my ($class) = @_;
+    # The name is interpolated into a string eval below, so accept only
+    # Foo::Bar style module names.
+    return if !defined $class || $class !~ /\A\w+(?:::\w+)*\z/;
+    # Rely on the eval's own value (the trailing "; 1") rather than on $@,
+    # which can be reset before it is inspected on older perls.
+    return 1 if eval "require $class; 1";
+    return;
+my ( $noindex, $noabseq, $nogene, $noseq, $noseqindex );
-  diag("\n\nTest indexers (Bio::ASN1::EntrezGene::Indexer, Bio::ASN1::Sequence::Indexer)\nIndexing and retrieval:\n");
-  use_ok('Bio::ASN1::EntrezGene') || $nogene++;
-  use_ok('Bio::Index::AbstractSeq') || $noabseq++;
-  use_ok('Bio::ASN1::EntrezGene::Indexer') || $noindex++;
-  use_ok('Bio::ASN1::Sequence') || $noseq++;
-  use_ok('Bio::ASN1::Sequence::Indexer') || $noseqindex++;
+    diag(
+"\n\nTest indexers (Bio::ASN1::EntrezGene::Indexer, Bio::ASN1::Sequence::Indexer)\nIndexing and retrieval:\n"
+    );
+    check_dependency('Bio::ASN1::EntrezGene')          || $nogene++;
+    check_dependency('Bio::Index::AbstractSeq')        || $noabseq++;
+    check_dependency('Bio::ASN1::EntrezGene::Indexer') || $noindex++;
+    check_dependency('Bio::ASN1::Sequence')            || $noseq++;
+    check_dependency('Bio::ASN1::Sequence::Indexer')   || $noseqindex++;
 diag("\n\nFirst testing gene indexer:\n");
-  # test indexer
-  if(!$noabseq)
-  {
-    if(!$noindex)
-    {
-      my $inx = Bio::ASN1::EntrezGene::Indexer->new(-filename => 't/testgene.idx',
-                  -write_flag => 'WRITE');
-      isa_ok($inx, 'Bio::ASN1::EntrezGene::Indexer');
-      $inx->make_index('t/input.asn', 't/input1.asn');
+SKIP: {
+    if ( !$nogene ) {
+        skip( "BioPerl not installed, skipping", 3 ) if $noabseq;
+        # test indexer -- the skip() above has already left this SKIP block
+        # when $noabseq is set, so the !$noabseq guard below is always true here
+        if ( !$noabseq ) {
+            if ( !$noindex ) {
+                my $inx = Bio::ASN1::EntrezGene::Indexer->new(
+                    -filename   => File::Spec->catfile('t','testgene.idx'),
+                    -write_flag => 'WRITE'
+                );
+                isa_ok( $inx, 'Bio::ASN1::EntrezGene::Indexer' );
+                $inx->make_index( File::Spec->catfile('t','input.asn'), File::Spec->catfile('t','input1.asn' ));
 #      cmp_ok($inx->count_records, '==', 4, 'total number of indexed gene records');
-      my $value = $inx->fetch_hash(3);
-      isa_ok($value, 'ARRAY');
-      cmp_ok($value->[0]{'track-info'}[0]{geneid}, '==', 3, 'correct gene record retrieved');
+                my $value = $inx->fetch_hash(3);
+                isa_ok( $value, 'ARRAY' );
+                cmp_ok( $value->[0]{'track-info'}[0]{geneid},
+                    '==', 3, 'correct gene record retrieved' );
+            }
+            else {
+                diag(
+"\nThere's some problem with the installation of Bio::ASN1::EntrezGene::Indexer!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now"
+                );
+            }
+        }
-    else
-    {
-      diag("\nThere's some problem with the installation of Bio::ASN1::EntrezGene::Indexer!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now");
+    else {
+        diag(
+"\nThere's some problem with the installation of Bio::ASN1::EntrezGene!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now"
+        );
-  }
-  else
-  {
-    diag("\nYou need to have Bio::Index::AbstractSeq (\ninstalled for testing the indexer!\nQuitting now");
-  }
-  diag("\nThere's some problem with the installation of Bio::ASN1::EntrezGene!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now");
+    diag("\n\nNow testing sequence indexer:\n");
-diag("\n\nNow testing sequence indexer:\n");
-  # test indexer
-  if(!$noabseq)
-  {
-    if(!$noseqindex)
-    {
-      my $inx = Bio::ASN1::Sequence::Indexer->new(-filename => 't/testseq.idx',
-                  -write_flag => 'WRITE');
-      isa_ok($inx, 'Bio::ASN1::Sequence::Indexer');
-      $inx->make_index('t/seq.asn');
+SKIP: {
+    if ( !$noseq ) {
+        skip( "BioPerl not installed, skipping", 3 ) if $noabseq;
+        # test indexer -- the skip() above has already left this SKIP block
+        # when $noabseq is set, so the !$noabseq guard below is always true here
+        if ( !$noabseq ) {
+            if ( !$noseqindex ) {
+                my $inx = Bio::ASN1::Sequence::Indexer->new(
+                    -filename   => File::Spec->catfile('t','testseq.idx'),
+                    -write_flag => 'WRITE'
+                );
+                isa_ok( $inx, 'Bio::ASN1::Sequence::Indexer' );
+                $inx->make_index(File::Spec->catfile('t','seq.asn'));
 #      cmp_ok($inx->count_records, '==', 2, 'total number of sequence ids in index');
-      my $value = $inx->fetch_hash('AF093062');
-      isa_ok($value, 'ARRAY');
-      cmp_ok($value->[0]{'seq-set'}[0]{seq}[0]{id}[0]{genbank}[0]{accession}, 'eq', 'AF093062', 'correct sequence record retrieved');
+                my $value = $inx->fetch_hash('AF093062');
+                isa_ok( $value, 'ARRAY' );
+                cmp_ok(
+                    $value->[0]{'seq-set'}[0]{seq}[0]{id}[0]{genbank}[0]
+                      {accession},
+                    'eq', 'AF093062', 'correct sequence record retrieved'
+                );
+            }
+            else {
+                diag(
+"\nThere's some problem with the installation of Bio::ASN1::Sequence::Indexer!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now"
+                );
+            }
+        }
-    else
-    {
-      diag("\nThere's some problem with the installation of Bio::ASN1::Sequence::Indexer!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now");
+    else {
+        diag(
+"\nThere's some problem with the installation of Bio::ASN1::Sequence!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now"
+        );
-  }
-  else
-  {
-    diag("\nYou need to have Bio::Index::AbstractSeq (\ninstalled for testing the indexer!\nQuitting now");
-  }
-  diag("\nThere's some problem with the installation of Bio::ASN1::Sequence!\nTry install again using:\n\tperl Makefile.PL\n\tmake\nQuitting now");
@@ -1,5 +1,7 @@
 #!/usr/bin/env perl -w
 use strict;
+use warnings;
+use File::Spec;
 use Test::More tests => 10;
 my ($nogene, $noseq);
@@ -12,7 +14,7 @@ BEGIN {
 diag("\n\nFirst testing gene parser:\n");
-  my $parser = Bio::ASN1::EntrezGene->new(file => 't/input.asn');
+  my $parser = Bio::ASN1::EntrezGene->new(file => File::Spec->catfile('t','input.asn'));
   isa_ok($parser, 'Bio::ASN1::EntrezGene');
   my $value = $parser->next_seq;
   isa_ok($value, 'ARRAY');
@@ -27,7 +29,7 @@ else
 diag("\n\nNow testing sequence parser:\n");
-  my $parser = Bio::ASN1::Sequence->new(file => 't/seq.asn');
+  my $parser = Bio::ASN1::Sequence->new(file => File::Spec->catfile('t','seq.asn'));
   isa_ok($parser, 'Bio::ASN1::Sequence');
   my $value = $parser->next_seq;
   isa_ok($value, 'ARRAY');