The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# Last changed Time-stamp: <2016-04-21 15:52:48 mtw>
#
# ***********************************************************************
# *  Copyright notice
# *
# *  Copyright 2016 Michael T. Wolfinger <michael@wolfinger.eu>
# *  All rights reserved
# *
# *  This program is free software: you can redistribute it and/or modify
# *  it under the terms of the GNU General Public License as published by
# *  the Free Software Foundation, either version 3 of the License, or
# *  (at your option) any later version.
# *
# *  This program is distributed in the hope that it will be useful,
# *  but WITHOUT ANY WARRANTY; without even the implied warranty of
# *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# *  GNU General Public License for more details.
# *
# *  You should have received a copy of the GNU General Public License},
# *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
# *
# *  This copyright notice MUST APPEAR in all copies of the script!
# ***********************************************************************

use warnings;
use strict;
use PerlIO::gzip;
use Getopt::Long qw( :config posix_default bundling no_ignore_case );
use Pod::Usage;

#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#
#^^^^^^^^^^ Variables ^^^^^^^^^^^#
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#

my ($file, $five, $three, $a5, $str, $count, $last);
my $len = 0;
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#
#^^^^^^^^^^^^^^ Main ^^^^^^^^^^^^^#
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#

Getopt::Long::config('no_ignore_case');
pod2usage(-verbose => 0) unless GetOptions(
					   "fastq|f=s" => \$file,
					   "up|u=s"    => \$five,
					   "down|d=s"  => \$three,
					   "a5=s"      => \$a5,
					   "man"       => sub{pod2usage(-verbose => 2)},
					   "help|h"    => sub{pod2usage(-verbose => 1)}
					  );

unless ( $file ) {
  warn "Please provide the name of the fastq via the -f option";
  pod2usage(-verbose => 0);
}

unless ($a5) {
  unless ( $five && $three ){
    warn "Please define trim length via -u  and -d";
    pod2usage(-verbose => 0);
  }
}
else {
  if ( $five && $three ){
    warn "The -a5 and -u/-d options are mutually exclusive. Please do not use -u/-d when trimming a 5' adapter sequence";
    pod2usage(-verbose => 0);
  }
  $len = length($a5);
}

unless ( -f $file) {
  warn "Could not find input file $file given via -f option";
  pod2usage(-verbose => 0);
}

$count = 0;
$len = length($a5);

open(IN,"<:gzip(autopop)",$file)|| die "Can't open file: $!";
my $x=1;
while(<IN>){
  my $string="$_";
  chomp($string);
  $string =~ s/\cn/\\cn\cn/gs;
  if( (++$x)%2 ){
  #  print "> $x $string\n";
    if( $a5){
      my $pattern = qr/$a5/;
      if ($string =~ m/^$pattern/){
	$count++;
	$str = substr($string,$len);
	$last = $x;
      }
      if ($x == $last + 2){ # trim quality string for previously trimmed read
	$str = substr($string,$len);
	$last = -1;
      }
      print "$x $str\n";
    }
    else{
      my $i = substr($string,$three,-$five);
      print "$i\n";
    }
  }
  else {
    print "* $string\n";
  }
}
close(IN);

print "found adapter $a5 $count times\n"

__END__

=head1 NAME

trim_fastq.pl - Trim sequence and quality string of fastq files on the fly

=head1 SYNOPSIS

trim_fastq.pl [-f I<FILE>] [-u I<INTEGER>] [-d I<INTEGER>] [-a5 I<STRING>]
[options]

=head1 DESCRIPTION

This program trims sequence and qualitystring fields from a fastq
file, either by a number of nucleotides, or by removing a user defined
5' adapter sequence. The typical usage scenario is to allow re-mapping
if mapping quality is not sufficient without trimming.


=head1 OPTIONS

=over1

=item B<--fastq -f>

Fastq file for trimming

=item B<--up -u>

Number of nucleotides to trim from read / quality string start

=item B<--down -d>

Number of nucleotides to trim from read / quality string end

=item B<--a5>

Sequence of 5' adapter to trim from the left side of the reads. This
option and -u / -d are mutually exclusive

=item B<--help -h>

Print short help

=item B<--man>

Prints the manual page and exits

=back

=head1 AUTHORS

Joerg Fallmann E<lt>joerg.fallmann@univie.ac.atE<gt>
Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt>

=cut