#!/usr/bin/env perl
use threads;
use warnings;
use strict;
use Data::Dumper;
use Carp;
use 5.010;
use Bio::Gonzales::Util::Text qw/ccount_iter/;
use Bio::Gonzales::Seq::IO qw/faiterate/;
use JSON::XS;
use List::Util qw/sum/;
# JSON encoder shared by every output line; canonical(1) keeps key order stable.
my $js = JSON::XS->new->utf8->allow_nonref->canonical(1);
use Getopt::Long::Descriptive;
use Thread::Task::Concurrent qw(tmsg);

# Command line: options first, then the input file as the only positional argument.
my ( $opt, $usage ) = describe_options(
    '%c %o <file>',
    [],
    [ 'threads|t=i', "use threads", { default => 1 } ],
    [ 'help',        "print usage message and exit" ],
    [ 'by_seq',      "print counts by sequence" ],
);

if ( $opt->help ) {
    print( $usage->text );
    exit;
}

# Whatever is left in @ARGV after option parsing must name an existing plain file.
my $file = shift @ARGV;
confess "no file" if ( !$file || !-f $file );

# Start the worker queue; each queued item is handed to task().
my $tq = Thread::Task::Concurrent->new(
    task          => \&task,
    max_instances => $opt->threads,
    verbose       => 0,
)->start;

# Feed every record yielded by the iterator to the workers as an [ id, seq ] pair.
my $iterator = faiterate($file);
while ( my $record = $iterator->() ) {
    $tq->enqueue( [ $record->id, $record->seq ] );
}

# Join the queue and pick up the collected task results.
my $res = $tq->join->result;
# Walk the task results (each an array ref [ $id, $counts_by_seq ]), optionally
# print one JSON line per sequence, and finish with one JSON line for the
# whole input. Every line has the shape
#   { seq_id => ..., '_' => <sum of all counts>, <key> => <count>, ... }
# where the <key>/<count> pairs come straight from the count hash.
my %counts;
for my $r (@$res) {
    my ( $id, $counts_by_seq ) = @$r;

    if ( $opt->by_seq ) {
        # sum() returns undef for an empty list; report 0 rather than JSON null.
        my $seq_total = sum( values %$counts_by_seq ) // 0;
        say $js->encode( { seq_id => $id, '_' => $seq_total, %$counts_by_seq } );
    }

    # Fold this sequence's counts into the running totals.
    for my $c ( keys %$counts_by_seq ) {
        $counts{$c} += $counts_by_seq->{$c};
    }
}

# Same undef guard as above: an input without any records yields a total of 0.
my $total = sum( values %counts ) // 0;
say $js->encode( { seq_id => 'genome', '_' => $total, %counts } );
# Worker callback registered with Thread::Task::Concurrent (task => \&task).
# Takes one queued item, an array ref [ $id, $seq ], and returns an array ref
# [ $id, $counts ], where $counts is whatever the counter built by
# ccount_iter() yields for $seq (the caller dereferences it as a hash).
sub task {
    my ( $id, $seq ) = @{ $_[0] };

    # Build one counter for this task and apply it to the sequence; the
    # original built a second, redundant counter and left this one unused.
    my $counter = ccount_iter();
    return [ $id, $counter->($seq) ];
}