The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
#!/usr/bin/env perl

use threads;
use warnings;
use strict;

use Data::Dumper;
use Carp;

use 5.010;

use Bio::Gonzales::Util::Text qw/ccount_iter/;

use Bio::Gonzales::Seq::IO qw/faiterate/;
use JSON::XS;
use List::Util qw/sum/;
my $js = JSON::XS->new->utf8->allow_nonref->canonical(1);

use Getopt::Long::Descriptive;
use Thread::Task::Concurrent qw(tmsg);

# Command line: one positional <file> argument plus the options below.
my ( $opt, $usage ) = describe_options(
  '%c %o <file>', [],
  [ 'threads|t=i', "use threads", { default => 1 } ],
  [ 'help',        "print usage message and exit" ],
  [ 'by_seq',      "print counts by sequence" ],
);

if ( $opt->help ) {
  print $usage->text;
  exit;
}

my $file = shift;

# Check defined/length instead of truthiness so a file literally named "0"
# is not rejected before the -f test gets a chance to look at it.
confess "no file" unless ( defined $file && length $file && -f $file );

# Worker queue running task() on every enqueued item; --threads is passed
# as max_instances.
my $tq = Thread::Task::Concurrent->new( task => \&task, max_instances => $opt->threads, verbose => 0 )->start;

# Hand every sequence to the workers as an [ id, sequence ] pair.
my $iterator = faiterate($file);
while ( my $s = $iterator->() ) {
  $tq->enqueue( [ $s->id, $s->seq ] );
}

# Used below as an array ref of task() return values, i.e. [ id, counts ] pairs.
# NOTE(review): presumably join() blocks until all workers are done - confirm.
my $res = $tq->join->result;

# Genome-wide character counts, accumulated over all sequences.
my %counts;
for my $r (@$res) {
  my ( $id, $counts_by_seq ) = @$r;

  if ( $opt->by_seq ) {
    # sum() returns undef for an empty list (empty sequence); report 0, not null.
    my $seq_total = sum( values %$counts_by_seq ) // 0;

    # '_' carries the total. NOTE(review): a literal '_' character in the
    # sequence would overwrite this key, since the counts are merged last.
    say $js->encode( { seq_id => $id, '_' => $seq_total, %$counts_by_seq } );
  }

  $counts{$_} += $counts_by_seq->{$_} for keys %$counts_by_seq;
}

# Same undef-to-0 guard for an input without any sequences/characters.
my $total = sum( values %counts ) // 0;
say $js->encode( { seq_id => 'genome', '_' => $total, %counts } );

# Worker callback for the task queue: count the characters of one sequence.
#
# Takes a single array ref [ $id, $seq ], as enqueued by the main loop.
# Returns an array ref [ $id, $counts ], where $counts is whatever the
# ccount_iter() closure returns for $seq; the caller treats it as a hash ref
# mapping character => count.
sub task {
  my ( $id, $seq ) = @{ $_[0] };

  # One fresh counter per sequence so counts are not shared between tasks.
  # (The original built a counter here, left it unused, and then created a
  # second one for the actual call.)
  my $counter = ccount_iter();
  return [ $id, $counter->($seq) ];
}