FindChunks.pm - metacpan.org

package Audio::FindChunks;

use 5.00503;
use strict;

use Data::Flow qw(0.09);

BEGIN {
  require DynaLoader;
  use vars qw($VERSION @ISA);

  @ISA = qw(DynaLoader);

  $VERSION = '2.03';

  bootstrap Audio::FindChunks $VERSION;
  my $do_dbg	   = !!$ENV{FIND_CHUNKS_DEBUG};	# Convert to logical
  eval "sub do_dbg () {$do_dbg}";
}

die "Version 1.00 of Data::Flow is defective.  Upgrade!" if $Data::Flow::VERSION eq '1.00';

# Preloaded methods go here.

sub default ($$$) {my ($o, $k, $v) = @_; $o->{$k} = $v unless defined $o->{$k}}

my $le_short_size  = length pack 'v', 0;
my $short_size	   = length pack 's', 0;
my $int_size	   = length pack 'i', 0;
my $long_post	   = ($] >= 5.006 ? '!' : '');
my $long	   = "l$long_post";
my $long_size	   = length pack $long, 0;
my $double_size    = length pack 'd', 0;
my $pointer_size   = length pack 'p', 0;
my $pointer_unpack = (($pointer_size == $int_size) ? 'I' : "L$long_post");
my $long_min	   = unpack $long, pack $long, -1e100;
my $long_max	   = -$long_min-1;
my $do_dbg	   = $ENV{FIND_CHUNKS_DEBUG};

sub le_short_sample_multichannel ($$$$$$) {
  my ($totstride, $stride, $channels, $out, $chunksize) =
    (shift,shift,shift,shift,shift);
  my $size = length $_[0];
  my $bufaddr = unpack $pointer_unpack, pack 'p', $_[0];
  die "Size of buffer not multiple of total stride" if $size % $totstride;
  # Do in multiples of 7K (to falicitate lcd 8K Level I cache)
  $chunksize = $totstride * int((7*(1<<10))/$totstride) unless defined $chunksize;
  my $processed = 0;
  while ($size > 0) {
    $chunksize = $size if $chunksize > $size;
    $size -= $chunksize;
    my $samples = $chunksize / $totstride;
    $processed += $samples;
    for my $c (0..$channels-1) {
      warn sprintf "Ch %d: Samples %d %d %d %d ..., totstride %d, %d samples\n", 
	$c, unpack('s4', unpack 'P8', pack $pointer_unpack, $bufaddr + $stride * $c), $totstride, $samples
	  if do_dbg();
#  void le_short_sample_stats(char *buf, int stride, long samples, array_stats_t *stat)
      le_short_sample_stats($bufaddr + $stride * $c, $totstride, $samples,
			    $out->[$c]);
      warn sprintf "  => %d\n", unpack 'd', $out->[$c] if do_dbg();
    }
    $bufaddr += $chunksize;
  }
  return $processed;
}

sub rnd ($) {sprintf '%.0f', shift}

my $wav_header = <<EOH;
  a4	# header: 'RIFF'
  V	# size: Size of what follows
  a4	# type: 'WAVE'

  a4	# type1: 'fmt ' subchunk
  V	# size1: Size of the rest of subchunk
  v	# format: 1 for pcm
  v	# channels: 2 stereo 1 mono
  V	# frequency
  V	# bytes_per_sec
  v	# bytes_per_sample
  v	# bits_per_sample_channel

  a4	# type2: 'data' subchunk
  V	# sizedata: Size of the rest of subchunk
EOH

my @wav_fields = ($wav_header =~ /^\s*\w+\s*#\s*(\w+)/mg);

$wav_header =~ s/#.*//g;		# For v5.005

my $header_size = length pack $wav_header, (0) x 20;
sub MY_INF () {1e200}

sub wav_eat_header ($) {
  my $fh = shift;
  my $in;
  my $read = sysread $fh, $in, $header_size or die "can't read the header";
  return {buf => $in} unless $read == $header_size;
  my %vals;
  @vals{@wav_fields} = unpack $wav_header, $in or return {buf => $in};
  return {buf => $in} unless $vals{header} eq 'RIFF';
  die "Unexpected RIFF format"
    unless $vals{type} eq 'WAVE' and $vals{type1} eq 'fmt '
      and $vals{size1} == 0x10 and $vals{format} == 1
	and $vals{bits_per_sample_channel} == 16 and $vals{format} == 1
	  and $vals{type2} eq 'data';
  $vals{buf} = $in;
  return \%vals;
}

sub SOUND () {2}		# Constants... Rarely promoted or demoted
sub SIGNAL () {1}		# May be promoted or demoted
sub NOISE () {0}		# Likewise
sub SILENCE () {-1}		# Rarely promoted or demoted

sub merge_blocks ($) {		# array ref: 0: type, 1: start, 2: len
  my $blocks = shift;
  my $c = 0;
  my @new;
  for my $b (@$blocks) {
    push(@new, [@$b]), next if not @new or $b->[0] != $new[-1][0];
    $new[-1][2] += $b->[2];
  }
  \@new
}

my %defaults = (
  # For getting PCM flow (and if averaging data is read from cache)
    frequency => 44100,
    bytes_per_sample => 4,
    channels => 2,
    sizedata => MY_INF,
    out_fh => \*STDOUT,
    preprocess => {mp3 => [[qw(lame --silent --decode)], [], ['-']]}, # Second contains extra args to read stdin
  # For getting RMS info
    sec_per_chunk => 0.1,
  # RMS cache
    rms_extension => '.rms',
  # For threshold calculation
    threshold_in_sorted_min_rel => 0,
    threshold_in_sorted_min_sec => 1,
    threshold_in_sorted_max_rel => 0.5,
    threshold_in_sorted_max_sec => 0,
    threshold_ratio => 0.15,
    threshold_factor_min => 1,
    threshold_factor_max => 1,
  # Chunkification: smoothification
    above_thres_window => 11,
    above_thres_window_rel => 0.25,
  # Chunkification
    max_tracks => 9999,
    min_signal_sec => 5,
    min_silence_sec => 2,
    ignore_signal_sec => 1,
  # Final enlargement
    local_level_ignore_pre_sec => 0.3,
    local_level_ignore_post_sec => 0.3,
    local_level_ignore_pre_rel => 0.02,
    local_level_ignore_post_rel => 0.02,
    local_threshold_factor => 1.05,
    extend_track_end_sec => 0.5,
    extend_track_begin_sec => 0.3,
    min_boundary_silence_sec => 0.2,
  );

my %mirror_from = (	# May be set separately, otherwise are synonims
    min_actual_silence_sec => 'min_silence_sec',
    min_start_silence_sec => 'min_boundary_silence_sec',
    min_end_silence_sec => 'min_boundary_silence_sec',
    cache_rms_write => 'cache_rms',
    cache_rms_read => 'cache_rms',
    min_silence_chunks_merge => 'min_silence_chunks',
  );

my %chunk_times =
  map {	(my $n = $_) =~ s/_sec/_chunks/;
	($n => {'filter'
		=> [sub {rnd(shift()/shift)}, $_, 'sec_per_chunk']}) }
    grep /_sec$/, keys %defaults, keys %mirror_from;

my @recognized =	# these default to undef, but accessing them is not fatal
  qw(filename stem_strip_extension filter raw_pcm rms_filename close_fh
     override_header_info cache_rms subchunk_size skip_medians);

my %filters = (
 # For getting RMS info
  filestem => [sub { my $f = shift;
		     return 'filehandle' unless defined $f;
		     $f =~ s/\.(\w+)$// if shift;
		     $f }, 'filename', 'stem_strip_extension'],
  input_type => [sub {	return unless defined (my $f = shift);
			return unless $f =~ /\.(\w+)$/;
			my $h = shift;
			return lc $1 if not $h->{$1} and $h->{lc $1};
			$1 }, 'filename', 'preprocess'],
  preprocess_a => [sub {return unless defined $_[0];
			$_[1]->{$_[0]} }, 'input_type', 'preprocess'],
  preprocess_input => [sub { my ($cmd, $f) = @_; return unless $cmd;
			     return [@{$cmd->[0]}, $f, @{$cmd->[2]}]
				if defined $f;
			     return [@{$cmd->[0]}, @{$cmd->[1]}, @{$cmd->[2]}];
		       }, 'preprocess_a', 'filename'],
  fh_bin => [sub { my $fh = shift; binmode $fh; $fh }, 'fh'],
  out_fh_bin => [sub {	return unless shift;
			my $fh = shift; binmode $fh; $fh
		 }, 'filter', 'out_fh'],
  rms_filename_default => [sub {shift() . shift}, 'filestem', 'rms_extension'],
  read_from_rms_file => [sub {	return if shift; # Need output stream, not only RMS
				shift or defined shift
			 }, 'filter', 'cache_rms_read', 'rms_filename'],
  write_to_rms_file => [sub {shift or defined shift},
			'cache_rms_write', 'rms_filename'],
  rms_filename_actual => [sub {my $f = shift; return $f if defined $f; shift},
			  'rms_filename', 'rms_filename_default'],
  samples_per_chunk => [sub {rnd(shift()*shift)}, 'sec_per_chunk', 'frequency'],
  bytes_per_chunk   => [sub {shift()*shift}, 'samples_per_chunk', 'bytes_per_sample'],
  rms_data_arr_f => [sub {return unless shift;
			  local *RMS; open RMS, '< ' . shift or return;	# No file is OK
			  binmode *RMS;
			  my $c = -s \*RMS;
			  my @in;
			  26 == sysread RMS, $in[0], 26 or die "Short read on RMS";
			  $in[0] =~ /^GramoFile Binary RMS Data\n/i
			      or die "Unknown format of RMS file";
			  $c - 26 == sysread RMS, $in[0], $c - 26 or die "Short read on RMS";
			  push @in, unpack "${long}2", substr $in[0], 0, 2*$long_size;
			  substr($in[0], 0, 2*$long_size) = '';
			  die "Malformed length of RMS file"	# sam/chunk, chunks
			      unless $in[2] * $double_size == length $in[0];
			  my $sam = shift;
			  die "Samples per chunk mismatch: RMSfile => $in[1], expected => $sam"	# sam/chunk, chunks
			      unless $in[1] == $sam;
			  \@in }, 'read_from_rms_file', 'rms_filename_actual',
				  'samples_per_chunk'],
 # For threshold calculation
  medians => [sub { my $av = shift; my @r = $av->[0];	# Allocate the buffer
		    double_median3($av->[0], $r[0], shift) unless shift;
		    \@r }, 'rms_data', 'skip_medians', 'chunks'],
  sorted => [sub { my $av = shift; my @r = $av->[0];	# Allocate the buffer
		   double_sort($av->[0], $r[0], shift);
		   \@r }, 'medians', 'chunks'],
  map(("threshold_in_sorted_$_" =>
	 [sub {	my ($c, $r) = shift; $r = $c*shift() + shift() - 1;
		$r = $c - 1 unless $r < $c - 1;
		$r = 0 unless $r > 0; $r
	 }, 'chunks', "threshold_in_sorted_${_}_rel", "threshold_in_sorted_${_}_chunks"],
       "threshold_$_" =>
	 [sub { shift() *
		sqrt unpack 'd', 
		    substr shift->[0], $double_size * rnd(shift), $double_size
	  }, "threshold_factor_$_", 'sorted', "threshold_in_sorted_$_"]),
      'max', 'min'),
  threshold => [sub { my $min = shift; shift() * (shift()-$min) + $min
		}, 'threshold_min', 'threshold_ratio', 'threshold_max'],
 # Chunkification: smoothification
  above_thres => [sub {	my $c = shift; my @r = 'x' x ($int_size * $c); # Reserve space
			double_find_above(shift->[0], $r[0], $c, shift()**2);
			\@r }, 'chunks', 'rms_data', 'threshold'],
  above_thres_in_window => [sub { my $a = shift; my @r = $a->[0];  # Reserve space
				  int_sum_window($a->[0], $r[0], shift, shift);
			    \@r}, 'above_thres', 'chunks', 'above_thres_window'],
  above_thres_window_abs => [sub {shift()*shift},
			     'above_thres_window_rel', 'above_thres_window'],
  maybe_signal => [sub { my $a = shift; my @r = $a->[0]; # Reserve space
			 int_find_above($a->[0], $r[0], shift, shift); \@r
		   }, 'above_thres_in_window', 'chunks', 'above_thres_window_abs'],
 # Chunkification
  maybe_trk_pk => [sub { my $max = shift; my @r = 'x' x (3*$long_size*$max); # Reserve space
			 my $c = bool_find_runs(shift->[0], $r[0], shift, $max);
			 die "Max count $max of track candidates exceeded"
			    unless $c >= 0;
			 substr($r[0], 3*$long_size*$c) = '';	# Truncate
		         \@r }, 'max_tracks', 'maybe_signal', 'chunks'],
 # Unpack
  b0 => [sub {	my ($c, @b) = -1; my $tracks = shift->[0];
		my $cnt = length($tracks)/(3*$long_size);
		my @bl = unpack $long.(3*$cnt), $tracks;
		while (++$c < $cnt) { # [SIGNAL/NOISE, start, len]
		    push @b, [@bl[3*$c, 3*$c + 1, 3*$c + 2]];
		} return [@b] }, 'maybe_trk_pk'],
 # "Force" long enough blocks
  b1 => [sub {	my @b = map [@$_], @{shift()};	# Deep copy
		my ($min_sign, $min_sil) = (shift, shift);
		for my $t (@b) {
		      $t->[0] = SOUND, next
			if $t->[0] == SIGNAL  and $t->[2] >= $min_sign;
		      $t->[0] = SILENCE, next
			if $t->[0] == NOISE and $t->[2] >= $min_sil;
		}
		# Force silence if it happens at boundary:
		$b[$_]->[0] == NOISE and $b[$_]->[0] = SILENCE
		  for 0, -1;
		\@b }, 'b0', 'min_signal_chunks', 'min_silence_chunks'],
 # Ignore short bursts of signals (may be reversed later)
  b2 => [sub {	my @b = map [@$_], @{shift()};	# Deep copy
		my ($c, $ign_sign) = (0, shift);
		while (++$c < @b - 1) { # XXXX What about those with SILENCE?
		  $b[$c]->[0] = NOISE
		    if $b[$c]->[0] == SIGNAL and $b[$c]->[2] <= $ign_sign
		      and $b[$c-1]->[0] == NOISE and $b[$c+1]->[0] == NOISE
		}		# After ignoring, need to merge similar blocks
		merge_blocks \@b }, 'b1', 'ignore_signal_chunks'],
 # Long enough silence block could appear after b1 ==> b2...
  b3 => [sub {	my @b = map [@$_], @{shift()};	# Deep copy
		my $min_sil_mrg = shift;
		for my $t (@b) {
		  $t->[0] = SILENCE, next
		    if $t->[0] == NOISE and $t->[2] >= $min_sil_mrg;
		}		# Need to merge similar blocks???
		merge_blocks \@b }, 'b2', 'min_silence_chunks_merge'],
 # All undecided are signal unless between two silence intervals
  b4 => [sub {	my @b = map [@$_], @{shift()};	# Deep copy
		my ($left, $c) = (SILENCE, -1);
		while (++$c < @b) {
		  my $this = $b[$c][0];
		  $left = $this, next if $this == SILENCE or $this == SOUND;
		  # Found undecided, force to SOUND unless between two SILENCE
		  $b[$c][0] = SOUND, next if $left == SOUND;
		  # $left is SILENCE, need to check the right one...
		  my ($right, $cr) = (SILENCE, $c);
		  while (++$cr < @b) {
		    my $r = $b[$cr][0];
		    $right = $r, last if $r == SILENCE or $r == SOUND;
		  }
		  $b[$c++][0] = $right while $c < $cr;
		  $left = $right;
		}		# After ignoring, need to merge similar blocks
		merge_blocks \@b }, 'b3'],
 # Final enlargement of signal
  b => [sub {	my @b = map [@$_], @{shift()};	# Deep copy
		my ($ign_pre, $ign_pre_rel, $ign_post, $ign_post_rel) = (shift, shift, shift, shift);
		my ($meds, $thres_factor) = (shift, shift);
		my ($ext_beg, $ext_end) = (shift, shift);
		my ($min_silence, $min_silence_s, $min_silence_e) = (shift, shift, shift);
		my $c = -1;
		for my $b (@b) {
		  ++$c;
		  next unless $b->[0] == SILENCE;
		  my $pre  = rnd($ign_pre  + $ign_pre_rel  * $b->[2]);
		  my $post = rnd($ign_post + $ign_post_rel * $b->[2]);
		  my $ilen = $pre + $post;
		  next unless $b->[2] > $ilen;
		  my $s = $b->[1];
		  my $av = double_sum( $meds->[0], $s + $pre, $b->[2] - $ilen ) / ($b->[2] - $ilen);
		  $av *= $thres_factor*$thres_factor;

		  my $e = $s + $b->[2];
		  if ($c) {		# Not for the "leading gap"
		    while ($s < $e) {
		      my $lev = unpack 'd',
			substr $meds->[0], $s*$double_size, $double_size;
		      last if $lev <= $av;
		      $s++;
		    }
		    my $add = $e - $s;
		    $add = $ext_end if $add > $ext_end;
		    $s += $add;
		    $b[$c-1]->[2] += $s - $b->[1];
		    $b->[2] -= $s - $b->[1];
		    $b->[1] += $s - $b->[1];
		  }
		  if ($c != @b-1) {
		    my $e_ini = $e;
		    while ($s < $e) {
		      my $lev = unpack 'd',
			substr $meds->[0], ($e-1)*$double_size, $double_size;
		      last if $lev <= $av;
		      $e--;
		    }
		    my $add = $e - $s;
		    $add = $ext_beg if $add > $ext_beg;
		    $e -= $add;
		    $b[$c+1]->[2] += $e_ini - $e;
		    $b[$c+1]->[1] -= $e_ini - $e;
		    $b->[2] -= $e_ini - $e;
		  }
		  my $min_sil = ($c == 0 ? $min_silence_s :
				 ($c == $#b ? $min_silence_e : $min_silence));
		  $b->[0] = SOUND if $b->[2] < $min_sil;
		} # After ignoring short silence, need to merge similar blocks
		merge_blocks \@b
	 }, 'b4', 'local_level_ignore_pre_chunks', 'local_level_ignore_pre_rel',
	 'local_level_ignore_post_chunks', 'local_level_ignore_post_rel',
	 'medians', 'local_threshold_factor', 'extend_track_begin_chunks',
	 'extend_track_end_chunks', 'min_actual_silence_chunks',
         'min_start_silence_chunks', 'min_end_silence_chunks'],
  );

my %recipes = (
  map(($_ => {default => $defaults{$_}}), keys %defaults),
  map(($_ => {filter => [sub {shift}, $mirror_from{$_}]}), keys %mirror_from),
  %chunk_times,
  map( ($_ => {default => undef}),
	@recognized),
  map(($_ => {filter => $filters{$_}}), keys %filters),
  map(($_ => {prerequisites => ['rms_data']}), 'chunks', 'min', 'max'),
  fh => {self_filter =>
	 [sub {	my ($self, $cmd) = (shift, shift); local *FH;
		if ($cmd) { $cmd = '"' . join('" "', @$cmd) . '"';
		    open FH, "$cmd |" or die "pipe open($cmd) error: $!";
		} else {
		    my $filename = shift;
		    return \*STDIN unless defined $filename;
		    open FH, "< $filename" or die "open($filename) error: $!";
		}
		$self->set(close_fh => 1) unless $self->already_set('close_fh');
		return *FH }, 'preprocess_input', 'filename']},
  rms_data => { oo_output => sub {
		    my $s = shift;
		    my $d = $s->get('rms_data_arr_f');
		    if (defined $d) {
			$s->set(chunks => $d->[2]);
			return $d;
		    }
		    return read_averages($s);
		}},
  );

sub __s_size() {length pack "d2 ${long}2", 0, 0, 0, 0}

sub read_averages ($) {
  my $self = shift;
  my $fh = $self->get('fh_bin');
  my $vals = {};
  $vals = wav_eat_header($fh) unless $self->get('raw_pcm');
  if ($self->get('override_header_info')) {
    for my $k (keys %$vals) {
      $self->set($k => $vals->{$k}) unless $self->already_set($k)
    }
  } else {
    for my $k (keys %$vals) {
      $self->set($k => $vals->{$k})
    }
  }
  my $out_fh = $self->get('out_fh_bin');
  my $buf = $vals->{buf};
  syswrite $out_fh, $buf or die "Error duping output: $!"
    if $out_fh and $vals->{header};	# in PCM mode we write later
  my $off = ($vals->{header} ? 0 : length $buf);
  my @stats = (pack "d2 ${long}2", 0, 0, $long_max, $long_min) x $self->get('channels');

  my $read = $self->get('bytes_per_chunk') - $off;
  my $rem = $self->get('sizedata');
  $rem = MY_INF if $rem == 0x7fffffff;		# Lame puts this sometimes...
  defined (my $cnt = read $fh, $buf, $read, $off)
    or die "Error reading the first chunk: $!";
  syswrite $out_fh, $buf or die "Error duping output: $!"
    if $out_fh;
  $rem -= $cnt;
  die "short read" unless $rem <= 0 or $rem == MY_INF or $cnt == $read;
  my @d = '';
  my ($c, $b_p_s, $channels, $subchunk, $b_p_c) =
    (0, map $self->get($_), qw(bytes_per_sample channels subchunk_size bytes_per_chunk));
  while (1) {
    my $p = le_short_sample_multichannel($b_p_s, 2, $channels, \@stats,
					 $subchunk, $buf)  or last;
    my $max_level = 0;
    for my $s (@stats) {	# Take maximum per channel
      my $level = unpack 'd', $s;
      $max_level = $level if $max_level < $level;
      substr($s, 0, 2*$double_size) = pack 'd2', 0, 0; # Reset per-chunk sums
    }
    $d[0] .= pack 'd', $max_level / $p;
    $c++;
    #warn "avg = ", $sum_square / $p / @stats;
    last unless $rem;
    defined ($cnt = read $fh, $buf, $b_p_c)
      or die "Error reading: $!";
    $rem -= $cnt;
    die "short read: rem=$rem, cnt=$cnt, b_p_c=$b_p_c" unless $rem <= 0 or $rem == MY_INF or $cnt == $b_p_c;
    syswrite $out_fh, $buf or die "Error duping output: $!"
      if $cnt and $out_fh;
    last unless $cnt;
  }
  close $fh or die "Error closing input: $!" if $self->get('close_fh');
  $self->set(chunks => $c);
  $c = 0;
  my (@min, @max);
  for my $s (@stats) {	# Take maximum per channel
    (undef, undef, my $min, my $max) = unpack "d2 ${long}2", $s;
    $min[$c] = $min;
    $max[$c++] = $max;
  }
  $self->set(min => \@min);
  $self->set(max => \@max);
  if ($self->get('write_to_rms_file')) {
    local *RMS;
    local $\ = '';
    my $f = $self->get('rms_filename_actual');
    open RMS, "> $f"
      or die "Can't open RMS file `$f' for write: $!";
    binmode RMS;
    print RMS "GramoFile Binary RMS Data\n";
    print RMS pack "${long}2", map $self->get($_), qw(samples_per_chunk chunks);
    print RMS $d[0];
    close RMS or die "closing RMS file `$f' for write: $!";
  }
  #print "lev=$_" for map sqrt, unpack 'd*', $opts->{avgs};
  push @d, $self->get('samples_per_chunk'), $c;
  \@d
}

sub format_hms ($) {
  my $t = shift;
  my $h = int($t/3600);
  my $m = int(($t - 3600*$h)/60);
  my $s = $t - 3600*$h - $m*60;
  $s = ($h || $m) ? (sprintf '%04.1f', $s) : sprintf '%3.1f', $s;
  $m = $h ? (sprintf '%02dm', $m) : ( $m ? "${m}m" : '');
  $h = $h ? "${h}h" : '';
  "$h$m$s"
}

my @represent = ('', ':', '>');

sub output_level ($$;$) {
  my ($n, $d, $l) = (shift, shift, shift);
  my $db = 10*log(($l * 2)/(1<<30))/log(10); # Max amplitude sine wave = 0db
  my $l2 = sqrt($l);
  $db = sprintf "%.0f", $db;
  my $s = '#' x (($db+96)/3) . $represent[$db % 3];
  printf "%6d:%11s:%7.1f=%4.0fdB: %s\n", $n, format_hms($n*$d), sqrt($l), $db, $s;
}

sub output_levels ($;$) {
  my ($self, $what) = (shift, shift);
  local $\ = "";
  $what ||= 'rms_data';			# 1-element array with a 'd'-packed elt
  my ($opts,$o) = {};
  for $o ($what, qw(frequency bytes_per_sample channels sec_per_chunk
		    bytes_per_chunk)) {
    $opts->{$o} = $self->get($o);
  }
  for $o (qw(min max)) {	# Not available from RMS cache
    eval { $opts->{$o} = $self->get($o) };
  }
  print <<EOP;
Frequency: $opts->{frequency}.  Stride: $opts->{bytes_per_sample}; $opts->{channels} channels.
Chunk=$opts->{sec_per_chunk}sec=$opts->{bytes_per_chunk}bytes.
EOP
  for my $c (0..$opts->{channels}-1) {
    next unless $opts->{min};
    print "\t" if $c;
    my @l = map $opts->{$_}[$c], 'min', 'max';
    my @db = map 20*log(abs($_)/(1<<15))/log(10), @l;
    printf "ch%d: %.1f .. %.1f (%.0fdB;%.0fdB).", $c, @l, @db;
  }
  print "\n";
  my $n = 0;
  output_level($n++, $opts->{sec_per_chunk}, $_) for unpack 'd*', $opts->{$what}[0];
  $self;
}

sub output_blocks ($;$) {
  my $self = shift;
  my $opts = shift;
  my $type = 'b';
  local $\ = "";
  if ($opts and not ref $opts) {
    $type = $opts;
    $opts = {};
  }
  $opts ||= {};
  my %opts = (format => 'long', %$opts);
  my $blocks = $self->get(shift || $type);
  my $l = $self->get('sec_per_chunk');
  printf "# threshold: %s (in %s .. %s)\n",
    map $self->get($_),	qw(threshold threshold_min threshold_max)
      if $opts{format} eq 'long';
  my ($gap, $c, $b) = (0, 0);
  for $b (@$blocks) {
    $gap = $b->[2] * $l, next if $b->[0] < 0;
    printf("%s\t=%s\t# %s len=%s\n",
	   $b->[1] * $l, ($b->[1] + $b->[2]) * $l, ++$c, $b->[2] * $l), next
	if $opts{format} eq 'short';
    printf "%s\t=%s\t# n=%s duration %s; gap %s (%s .. %s; %s)\n",
      $b->[1] * $l, ($b->[1] + $b->[2]) * $l, ++$c,
      $b->[2] * $l, $gap,
	format_hms($b->[1] * $l), format_hms(($b->[1] + $b->[2]) * $l), format_hms($b->[2] * $l);
  }
}

my $splitter_loaded;

sub split_file ($;$$) {
  my ($self, $opt) = (shift, shift);
  my $blocks = $self->get(shift || 'b');
  my $t = $self->get('input_type');
  die "Only MP3 split supported" unless $t and $t eq 'mp3';
  my $l = $self->get('sec_per_chunk');
  my @req = map [$_->[1] * $l, $_->[2] * $l], grep $_->[0] > 0, @$blocks
    or return;
  require MP3::Splitter;
  die "MP3::Splitter v0.02 required"
    if !$splitter_loaded++ and 0.02 > MP3::Splitter->VERSION;
  MP3::Splitter::mp3split($self->get('filename'), $opt || {}, @req);
  $self;
}

sub new {
  my $class = shift;
  my $s = new Data::Flow \%recipes;
  $s->set(@_);
  bless \$s, $class;
}
sub set ($$$) { ${$_[0]}->set($_[1],$_[2]); $_[0] }
sub get ($$)  { ${$_[0]}->get($_[1]) }

my @exchange = qw(chunks rms_data medians sorted channels min max
		  frequency bytes_per_sample sec_per_chunk bytes_per_chunk);

sub get_rmsinfo ($)  {
  my $i = ${$_[0]};
  map $i->get($_), @exchange;
}

sub set_rmsinfo ($@)  {
  my ($self, %h) = shift;
  @h{@exchange} = @_;
  map $$self->set($_, $h{$_}), @exchange;
  $self
}

1;
__END__

=head1 NAME

Audio::FindChunks - breaks audio files into sound/silence parts.

=head1 SYNOPSIS

  use Audio::FindChunks;

  # Duplicate input to output, caching RMS values to a file (as a side effect)
  Audio::FindChunks->new(rms_filename => 'x.rms', filter => 1)->get('rms_data');

  # Output human-readable info, using RMS cache file 'xxx.rms' if present:
  Audio::FindChunks->new(cache_rms => 1, filename => 'xxx.mp3',
			 stem_strip_extension => 1)->output_blocks();

  # Remove start/end silence (if longer than 0.2sec):
  Audio::FindChunks->new(cache_rms => 1, filename => 'xxx.mp3',
			 min_actual_silence_sec => 1e100)->split_file();

  # Split a multiple-sides tape recording
  Audio::FindChunks->new(filename => 'xxx.mp3', min_actual_silence_sec => 11
			)->split_file({verbose => 1});

  # Output the RMS levels of small interval in human-readable form
  Audio::FindChunks->new(filename => 'xxx.mp3')->output_levels();

=head1 DESCRIPTION

Audio sequence is broken into parts which contain only noise ("gaps"),
and parts with usable signal ("tracks").

The following configuration settings (and defaults) are supported:

  # For getting PCM flow (and if averaging data is read from cache)
    frequency => 44100,		# If 'raw_pcm' or 'override_header_info' only
    bytes_per_sample => 4,	# likewise
    channels => 2,		# likewise
    sizedata => MY_INF,		# likewise (how many bytes of PCM to read)
    out_fh => \*STDOUT,		# mirror WAV/PCM to this FH if 'filter'
  # Process non-WAV data:
    preprocess => {mp3 => [[qw(lame --silent --decode)], [], ['-']]}, # Second contains extra args to read stdin
  # RMS cache (used if 'valid_rms')
    rms_extension => '.rms',	# Appended to the 'filestem'
  # Averaging to RMS info
    sec_per_chunk => 0.1,	# The window for taking mean square
  # thresholds picking from the list of sorted 3-medians of RMS data
    threshold_in_sorted_min_rel => 0,	 # relative position of 'threashold_min' 
    threshold_in_sorted_min_sec => 1,	 # shifted by this amount in the list
    threshold_factor_min => 1,		 # the list elt is multiplied by this
    threshold_in_sorted_max_rel => 0.5,  # likewise
    threshold_in_sorted_max_sec => 0,	 # likewise
    threshold_factor_max => 1,  	 # likewise
    threshold_ratio => 0.15,		 # relative position between min/max
  # Chunkification: smoothification
    above_thres_window => 11,		 # in units of chunks
    above_thres_window_rel => 0.25, 	 # fractions of chunks above threshold
					 # in a window to make chunk signal
  # Splitting into runs of signal/noise
    max_tracks => 9999,			 # fail if more signal/noise runs
    min_signal_sec => 5,		 # such runs of signal are forced
    min_silence_sec => 2,		 # likewise
    ignore_signal_sec => 1,		 # short runs of signal are ignored
    min_silence_chunks_merge (see below) # and long resulting runs of silence
					 # are forced
  # Calculate average signal in an interval "deeply inside" silence runs
    local_level_ignore_pre_sec => 0.3,	 # offset the start of this interval
    local_level_ignore_pre_rel => 0.02,  # additional relative offset
    local_level_ignore_post_sec => 0.3,  # likewise for end of the interval
    local_level_ignore_post_rel => 0.02, # likewise
  # Enlargement of signal runs: attach consequent chunks with signal this much
  # above this average over the neighbour silence run
    local_threshold_factor => 1.05,
  # Final enlargement of runs of signal
    extend_track_end_sec => 0.5,	 # Unconditional enlargement
    extend_track_begin_sec => 0.3,	 # likewise
    min_boundary_silence_sec => 0.2,	 # Ignore short silence at start/end

Note that C<above_thres_window> is the only value specified directly in
units of chunks; the other C<*_sec> may be optionally specified in units
of chunks by setting the corresponding C<*_chunks> value.  Note also that
this window should better be decreased if minimal allowed silence length
parameters are decreased.

These values are mirrored from other values if not explicitly specified:

 min_actual_silence_sec << min_silence_sec		# Ignore short gaps
 min_start_silence_sec  << min_boundary_silence_sec	# Same at start
 min_end_silence_sec    << min_boundary_silence_sec	# Same at end
 min_silence_chunks_merge << min_silence_chunks		# See above

 cache_rms_write <<< cache_rms	  # Boolean: write RMS cache
 cache_rms_read  <<< cache_rms	  # Boolean: read RMS cache (unless 'filter')

The following values default to C<undef>:

    filename			# if undef, read data from STDIN
    stem_strip_extension	# Boolean: 'filestem' has no extension
    filter			# If true, PCM data is mirrored to out_fh
    rms_filename		# Specify cache file explicitly
    raw_pcm			# The input has no WAV header
    override_header_info	# The user specified values override WAV header
    cache_rms			# Use cache file (see *_write, *_read above)
    skip_medians		# Boolean: do not calculate 3-medians
    subchunk_size		# Optimization of calculation of RMS; the
				# best value depends on the processor cache

=head1 METHODS

=over

=item C<new(key1 =E<gt> value1, key2 =E<gt> value2, ....)>

The arguments form a hash of configuration parameters.

=item C<set(key =E<gt> value)>

set a configuration parameter.

=item C<get(key)>

get a configuration parameter or a value which may be calculated basing on
them.

=item C<output_levels([key])>

prints a human-readable display of RMS (or similar) values.  Defaults to
C<rms_data>; additional possible values are C<medians> and C<sorted>.

The format of the output data is similar to

  Frequency: 44100.  Stride: 4; 2 channels.
  Chunk=0.1sec=17640bytes.
  ch0: -9999.0 .. 9999.0 (-10dB;-10dB).	ch1: -9999.0 .. 9999.0 (-10dB;-10dB).
       0:        0.0:   20.7= -61dB: ###########>
       1:        0.1:   20.7= -61dB: ###########>
       2:        0.2:   20.7= -61dB: ###########>
  ...

(with the C<ch0 ETC> line empty if data is read from an RMS file).  Each
chunk gives a line with the chunk number, start (in sec), RMS intensity
(in linear scale and in decibel), and the graphical representation of the
decibel level (each C<#> counts as 3dB, C<:> adds 1dB, and C<E<gt>>
adds 2dB).

=item C<output_blocks([option_hashref], [key])>

prints a human-readable display of obtained audio chunks.  C<key> defaults to
C<b>; additional possible values are C<b0> to C<b4>.  Recognized options key
is C<format>; defaults to C<long>, which results in windy output; the value
C<short> results in shorter output and no preamble.  Preamble lines are all
C<#>-commented; any output line is in the form

  START_SEC =END_SEC # COMMENT

With C<short> format there is no preamble, and (currently) C<COMMENT> is of
the form C<PIECE_NUMBER len=PIECE_DURATION_SEC>.  These formats are
recognized, e.g., by MP3::Split::mp3split_read().

The default format is currently

  # threshold: 1078.46653890971 (in 20.7214163971884 .. 7072.35556648067)
  4.4	=25.8	# n=1 duration 21.4; gap 4.4 (4.4 .. 25.8; 21.4)
  27.7	=67	# n=2 duration 39.3; gap 1.9 (27.7 .. 1m07.0; 39.3)

=item C<split_file([options], [key])>

Splits the file (only MP3 via L<MP3::Splitter> is supported now).  The
meaning of options is the same as for L<MP3::Splitter>.  Defaults to
blocks of type C<b>; additional possible values are C<b0> to C<b4>.

=item @vals = get_rmsinfo(); set_rmsinfo(@vals)

Duplicate RMS info between two different C<Audio::FindChunks> objects.
The exchanged info is the following:

    chunks rms_data medians sorted channels min max
    frequency bytes_per_sample sec_per_chunk bytes_per_chunk

set_rmsinfo() returns the object itself.

=back

=head1 set() and get()

=head2 In and Out

The functionality of the module is modelled on the architecture of
L<Data::Flow>: the two principal methods are C<set(key =E<gt> value)>
and C<get(key)>; the module knows how to calculate keys basing on values of
other keys.

The results of calculation are cached; in particular, if one needs to calculate
some value for different values of a configuration parameter, one should
create many copies of C<Audio::FindChunks> object, as in

  my @info = Audio::FindChunks->new(filename => $f)->get_rmsinfo;
  for my $ratio (0..100) {
    Audio::FindChunks->new(threshold_ratio => $r/100)
	->set_rmsinfo(@info)->print_blocks();
  }

The internally used format of intermediate data is designed for quick shallow
copying even for enourmous audio files.

=head2 Dependencies

The current dependecies for values which are not explicitly set():

  filestem		<<< filename stem_strip_extension
  input_type		<<< filename
  preprocess_a		<<< input_type preprocess
  preprocess_input 	<<< preprocess_a filename
  fh AND close_fh	<<< preprocess_input filename
  fh_bin		<<< fh
  out_fh_bin		<<< filter out_fh
  rms_filename_default	<<< filestem rms_extension
  read_from_rms_file	<<< filter cache_rms_read rms_filename
  write_to_rms_file	<<< cache_rms_write rms_filename
  rms_filename_actual	<<< rms_filename rms_filename_default
  samples_per_chunk	<<< sec_per_chunk frequency
  bytes_per_chunk	<<< samples_per_chunk bytes_per_sample
  rms_data_arr_f	<<< read_from_rms_file rms_filename_actual
				samples_per_chunk
  rms_data AND chunks	<<< rms_data_arr_f OR A LOT OF OTHER PARAMETERS
  medians		<<< rms_data skip_medians chunks
  sorted		<<< medians chunks,
  threshold_in_sorted_* <<< chunks threshold_in_sorted_*_*
  threshold_min/max	<<< threshold_factor_* sorted threshold_in_sorted_min/max
  threshold		<<< threshold_min threshold_ratio threshold_max
  above_thres		<<< chunks rms_data threshold
  above_thres_in_window	<<< above_thres chunks above_thres_window
  above_thres_window_abs<<< above_thres_window_rel above_thres_window
  maybe_signal		<<< above_thres_in_window chunks above_thres_window_abs
  maybe_trk_pk		<<< max_tracks maybe_signal chunks
  b0			<<< maybe_trk_pk
  b1			<<< b0 min_signal_chunks min_silence_chunks
  b2			<<< b1 ignore_signal_chunks
  b3			<<< b2 min_silence_chunks_merge
  b4			<<< b3
  b			<<< b4 local_level_ignore_*
				medians local_threshold_factor
				extend_track_begin_chunks
				extend_track_end_chunks
				min_actual_silence_chunks
				min_start_silence_chunks min_end_silence_chunks

If C<rms_data> is not read from cached source, a lot of other fields may
be also set from the WAV header (unless C<raw_pcm>).

=head3 Formats

Potentially large internally-cached values are stored as array references
to decrease the overhead of shallow copying.

The data which relates to
the initial chunks (of size C<sec_per_chunk>) is stored as length 1 arrays
with packed (either by C<l*> or C<d*>, depending on the semantic) data; this
allows small memory footprint work with huge audio files, and allows
an easy implemenation of most computationally intensive work in C.

The blocks of audio/signal/noise/silence are stored as Perl arrays; each
element is a reference to an array of length 3: type (-1 for silence, 0
for noise, 1 for signal, and 2 for audio), start chunks, duration in chunks.

=head1 ALGORITHM

The algorithm for finding boundaries of parts follows closely the algorithm
used by GramoFile v1.7 (however, I<this> version is I<fully> customizable,
fully documented, and has some significant bugs fixed).  The keywords in the
discussion below refer to customization parameters; keywords of the form
C<E<gt>E<gt>E<gt>key> refer to C<get()>able values set on the step in
question.

=over

=item Smooth the input

This is done in 2 distinct steps:

Break the input into chunks of equal duration (governed by C<sec_per_chunk>);
find the acoustic energy of each channel per chunk (no customization);
energy is the quadratic average of signal level; calculate maximal
energy among channels per chunk (no customization; C<E<gt>E<gt>E<gt>rms_data>).

Trim "extremal" chunks by replacing the energy level of each chunk by
the median of it and its two neighbors (switched off if C<skip_medians>;
C<E<gt>E<gt>E<gt>medians>).

=item Calculate the signal/noise threshold

basing on the distribution (C<E<gt>E<gt>E<gt>sorted>) of smoothed values.
Governed by C<threshold_*> parameters.  C<E<gt>E<gt>E<gt>threshold_min>,
C<E<gt>E<gt>E<gt>threshold_max>, C<E<gt>E<gt>E<gt>threshold>.

=item Smooth it again

Separate into I<signal> and I<noise> chunks basing on the number of
above-threshold chunks in a small window about the given chunk.  Governed by
C<above_thres_window>, C<above_thres_window_rel>.  C<E<gt>E<gt>E<gt>maybe_signal>,
C<E<gt>E<gt>E<gt>b0>.

=item Find certain intervals of sound and silence

Long enough runs of signal chunks are proclaimed carrying sound; likewise
for noise chunks and silence.  Governed by C<max_tracks>, C<min_signal_chunks>,
C<min_silence_chunks>.  C<E<gt>E<gt>E<gt>b1>.

Long enough "unproclaimed" runs of chunks with only short bursts of
signal are proclaimed silence.  Governed by C<ignore_signal_chunks>,
C<E<gt>E<gt>E<gt>b2>; and C<min_silence_chunks_merge>, C<E<gt>E<gt>E<gt>b3>.

=item Merge undecided into sound/silence

A run of chunks (signal or noise) "yet unproclaimed" to be sound or
silence is proclaimed sound if it is adjacent to a run of sound on at
least one side.  The rest of unproclaimed runs are proclaimed silence.
No customization.

Runs of sound/silence are audio/gap candidates (no customization;
C<E<gt>E<gt>E<gt>b4>).

=item Calculate average signal level in each gap candidate

ignoring short intervals near ends of gaps.  Governed by C<local_level_*>.

=item Allow for slow attack/decay or fade in/out

Extend runs of audio: join the consequent runs of chunks of adjacent gaps
where the energy level
remains significantly larger than the average level in this gap.
Additionally, unconditionally extend the tracks by a small amount.
Governed by C<local_threshold_factor>, C<extend_track_end_chunks>,
C<extend_track_begin_chunks>.

=item Long enough gap candidates are gaps

Gaps which became too short are considered audio and are merged into
neighbors.  Governed by C<min_actual_silence_chunks>, C<min_start_silence_chunks>,
C<min_end_silence_chunks>; C<E<gt>E<gt>E<gt>b>.

=back

=head2 Functions implemented in C

  long bool_find_runs(int *input, array_run_t *output, long cnt, long out_cnt)
  void double_find_above(double *input, int *output, long cnt, double threshold)
  void double_median3(double *rmsarray, double *medarray, long total_blocks)
  void double_sort(double *input, double *output, long cnt)
  void int_find_above(int *input, int *output, long cnt, int threshold)
  void int_sum_window(int *input, int *output, long cnt, int window_size)
  void le_short_sample_stats(char *buf, int stride, long samples, array_stats_t *stat)

=head1 SEE ALSO

C<Data::Flow>, C<MP3::Split>

=head1 AUTHOR

Ilya Zakharevich, E<lt>cpan@ilyaz.org<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 by Ilya Zakharevich

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.


=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)