package HTML::ListScraper::Foam;
use warnings;
use strict;
my $epsilon = 0.0001;
sub new {
my ($class, $book) = @_;
my $self = { book => $book, map => { } };
bless $self, $class;
return $self;
}
sub get_sequences {
my $self = shift;
my $os = $self->{max_log_score};
return grep {
$self->{map}->{$_}->log_score + $epsilon >= $os;
} keys %{$self->{map}};
}
sub get_occurence {
my ($self, $seq) = @_;
return $self->{map}->{$seq};
}
sub store {
my ($self, $seq, $occ) = @_;
if (exists($self->{map}->{$seq})) {
die "duplicated sequence";
}
if (!exists($self->{max_log_score})) {
$self->_cond_store($seq, $occ);
return 1;
}
my $s = $occ->log_score;
my $os = $self->{max_log_score};
if ($s + $epsilon < $os) {
return 0;
}
if ($s > $os) {
if ($self->_cond_store($seq, $occ)) {
if ($s > $os + $epsilon) {
$self->_prune_map;
}
}
} else {
$self->_cond_store($seq, $occ);
}
return 1;
}
sub _cond_store {
my ($self, $seq, $occ) = @_;
if (!$self->{book}->is_presentable($occ->first_pos, $occ->len)) {
return 0;
}
$self->{max_log_score} = $occ->log_score;
$self->{map}->{$seq} = $occ;
return 1;
}
sub _prune_map {
my $self = shift;
my $os = $self->{max_log_score};
my @seq = keys %{$self->{map}};
foreach my $seq (@seq) {
my $s = $self->{map}->{$seq}->log_score;
if ($s + $epsilon < $os) {
delete $self->{map}->{$seq};
}
}
}
1;