# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use v6-alpha;

# Word-count table shared by every sub in this file. Keys are
# "<category>-<word>" strings (see add_words); values are the accumulated
# occurrence counts. Filled by load_db and written back by save_db.
my %words;

# Fill %words from the "words.db.pl" file, which holds one
# "key<TAB>value" record per line.
# Returns without touching %words when the file does not exist;
# dies when the file exists but cannot be opened.
sub load_db returns Void {
    my $db_path = "words.db.pl";
    return() unless -e $db_path;

    my $in = open($db_path) err die "Cannot open the words.db.pl file: $!";
    for (=$in) -> $_record {
        my $record = $_record;
        my ($name, $count) = split("\t", $record);
        %words{"$name"} = $count;
    }
    $in.close();
}

# Write every entry of %words to "words.db.pl" (opened in :w mode) as one
# "key<TAB>value" line per entry. Dies when the file cannot be opened.
sub save_db returns Void {
    my $out = open("words.db.pl", :w) err die "Cannot open the words.db.pl file: $!";
    for (%words.kv) -> $name, $count {
        $out.say("$name\t$count");
    }
    $out.close();
}

# Count the words found in a text file.
#
# Parameters:
#   $file - path of the file to read.
# Returns:
#   A hash mapping each lower-cased word to the number of times it was found.
# Dies when the file cannot be opened.
#
# A word is a run of \w characters that is followed by a space, tab, newline
# or carriage return, or that ends the string. Words followed directly by
# other characters (e.g. punctuation) are not counted.
sub parse_file (Str $file) returns Hash {
    my %words_in_file;
    my $fh = open("$file") err die "Cannot open the '$file' file: $!";
    for (=$fh) -> $_line {
        # Work on a copy: the substitution below consumes the line.
        my $line = $_line;
        # Strip one word per iteration until none is left. The \z alternative
        # lets a word that ends the string count too; without it the last
        # word of a line with no trailing whitespace was silently dropped.
        while ($line ~~ s:perl5/(\w+)(?:[ \t\n\r]|\z)//) {
            %words_in_file{lc($0)}++;
        }
    }
    $fh.close;
    return %words_in_file;
}

# Merge the word counts of one file into %words under the given category.
#
# Parameters:
#   $category      - category name, used as the prefix of each %words key.
#   %words_in_file - word => count hash to add.
# Each count is added to the existing %words{"<category>-<word>"} entry.
sub add_words (Str $category, %words_in_file) returns Void {
    for (%words_in_file.keys) -> $word {
        my $entry = "$category-$word";
        %words{$entry} += %words_in_file{$word};
    }
}

# Score the words of a file against every category stored in %words and
# print one "<category> <score>" line per category, highest score first.
#
# Parameters:
#   %words_in_file - hash whose keys are the words to classify (the values
#                    are not used here).
#
# A category's score is the sum, over the file's words, of
# log(count / category total), plus log(category total / grand total).
# A word with no entry for the category uses 0.01 in place of its count.
sub classify (%words_in_file) returns Void {

    # Total count per category, and the total over all categories.
    my %category_total;
    my $grand_total = 0;

    for (%words.kv) -> $entry, $hits {
        # Keys look like "<category>-<word>"; the greedy first group keeps
        # everything before the last hyphen as the category ($0).
        $entry ~~ rx:perl5/^(.+)-(.+)$/;
        %category_total{$0} += $hits;
        $grand_total        += $hits;
    }

    my %score;
    for (%words_in_file.keys) -> $word {
        for (%category_total.kv) -> $category, $category_hits {
            my $seen = %words{"$category-$word"};
            # Fall back to a small constant for words unseen in this category.
            my $weight = defined($seen) ?? $seen !! 0.01;
            %score{$category} += log($weight / $category_hits);
        }
    }

    # Add the log of each category's share of the grand total.
    for (%category_total.kv) -> $category, $category_hits {
        %score{$category} += log($category_hits / $grand_total);
    }

    # do this weird sort block because:
    #    %score{$^a} <=> %score{$^b}
    # does not currently work
    for (%category_total.keys.sort:{ %score{$^a} == %score{$^b} ?? 0 !! %score{$^a} > %score{$^b} ?? -1 !! 1 }) -> $category {
        say("$category %score{$category}");
    }
}

# Command-line driver.
#   add <category> <file>   - add the file's word counts to <category>
#   classify <file>         - print a score per known category for the file
# Anything else prints the usage text.
load_db();

# Check the argument count before looking at @*ARGS[0], so the command name
# is only read when it is actually present.
if (+@*ARGS == 3 && @*ARGS[0] eq 'add') {
    add_words(@*ARGS[1], parse_file(@*ARGS[2]));
    # 'add' is the only command that changes %words, so it is the only one
    # that writes the database back; classify and the usage path leave the
    # file on disk untouched.
    save_db();
}
elsif (+@*ARGS == 2 && @*ARGS[0] eq 'classify') {
    classify(parse_file(@*ARGS[1]));
}
else {
    say("USAGE:
    add <category> <file>
    classify <file>");
}