The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

use strict;
use Test;
BEGIN { plan tests => 27, todo => [] };

use AI::Categorizer;
use AI::Categorizer::Document;
use AI::Categorizer::FeatureVector;

ok(1);
my $docclass = 'AI::Categorizer::Document';

# Test empty document creation
{
  my $d = $docclass->new;
  ok ref($d), $docclass, "Basic empty document creation";
  ok $d->features, undef;
}

# Test basic document creation
{
  my $d = $docclass->new(content => "Hello world");
  ok ref($d), $docclass, "Basic document creation with 'content' parameter";
  ok $d->features->includes('hello'), 1;
  ok $d->features->includes('world'), 1;
  ok $d->features->includes('foo'),  '';
}

# Test document creation with 'parse'
{
  require AI::Categorizer::Document::Text;
  my $d = AI::Categorizer::Document::Text->new( parse => "Hello world" );
  ok ref($d), 'AI::Categorizer::Document::Text', "Document creation with 'parse' parameter";
  ok $d->features->includes('hello'), 1;
  ok $d->features->includes('world'), 1;
  ok $d->features->includes('foo'),  '';
}

# Test document creation with 'features'
{
  my $d = $docclass->new(features => AI::Categorizer::FeatureVector->new(features => {one => 1, two => 2}));
  ok ref($d), $docclass, "Document creation with 'features' parameter";
  ok $d->features->value('one'), 1;
  ok $d->features->value('two'), 2;
  ok $d->features->includes('foo'), '';
}
  

# Test some stemming & stopword stuff.
{
  my $d = $docclass->new
    (
     name => 'test',
     stopwords => ['stemmed'],
     stemming => 'porter',
     content  => 'stopword processing should happen after stemming',
     # Becomes qw(stopword process    should happen after stem    )
    );
  ok $d->stopword_behavior, 'stem', "stopword_behavior() is 'stem'";
  
  ok $d->features->includes('stopword'), 1,  "Should include 'stopword'";
  ok $d->features->includes('stemming'), '', "Shouldn't include 'stemming'";
  ok $d->features->includes('stem'),     '', "Shouldn't include 'stem'";
  print "Features: @{[ $d->features->names ]}\n";
}

{
  my $d = $docclass->new
    (
     name => 'test',
     stopwords => ['stemmed'],
     stemming => 'porter',
     stopword_behavior => 'no_stem',
     content  => 'stopword processing should happen after stemming',
     # Becomes qw(stopword process    should happen after stem    )
    );
  ok $d->stopword_behavior, 'no_stem', "stopword_behavior() is 'no_stem'";
  
  ok $d->features->includes('stopword'), 1,  "Should include 'stopword'";
  ok $d->features->includes('stemming'), '', "Shouldn't include 'stemming'";
  ok $d->features->includes('stem'),     1,  "Should include 'stem'";
  print "Features: @{[ $d->features->names ]}\n";
}

{
  my $d = $docclass->new
    (
     name => 'test',
     stopwords => ['stem'],
     stemming => 'porter',
     stopword_behavior => 'pre_stemmed',
     content  => 'stopword processing should happen after stemming',
     # Becomes qw(stopword process    should happen after stem    )
    );
  ok $d->stopword_behavior, 'pre_stemmed', "stopword_behavior() is 'pre_stemmed'";
  
  ok $d->features->includes('stopword'), 1,  "Should include 'stopword'";
  ok $d->features->includes('stemming'), '', "Shouldn't include 'stemming'";
  ok $d->features->includes('stem'),     '', "Shouldn't include 'stem'";
  print "Features: @{[ $d->features->names ]}\n";
}