#!/usr/bin/perl -w
use strict;
use Test;
BEGIN { plan tests => 27, todo => [] };
use AI::Categorizer;
use AI::Categorizer::Document;
use AI::Categorizer::FeatureVector;
# Test 1: the 'use' lines are resolved at compile time, so reaching this
# statement means every required module was found and loaded.
ok 1;

# Class under test; the test groups below build their documents through it.
my $docclass = q{AI::Categorizer::Document};
# Test empty document creation: calling the constructor with no arguments
# must still yield an object of the requested class, and that object is
# expected to report no feature vector.
{
    my $doc = $docclass->new;
    ok ref($doc), $docclass, "Basic empty document creation";

    # scalar() pins the accessor result to exactly one argument.  Without it
    # the call is evaluated in list context, and an accessor that returned an
    # empty list would shift the remaining arguments into the wrong slots.
    ok scalar($doc->features), undef, "Empty document should have no features";
}
# Test basic document creation: a document built from raw 'content' is
# expected to expose the text's words as features.  The assertions look the
# words up in lower case, although the content is given as "Hello world".
{
    my $doc = $docclass->new(content => "Hello world");
    ok ref($doc), $docclass, "Basic document creation with 'content' parameter";

    # scalar() keeps each result to a single argument so the diagnostic can
    # never slide into the "expected" position of ok().
    ok scalar($doc->features->includes('hello')), 1,  "Should include 'hello'";
    ok scalar($doc->features->includes('world')), 1,  "Should include 'world'";
    ok scalar($doc->features->includes('foo')),   '', "Shouldn't include 'foo'";
}
# Test document creation with 'parse': the Text subclass is loaded on demand
# and handed the text through its 'parse' parameter.  The resulting features
# are expected to match those of the 'content' case.
{
    require AI::Categorizer::Document::Text;
    my $doc = AI::Categorizer::Document::Text->new( parse => "Hello world" );
    ok ref($doc), 'AI::Categorizer::Document::Text', "Document creation with 'parse' parameter";

    # scalar() keeps each result to a single argument so the diagnostic can
    # never slide into the "expected" position of ok().
    ok scalar($doc->features->includes('hello')), 1,  "Should include 'hello'";
    ok scalar($doc->features->includes('world')), 1,  "Should include 'world'";
    ok scalar($doc->features->includes('foo')),   '', "Shouldn't include 'foo'";
}
# Test document creation with 'features': a ready-made FeatureVector is passed
# to the constructor, and the document is expected to report the same
# per-feature values back.
{
    my $vector = AI::Categorizer::FeatureVector->new(features => {one => 1, two => 2});
    my $doc    = $docclass->new(features => $vector);
    ok ref($doc), $docclass, "Document creation with 'features' parameter";

    # scalar() keeps each result to a single argument so the diagnostic can
    # never slide into the "expected" position of ok().
    ok scalar($doc->features->value('one')),    1,  "Feature 'one' should have value 1";
    ok scalar($doc->features->value('two')),    2,  "Feature 'two' should have value 2";
    ok scalar($doc->features->includes('foo')), '', "Shouldn't include 'foo'";
}
# Test some stemming & stopword stuff.
#
# Default behaviour: no 'stopword_behavior' is passed and the accessor is
# expected to report 'stem'.  The assertions expect the stopword 'stemmed'
# to knock out the feature 'stem', i.e. to be matched in its stemmed form.
{
    my $doc = $docclass->new
      (
       name      => 'test',
       stopwords => ['stemmed'],
       stemming  => 'porter',
       content   => 'stopword processing should happen after stemming',
       # After stemming, before stopword removal, the words become
       # qw(stopword process should happen after stem)
      );

    ok scalar($doc->stopword_behavior), 'stem', "stopword_behavior() is 'stem'";

    ok scalar($doc->features->includes('stopword')), 1,  "Should include 'stopword'";
    ok scalar($doc->features->includes('stemming')), '', "Shouldn't include 'stemming'";
    ok scalar($doc->features->includes('stem')),     '', "Shouldn't include 'stem'";

    # Emitted as a '#' diagnostic so the line is valid test-protocol output
    # rather than stray text in between the ok/not ok results.
    print "# Features: @{[ $doc->features->names ]}\n";
}
# Stopword behaviour 'no_stem': same document as the default case, but the
# assertions now expect 'stem' to survive.  Presumably the stopword 'stemmed'
# is used as written and so no longer matches the stemmed feature - confirm
# against the Document implementation.
{
    my $doc = $docclass->new
      (
       name              => 'test',
       stopwords         => ['stemmed'],
       stemming          => 'porter',
       stopword_behavior => 'no_stem',
       content           => 'stopword processing should happen after stemming',
       # After stemming, before stopword removal, the words become
       # qw(stopword process should happen after stem)
      );

    ok scalar($doc->stopword_behavior), 'no_stem', "stopword_behavior() is 'no_stem'";

    ok scalar($doc->features->includes('stopword')), 1,  "Should include 'stopword'";
    ok scalar($doc->features->includes('stemming')), '', "Shouldn't include 'stemming'";
    ok scalar($doc->features->includes('stem')),     1,  "Should include 'stem'";

    # Emitted as a '#' diagnostic so the line is valid test-protocol output
    # rather than stray text in between the ok/not ok results.
    print "# Features: @{[ $doc->features->names ]}\n";
}
# Stopword behaviour 'pre_stemmed': the stopword list is supplied already in
# stemmed form ('stem'), and the assertions expect it to remove the stemmed
# feature 'stem' from the document.
{
    my $doc = $docclass->new
      (
       name              => 'test',
       stopwords         => ['stem'],
       stemming          => 'porter',
       stopword_behavior => 'pre_stemmed',
       content           => 'stopword processing should happen after stemming',
       # After stemming, before stopword removal, the words become
       # qw(stopword process should happen after stem)
      );

    ok scalar($doc->stopword_behavior), 'pre_stemmed', "stopword_behavior() is 'pre_stemmed'";

    ok scalar($doc->features->includes('stopword')), 1,  "Should include 'stopword'";
    ok scalar($doc->features->includes('stemming')), '', "Shouldn't include 'stemming'";
    ok scalar($doc->features->includes('stem')),     '', "Shouldn't include 'stem'";

    # Emitted as a '#' diagnostic so the line is valid test-protocol output
    # rather than stray text in between the ok/not ok results.
    print "# Features: @{[ $doc->features->names ]}\n";
}