package AI::Categorizer::Category;
use strict;
use AI::Categorizer::ObjectSet;
use Class::Container;
use base qw(Class::Container);
use Params::Validate qw(:types);
use AI::Categorizer::FeatureVector;
# Declare the constructor parameters this class accepts. The SCALAR and
# ARRAYREF type constants come from Params::Validate's :types export.
__PACKAGE__->valid_params
  (
   # Name of the category; new() uses it as the key in %REGISTRY.
   name => {type => SCALAR, public => 0},

   # Optional initial members of the category.
   documents => {
     type      => ARRAYREF,
     default   => [],
     callbacks => {
       # A Params::Validate callback is invoked with the parameter's
       # value (here the array reference) followed by a reference to
       # the full parameter hash.  The elements to inspect therefore
       # live in @{ $_[0] }; grepping over @_ itself would examine the
       # two references and reject every explicitly supplied list.
       'all are Document objects' =>
         sub { ! grep !UNIVERSAL::isa($_, 'AI::Categorizer::Document'), @{ $_[0] } },
     },
     public => 0,
   },
  );
# Register 'features' as a contained object of class
# AI::Categorizer::FeatureVector.  It is flagged as delayed; features()
# is where it actually gets instantiated, via create_delayed_object().
__PACKAGE__->contained_objects(
    features => {
        delayed => 1,
        class   => 'AI::Categorizer::FeatureVector',
    },
);
# Every Category built by new() is recorded here under its name;
# by_name() consults this table before constructing anything.
my %REGISTRY = ();

# Constructor.  Passes its arguments to the parent class constructor,
# wraps the initial 'documents' array in an AI::Categorizer::ObjectSet,
# and stores the new object in %REGISTRY under its name (overwriting any
# earlier entry for the same name).  Returns the new Category object.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # Arrow-form constructor call; the indirect-object form
    # ("new Class(...)") is parsed ambiguously by perl.
    $self->{documents} = AI::Categorizer::ObjectSet->new( @{ $self->{documents} } );

    $REGISTRY{ $self->{name} } = $self;
    return $self;
}
# Class method taking named arguments.  If a Category is already
# registered under $params{name}, that object is returned; otherwise all
# of the arguments are handed to new() and its result is returned.
sub by_name {
    my $class  = shift;
    my %params = @_;

    my $wanted = $params{name};
    if (exists $REGISTRY{$wanted}) {
        return $REGISTRY{$wanted};
    }

    return $class->new(%params);
}
# Accessor: returns the value stored under the object's 'name' key.
sub name {
    my ($self) = @_;
    return $self->{name};
}
# Context-sensitive accessor for the category's document set.
# In list context it returns the set's members(); in any other context
# it returns the set's size().
sub documents {
    my ($self) = @_;
    my $set = $self->{documents};

    if (wantarray) {
        return $set->members;
    }
    return $set->size;
}
# Asks the underlying document set whether it includes() the given
# document and returns whatever that call returns.
sub contains_document {
    my ($self, $document) = @_;
    my $set = $self->{documents};
    return $set->includes($document);
}
# Inserts the given document into the category's document set and drops
# the cached 'features' entry so that features() rebuilds it on the next
# call.  The value of the delete (the discarded cache entry, if any) is
# what the method returns.
sub add_document {
    my ($self, $document) = @_;

    $self->{documents}->insert($document);

    # The cached sum no longer reflects the membership; throw it away
    # wholesale rather than updating it incrementally.
    return delete $self->{features};
}
# Combined getter/setter for the category's feature vector.
#
# With an argument, stores that argument as the cached 'features' value.
# If a true cached value is then present it is returned as-is.
# Otherwise a fresh object is obtained from
# create_delayed_object('features'), the features() of every document in
# the category are add()ed to it, and the result is cached and returned.
sub features {
    my ($self, @replacement) = @_;

    $self->{features} = $replacement[0] if @replacement;

    my $cached = $self->{features};
    return $cached if $cached;

    my $total = $self->create_delayed_object('features');

    # documents() yields a count in this (boolean) context and the
    # member list in the loop below.
    if ($self->documents) {
        foreach my $doc ($self->documents) {
            $total->add( $doc->features );
        }
    }

    $self->{features} = $total;
    return $total;
}
1;
__END__
=head1 NAME
AI::Categorizer::Category - A named category of documents
=head1 SYNOPSIS
my $category = AI::Categorizer::Category->by_name(name => "sports");
my $name = $category->name;
my @docs = $category->documents;
my $num_docs = $category->documents;
my $features = $category->features;
$category->add_document($doc);
if ($category->contains_document($doc)) { ...
=head1 DESCRIPTION
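This simple class represents a named category which may contain zero
or more documents. Each category is a "singleton" by name, so two
Category objects with the same name should not exist at once. Use
C<by_name()> rather than C<new()> to obtain a Category, so that an
already-existing object with that name is reused.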
=head1 METHODS
=over 4
=item new()
Creates a new Category object and returns it. Accepts the following
parameters:
=over 4
=item name
The name of this category
=item documents
A reference to an array of Document objects that should belong to this
category.
=back
=item by_name(name => $string)
Returns the Category object with the given name, or creates one if no
such object exists.
=item documents()
Returns a list of the Document objects in this category in a list
context, or the number of such objects in a scalar context.
=item features()
Returns a FeatureVector object representing the sum of all the
FeatureVectors of the Documents in this Category.
=item add_document($document)
Informs the Category that the given Document belongs to it.
=item contains_document($document)
Returns true if the given document belongs to this category, or false
otherwise.
=back
=head1 AUTHOR
Ken Williams, ken@mathforum.org
=head1 COPYRIGHT
Copyright 2000-2003 Ken Williams. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=head1 SEE ALSO
AI::Categorizer(3), Storable(3)
=cut