# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package AI::Categorizer::Category;

use strict;
use AI::Categorizer::ObjectSet;
use Class::Container;
use base qw(Class::Container);

use Params::Validate qw(:types);
use AI::Categorizer::FeatureVector;

# Constructor parameters accepted by new().
__PACKAGE__->valid_params
  (
   # Category name; new() also uses it as the registry key.
   name => {type => SCALAR, public => 0},

   # Optional initial members of the category.
   documents  => {
                  type => ARRAYREF,
                  default => [],
                  callbacks => { 'all are Document objects' =>
                                 # A validation callback is handed the parameter
                                 # value (the array reference) as its first
                                 # argument, so inspect the array's elements
                                 # rather than @_ itself.
                                 sub { ! grep !UNIVERSAL::isa($_, 'AI::Categorizer::Document'), @{ $_[0] } },
                               },
                  public => 0,
                 },
  );

# Declares 'features' as a delayed contained object of class
# AI::Categorizer::FeatureVector; features() instantiates it on demand
# through create_delayed_object('features').
__PACKAGE__->contained_objects(
  features => {
    delayed => 1,
    class   => 'AI::Categorizer::FeatureVector',
  },
);

# Category name => Category object, for every category built by new().
my %REGISTRY;

# Constructor.  Builds the object through the parent class (Class::Container),
# wraps the initial 'documents' array in an AI::Categorizer::ObjectSet, and
# records the object in %REGISTRY under its name so by_name() can find it.
# A later object with the same name replaces the earlier registry entry.
#
# Returns the new Category object.
sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);

  # Direct method call instead of the indirect-object form
  # "new Class(...)", which is ambiguous to parse.
  $self->{documents} = AI::Categorizer::ObjectSet->new( @{$self->{documents}} );

  $REGISTRY{$self->{name}} = $self;
  return $self;
}

# Class method taking named arguments (at least name => $string).
# Returns the registered Category with that name when one exists;
# otherwise passes all the arguments on to new() and returns the result.
sub by_name {
  my $class  = shift;
  my %params = @_;

  my $key = $params{name};
  if (exists $REGISTRY{$key}) {
    return $REGISTRY{$key};
  }

  return $class->new(%params);
}

# Read-only accessor for the category's name.
sub name {
  my ($self) = @_;
  return $self->{name};
}

# In list context returns the members of the document set; otherwise
# returns the set's size.
sub documents {
  my ($self) = @_;
  my $set = $self->{documents};

  return $set->members if wantarray;
  return $set->size;
}

# Asks the document set whether it includes the given document and
# returns the set's answer unchanged.
sub contains_document {
  my ($self, $document) = @_;
  return $self->{documents}->includes($document);
}

# Inserts the document into the category's document set and drops the
# cached feature vector so that features() recomputes it next time.
sub add_document {
  my ($self, $document) = @_;

  $self->{documents}->insert($document);

  # Discards the whole cache instead of updating it incrementally.
  return delete $self->{features};
}

# Accessor for the category's feature vector.
#
# With an argument, stores it as the cached vector first.  A true cached
# vector is returned as-is.  Otherwise a fresh 'features' object is
# created, the features of every document in the category are added to
# it, and the result is cached and returned.
sub features {
  my ($self, @new) = @_;

  $self->{features} = $new[0] if @new;

  my $cached = $self->{features};
  return $cached if $cached;

  my $sum = $self->create_delayed_object('features');

  # Scalar context: only walk the documents when there are any.
  if ($self->documents) {
    for my $doc ($self->documents) {
      $sum->add( $doc->features );
    }
  }

  $self->{features} = $sum;
  return $sum;
}

1;
__END__

=head1 NAME

AI::Categorizer::Category - A named category of documents

=head1 SYNOPSIS

  my $category = AI::Categorizer::Category->by_name(name => "sports");
  my $name = $category->name;
  
  my @docs = $category->documents;
  my $num_docs = $category->documents;
  my $features = $category->features;
  
  $category->add_document($doc);
  if ($category->contains_document($doc)) { ...

=head1 DESCRIPTION

This simple class represents a named category which may contain zero
or more documents.  Each category is a "singleton" by name, so two
Category objects with the same name should not be created at once.

=head1 METHODS

=over 4

=item new()

Creates a new Category object and returns it.  Accepts the following
parameters:

=over 4

=item name

The name of this category

=item documents

A reference to an array of Document objects that should belong to this
category.

=back

=item by_name(name => $string)

Returns the Category object with the given name, or creates one if no
such object exists.

=item documents()

Returns a list of the Document objects in this category in a list
context, or the number of such objects in a scalar context.

=item features()

Returns a FeatureVector object representing the sum of all the
FeatureVectors of the Documents in this Category.

=item add_document($document)

Informs the Category that the given Document belongs to it.

=item contains_document($document)

Returns true if the given document belongs to this category, or false
otherwise.

=back

=head1 AUTHOR

Ken Williams, ken@mathforum.org

=head1 COPYRIGHT

Copyright 2000-2003 Ken Williams.  All rights reserved.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

AI::Categorizer(3), Storable(3)

=cut