package Class::Indexed;
################################################################################
# Indexed - An abstract class providing fulltext indexing for classes
# (c) Copyright 2003 Aaron Trevena <aaron.trevena@droogs.org>
# Based on my article for www.perl.com
# inspired by Bulletproof Monk and Tenacious D
#
# Couldn't remember the code to the greatest reverse index in the world,
# this is just a tribute.
#
# The peculiar thing is this my friends,
# the piece of code that I wrote that fateful day looks nothing like this code.
# This is just a tribute. You've got to believe me, and I wish you were there.
=head1 NAME
Class::Indexed : An abstract class providing fine-grained and incremental update fulltext indexing for classes
=head1 SYNOPSIS
use Class::Indexed;
our @ISA = qw(Class::Indexed);
# build the index and metadata tables
Class::Indexed->build_index_tables(database=>$db,host=>$host,username=>$user,password=>$password);
# set which attributes / fields are to be indexed and their weighting, etc
$self->indexed_fields (
dbh=>$dbh, key=>'Pub_ID',
fields=>[
{ name=>'Pub_Name', weight=>1 },
],
);
# index an object
$self->index_object();
# index a field or attribute of an object
$self->index_field($self->{Pub_ID}, $field, $value);
# remove the object from the metadata and index tables
$self->delete_location();
# add the object to the metadata table
$self->add_location();
=head1 DESCRIPTION
This abstract class provides inherited indexing functionality to any
class using it as a superclass.
Class::Indexed is designed to provide most of the functionality described
in the article : 'Adding Search Functionality to Perl Applications'
( http://www.perl.com/pub/a/2003/09/25/searching.html ) and I recommend
you read it through to gain understanding of the code and principles
involved.
see the examples for the best explaination of how to use this class
=head1 EXPORT
None by default.
=cut
use strict;
use DBI;
use Class::Indexed::Words;
our $VERSION = 0.01;
################################################################################
# Public methods
=head1 METHODS
=head2 build_index_tables
builds the index and metadata tables, you need to run this before you can use the indexing
my $success = Class::Indexed->build_index_tables(database=>$db,host=>$host,username=>$user,password=>$password);
=cut
sub build_index_tables {
my ($self,%options) = @_;
my $success = 0;
my $dbh = DBI->connect("dbi:mysql:$options{database}:$options{host}", $options{username}, $options{password})
or die " couldn't connect to db : $options{database} host : $options{host} ";
my $indextable = $options{indexname} || 'CIRIND';
my $metadatatable = $options{indexmetadata} || 'CIMETA';
# create index table
my $query = <<endindex;
create table $indextable (
CIRIND_Word varchar(64) not null,
CIRIND_Score float,
CIMETA_ID int not null,
CIRIND_Fields varchar(255),
primary key ( CIRIND_Word, CIMETA_ID )
)
endindex
my $rv = $dbh->do($query);
# create index metadata table
$query = <<endmetadata;
create table $metadatatable (
CIMETA_ID integer primary key auto_increment,
CIMETA_Title varchar(64),
CIMETA_Type varchar(16),
CIMETA_Key varchar(32),
CIMETA_KeyValue varchar(128),
CIMETA_URL varchar(255),
CIMETA_Summary text
)
endmetadata
$rv = $dbh->do($query);
return $success;
}
=head2 index_object
indexes the object, updates the metadata if required
$self->index_object();
before you can call index_object you must set the fields to be indexed
with the indexed_fields method
=cut
sub index_object {
my $self = shift;
foreach my $field (keys %{$self->{_RIND_fields}}) {
warn "index object : $field : $self->{$field}\n";
my $success = $self->index_field($field,$self->{$field});
warn "success : $success\n";
}
}
=head2 index_field
indexes a particular field or attribute of the object
$self->index_field($fieldname,$value)
takes the name of the attribute/field and the new value
before you can call index_field you must set the fields to be indexed
with the indexed_fields method
=cut
sub index_field {
my ($self,$field,$value) = @_;
warn "index_field : $field,$value \n";
return 0 unless ($self->{_RIND_fields}{$field});
$self->{_RIND_index_table} ||= 'CIRIND';
$self->{_RIND_location_table} ||= 'CIMETA';
my %newwords;
my @newwords;
# extract new words from current field or lookup or replacement text
if ((defined $value and $value ne '') or ( $self->{_RIND_fields}{$field}{replace} || $self->{_RIND_fields}{$field}{lookup} )) {
MODE: {
if (defined $self->{_RIND_fields}{$field}{replace}) {
$value = get_words($self->{_RIND_fields}{$field}{replace});
last;
}
if (defined $self->{_RIND_fields}{$field}{lookup} ) {
my $column = $self->{_RIND_fields}{$field}{lookup};
my $table = $self->{_RIND_fields}{$field}{lookup_table};
my $query = $self->{_RIND_fields}{$field}{query};
warn "value : $value / column : $column / query : $query \n";
unless (defined $query and $query ne '') {
$query = qq{select $column from $table where $field = };
if ($value =~ /\D/) {
$value =~ s/(['"])/\\$1/g;
$query .= qq{'$value'};
} else {
$query .= $value;
}
}
warn "query : $query \n";
$value = join ( ' ',@{$self->{_RIND_dbh}->selectcol_arrayref($query)} );
last;
}
} # end of MODE switch
warn "value : $value \n";
# get words from value
@newwords = get_words($value);
foreach ( @newwords ) {
next if $stopwords{$_};
$newwords{$_} += $self->{_RIND_fields}{$field}{weight};
}
warn "new words : ", @newwords, "\n";
}
# get old words from reverse index for current object
my $location = $self->{_RIND_location};
my $query = "select * from $self->{_RIND_index_table} where CIMETA_ID = ?";
my $sth = $self->{_RIND_dbh}->prepare($query);
my $rv = $sth->execute($location);
# update reverse index words for this field of this object
warn "update reverse index \n";
while ( my $row = $sth->fetchrow_hashref() ) {
next unless ($row->{CIRIND_Fields} =~ m/'$field'/); # skip unless this word was in the old value of this field
$self->{__RIND_locationwords}{$row->{CIRIND_Word}} = $row;
if (exists $newwords{$row->{CIRIND_Word}}) {
$self->_RIND_UpdateFieldEntry($row,$field,$newwords{$row->{CIRIND_Word}});
delete $newwords{$row->{CIRIND_Word}}
} else {
$self->_RIND_RemoveFieldEntry($row,$field,$location);
}
}
warn "add to reverse index", keys %newwords , "\n";
foreach (keys %newwords) {
warn "adding field entry $_ : $newwords{$_} : $field \n";
$self->_RIND_AddFieldEntry($location,$_,$newwords{$_},$field);
}
return 1;
}
=head2 delete_location
remove the object from the metadata and index tables
$self->delete_location();
=cut
sub delete_location {
my $self = shift;
$self->{_RIND_index_table} ||= 'CIRIND';
$self->{_RIND_location_table} ||= 'CIMETA';
my $query = "delete from $self->{_RIND_index_table} where CIMETA_ID = ?";
my $sth = $self->{_RIND_dbh}->prepare($query);
my $rv1 = $sth->execute($self->{_RIND_location});
$query = "delete from $self->{_RIND_location_table} where CIMETA_ID = ?";
$sth = $self->{_RIND_dbh}->prepare($query);
my $rv2 = $sth->execute($self->{_RIND_location});
return "$rv1:$rv2";
}
=head2 add_location
add the object to the metadata table
$self->add_location();
=cut
sub add_location {
my ($self,%options) = @_;
$self->{_RIND_index_table} ||= 'CIRIND';
$self->{_RIND_location_table} ||= 'CIMETA';
my $dbh = $options{dbh} || $self->{_dbh};
my $query = qq{ insert into $self->{_RIND_location_table}
( CIMETA_Title,CIMETA_Type, CIMETA_Key, CIMETA_KeyValue, CIMETA_URL, CIMETA_Summary )
values (?,?,?,?,?,?) };
warn "query : $query \n";
my $location_sth = $dbh->prepare($query);
my @values = map { $_ || 'null' } @options{qw(Title Type Key KeyValue URL Summary)};
my $rv = $location_sth->execute(@values);
$self->{_RIND_location} = $location_sth->{mysql_insertid};
return $rv;
}
=head2 indexed_fields
set which attributes / fields are to be indexed and their weighting, etc
$self->indexed_fields (
dbh=>$dbh, key=>'Pub_ID',
fields=>[
{ name=>'Pub_Name', weight=>1 },
],
);
=cut
sub indexed_fields {
my ($self,%args) = @_;
$self->{_RIND_index_table} ||= 'CIRIND';
$self->{_RIND_location_table} ||= 'CIMETA';
if (keys %args) {
$self->{_RIND_dbh} = $args{dbh} if defined $args{dbh};
if ( defined $args{key} ) {
$self->{RIND_key} = $args{key};
($self->{_RIND_location}) = $args{dbh}->selectrow_array("Select CIMETA_ID from $self->{_RIND_location_table} where CIMETA_Key = '$args{key}' and CIMETA_KeyValue = " . $args{dbh}->quote($self->{$args{key}}));
}
if ( defined $args{fields} ) {
foreach ( @{$args{fields}} ) {
$self->{_RIND_fields}{$_->{name}} = $_;
}
}
}
return @{$self->{_RIND_fields}} if wantarray;
}
=head1 AUTHOR
Aaron J. Trevena, E<lt>aaron.trevena@droogs.orgE<gt>
=head1 SEE ALSO
L<perl>.
=cut
####################################################################################
# private methods : don't touch below here
#
# I don't reccomend mucking about with these as they are very self-referential
sub _RIND_UpdateFieldEntry {
my ($self,$row, $field, $score) = @_;
my %fields = ( $row->{CIRIND_Fields} =~ m/'(.*?)':([\d.]+)/g );
# recalculate total score
my $newscore = ($row->{CIRIND_Score} - $fields{$field} ) + $score;
return 1 if ($fields{$field} == $score); # skip if score unchanged
# update entry
$fields{$field} = $score;
my $newfields;
foreach (keys %fields) {
$newfields .= "'$_':$fields{$_}";
}
$self->_RIND_UpdateIndex( word=>$row->{CIRIND_Word},location=>$row->{CIMETA_ID},
newscore=>$newscore,newfields=>$newfields );
}
sub _RIND_AddFieldEntry {
my ($self,$location, $word, $score, $field) = @_;
warn "_RIND_AddFieldEntry : ($location, $word, $score, $field) \n";
# check if record already exists for this location and update/insert entry
if (exists $self->{__RIND_locationwords}{$word}) {
# recalculate total score
my $newscore = $self->{__RIND_locationwords}{$word}{CIRIND_Score} + $score;
# update entry, appending field and score to end
my $newfields = $self->{__RIND_locationwords}{$word}{CIRIND_Fields} . "'$field':$score";
$self->_RIND_UpdateIndex( word=>$word,location=>$location, newscore=>$newscore,newfields=>$newfields );
} else {
# insert new entry
$self->_RIND_UpdateIndex( insert=>1, word=>$word,location=>$location, newscore=>$score,newfields=>"'$field':$score" );
}
}
sub _RIND_RemoveFieldEntry {
my ($self,$row, $field, $location) = @_;
# check if record contains scores from other fields
my %fields = ( $row->{CIRIND_Fields} =~ m/'(.*?)':([d.]+)/g ) ;
if ( keys %fields > 1 ) {
# recalculate total score
my $newscore = $row->{CIRIND_Score} - $fields{$field};
delete $fields{$field};
my $newfields;
foreach (keys %fields) {
$newfields .= "'$_':$fields{$_}";
}
# update entry
$self->_RIND_UpdateIndex( word=>$row->{CIRIND_Word},location=>$location, newscore=>$newscore,newfields=>$newfields );
} else {
# delete entry
$self->_RIND_UpdateIndex( delete=>1, word=>$row->{CIRIND_Word}, location=>$location);
}
}
sub _RIND_UpdateIndex {
my ($self,%args) = @_;
my $query = qq{ update $self->{_RIND_index_table}
set CIRIND_Score = ?, CIRIND_Fields = ?
where CIRIND_Word = ? and CIMETA_ID = ? };
my @args = ($args{newscore},$args{newfields},$args{word},$args{location});
MODE:{
if ($args{insert}) {
$query = qq{ insert into $self->{_RIND_index_table} ( CIRIND_Score, CIRIND_Fields, CIRIND_Word, CIMETA_ID)
values (?,?,?,?) };
last;
}
if ($args{delete}) {
$query = "delete from $self->{_RIND_index_table} where CIRIND_Word = ? and CIMETA_ID = ?";
shift(@args); shift(@args); # remove unused arguments
last;
}
} # end of MODE switch
my $sth = $self->{_RIND_dbh}->prepare($query);
warn " .. _RIND_UpdateIndex ";
warn " args : @args";
my $rv = $sth->execute(@args);
return $rv;
}
##################################################################################
1;
##################################################################################
##################################################################################