# -*-Perl-*-
#
# LTU.pm - A Perl module implementing linear threshold units.
#
# Copyright (c) 1995 Tom Fawcett. All rights reserved.
# This program is free software; you may redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Original C code written by James Callan.
# Rewritten for Perl from Version 2.4 by Tom Fawcett.
# Bugs, comments, suggestions: Tom Fawcett <fawcett@nynexst.com>
#
# Description:
#
# This module contains subroutines for creating, using, and destroying
# linear threshold units (LTUs). It offers four different training rules:
#
# 1) The absolute correction rule (ACR), copied from Nils Nilsson's book
# "Learning Machines: Foundations of Trainable Pattern-Classification
# Systems," published by McGraw-Hill in 1965. This rule is identical
# to the Perceptron rule. Its advantage is that it converges quickly
# when the training instances are linearly separable.
# 2) The least-mean-square (LMS) rule, devised by Widrow and Hoff in 1960.
# See the AI Handbook or Duda & Hart for information on the LMS rule.
# The advantage of the LMS rule over absolute correction or fixed
# increment is that it tends to minimize the mean-squared error, even
# when the classes are not linearly separable.
# 3) The recursive least square (RLS) rule, copied from Peter Young's book
# "Recursive Estimation and Time-Series Analysis," published by
# Springer-Verlag in 1984. This rule is like the LMS rule, but far
# superior to it. It's faster, because each instance only needs to
# be seen once. It's also more accurate.
# 4) The thermal perceptron (TACR) rule, copied from Marcus Frean's PhD
# thesis "Learning in Single Perceptrons," published by University of
# Edinburgh in 1990. This rule is like the ACR rule, except that its
# annealing capabilities enable it to handle classes that are not
# linearly separable.
#
# A "maybe-train" training strategy (i.e. train only when the linear
# threshold unit misclassifies an instance) is implicit in the absolute
# correction rule, the LMS rule and the thermal perceptron rule. The RLS
# rule always trains.
#
# The training rules are most effective when attribute values are scaled
# to a fixed range. When values exceed 1.0, some of the rules (e.g. ACR,
# TACR) may allow the magnitude of weights to grow without bound. When
# different attributes have different ranges, some of the rules (e.g. RLS)
# give greater influence to attributes with larger ranges. Therefore, you
# can request automatic scaling, if you desire it, when the LTU is created.
# If you enable scaling, your data is automatically scaled, so that its value
# does not exceed 1.0. Scales are computed and adjusted when necessary,
# without any intervention from you.
#
# LTU's converge most quickly when the data includes both positive and
# negative values. Therefore, when your data is scaled, it is scaled so
# that the midpoint of the range is 0.0.
#
# Each time scales are adjusted, the weights in the LTU become inaccurate.
# The scaling procedure cannot compensate for this inaccuracy. Therefore,
# the automatic scaling can affect the speed with which the algorithm
# converges. However, once the scales "settle down" (i.e. once the extreme
# values for each attribute have been seen), the speed of convergence is not
# affected.
#
# Automatic scaling adversely affects the RLS rule, because the RLS rule
# implicitly "remembers" each instance. When scales are adjusted, the
# instances remembered by the LTU become noisy. If you enable scaling with
# the RLS rule, you should cycle through your instances several times, at
# least until the extreme values (min/maxs) of each attribute have been seen.
#
#
##############################################################################
# $Log: LTU.pm,v $
# Revision 2.7 1996/02/20 15:15:18 fawcett
# Fixed package confusion made in the original version.
# Now exports $LTU_MINUS, LTU_PLUS and LTU_THRESHOLD.
# Created Makefile.PL so that MakeMaker can handle installation,
# cleaning, distribution, etc.
#
#
# Revision 2.5 1995 fawcett
# Initial public offering, released as Statistics::LTU.pm.
# Based on Jamie's LTU.c version 2.4.
#
##############################################################################
package Statistics::LTU;
my($rcs) = ' $Id: LTU.pm,v 2.7 1996/02/20 15:15:18 fawcett Exp $ ' ;
require 5;
use Carp;
require Exporter;
@ISA = ('Exporter');
@EXPORT = qw($LTU_PLUS $LTU_MINUS $LTU_THRESHOLD);
($Statistics::LTU::VERSION) = $rcs =~ /Id: LTU\.pm.* ([\d\.]+) /;
die "Can't determine Statistics::LTU::VERSION from rcs string!"
unless $Statistics::LTU::VERSION;
# We understand LTU's dumped with 2.5 or later
my($read_LTU_files_back_to) = 2.5;
##
## EXPORTED CONSTANTS
##
$LTU_MINUS = -1.0;
$LTU_PLUS = 1.0;
$LTU_THRESHOLD = 0;
##
## PRIVATE CONSTANTS
##
my($LTU_ORIGIN_OFF) = 0;
my($LTU_ORIGIN_ON) = 1;
my($LTU_SCALING_OFF) = 0;
my($LTU_SCALING_ON) = 1;
##
## Data Structure Definitions
##
## Field indices for the LTU structure.
## All index names end in "i";
my($LTU_LENGTH) = 15; # Number of entries in an LTU
my($LENGTHi,
$SCALINGi,
$ORIGIN_RESTRICTIONi,
$UNUSEDi,
$CYCLES_SINCE_WEIGHT_CHANGEi,
# Remainder are references to vectors
$WEIGHTi,
$WEIGHT_MINi,
$WEIGHT_MAXi,
$ATTRIB_MINi,
$ATTRIB_MAXi,
$ATTRIB_SCALEi,
$ATTRIBUTEi,
$RLS_Pi,
$RLS_TMP_1i,
$RLS_TMP_2i,
) =
(0 .. $LTU_LENGTH-1);
$LTU_ATTRIBUTE_MIN = -0.5;
##
## new creates a new LTU with weights set to 0.
##
sub new {
my($type, $nvars, $scaling) = @_;
##
## First allocate the LTU, and fill in its basic information.
##
my($self) = [ (0) x $LTU_LENGTH ];
my($length) = $nvars + 1;
$self->[$SCALINGi] = $scaling;
$self->[$LENGTHi] = $length;
$self->[$ORIGIN_RESTRICTIONi] = $LTU_ORIGIN_OFF;
##
## Create vectors used by all types.
##
$self->[$WEIGHTi] = [ (0.0) x $length ];
$self->[$WEIGHT_MINi] = [ (0.0) x $length ];
$self->[$WEIGHT_MAXi] = [ (0.0) x $length ];
$self->[$ATTRIBUTEi] = [ (0.0) x $length ];
##
## Allocate scaling vectors if necessary.
##
if ($scaling == $LTU_SCALING_ON) {
## Signal need to initialize these values by setting attrib_min > max
$self->[$ATTRIB_MINi] = [ (1.0) x $length ];
$self->[$ATTRIB_MAXi] = [ (-1.0) x $length ];
$self->[$ATTRIB_SCALEi] = [ (1.0) x $length ];
};
bless $self, $type; # Bless into type
}
##
## COPY creates a copy of itself, which it returns
##
sub copy {
my($self) = @_;
##
## First, create a new LTU.
##
my($new) = new Statistics::LTU($self->[$LENGTHi]-1,
$self->[$SCALINGi]);
##
## Copy the basic information that all LTU's have. (The attribute vector
## isn't copied because its contents are temporary.)
##
$new->[$CYCLES_SINCE_WEIGHT_CHANGEi] =
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi];
$new->[$ORIGIN_RESTRICTIONi] =
$self->[$ORIGIN_RESTRICTIONi];
$new->[$WEIGHTi] = [@{$self->[$WEIGHTi]}];
$new->[$WEIGHT_MINi] = [@{$self->[$WEIGHT_MINi]}];
$new->[$WEIGHT_MAXi] = [@{$self->[$WEIGHT_MAXi]}];
##
## Copy scaling data, if necessary.
##
if ($new->[$SCALINGi] == $LTU_SCALING_ON) {
$new->[$ATTRIB_MINi] = $self->[$ATTRIB_MINi];
$new->[$ATTRIB_MAXi] = $self->[$ATTRIB_MAXi];
$new->[$ATTRIB_SCALEi] = $self->[$ATTRIB_SCALEi];
}
$new;
}
##
## LTU_DESTROY destroys an existing linear threshold unit.
##### I don't know if this is necessary but it can't hurt
##
sub destroy {
my($self) = @_;
for (0 .. $LTU_LENGTH-1) {
undef $self->[$_];
}
undef $self;
}
##
## IS_CYCLING returns a boolean value which indicates whether or not the
## LTU's weights are cycling. If they are, further training will not produce
## better results. This judgement is based upon the heuristic that if the
## weights are not cycling, then the min/max's of weights will change fairly
## often. The caller must specify what it thinks is "fairly often". This is
## done with a parameter that specifies how many times the training function
## can adjust weights before the caller would expect at least one weight to
## have its min/max adjusted. Some common values for this parameter are the
## number of attributes, or the log of the number of attributes.
##
sub is_cycling {
my($self, $how_often_weights_must_change) = @_;
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi] > $how_often_weights_must_change;
}
##
## PRINT describes the LTU and its weights.
##
sub print {
my($self) = @_;
print "The linear threshold unit is of type ", ref($self);
print " and contains ", $self->[$LENGTHi], " weights.\n";
printf("Origin restriction=%d.\n",
$self->[$ORIGIN_RESTRICTIONi]);
printf("Cycles since last change of a weight min/max: %d.\n",
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi]);
##
## Print 8 weights to a line, just to make it easier to read.
##
my($i);
for $i (0 .. $self->[$LENGTHi]-1) {
print $self->[$WEIGHTi]->[$i], " ";
printf("\n") if $i%8 == 7;
};
print "\n";
}
##
## RESTORE restores a linear threshold unit from the specified file.
##
sub restore {
my($class, $file_name) = @_;
if (!open(FILE, "<$file_name")) {
carp "restore: $file_name: $!\n";
return(0);
}
##
## Check that the LTU is consistent with this version
##
my($ltu_version);
$ltu_version = <FILE>;
chop($ltu_version);
if ($ltu_version < $read_LTU_files_back_to) {
carp "restore: $file_name written with LTU version $ltu_version.\n";
carp "This is version $Statistics::LTU::VERSION, which only understands\n";
carp "LTU files back to $read_LTU_files_back_to.\n";
carp "LTU not restored!\n";
return(0);
}
## First, read the type, the number of weights, and whether or not scaling
## is enabled. Then, create a linear threshold unit. Note that the
## number of variables in the training instances is one less than the
## number of weights (one weight is a constant).
##
my($line);
$line = <FILE>;
chop($line);
my($type, $length, $scaling, $cycles_since_weight_change, $origin) =
split(/ /, $line);
my($new) = $type->new($length-1, $scaling);
$new->[$CYCLES_SINCE_WEIGHT_CHANGEi] = $cycles_since_weight_change;
$new->[$ORIGIN_RESTRICTIONi] = $origin;
##
## Now, read the weights, the minimum value of the attributes, and the
## maximum value of the attributes, from the file, and store them in the
## linear threshold unit.
my($new_weight) = $new->[$WEIGHTi];
my($new_weight_min) = $new->[$WEIGHT_MINi];
my($new_weight_max) = $new->[$WEIGHT_MAXi];
my($new_attrib_min) = $new->[$ATTRIB_MINi];
my($new_attrib_max) = $new->[$ATTRIB_MAXi];
my($new_attrib_scale) = $new->[$ATTRIB_SCALEi];
my($i, @fields);
for $i (0 .. $length-1) {
$line = <FILE>; chop($line);
@fields = split(/ /, $line);
($new_weight->[$i], $new_weight_min->[$i], $new_weight_max->[$i]) =
@fields;
if ($scaling == $LTU_SCALING_ON) {
($new_attrib_min->[$i], $new_attrib_max->[$i]) = @fields[3,4];
if ($new_attrib_max->[$i] > $new_attrib_min->[$i]) {
$new_attrib_scale->[$i] = 1.0 /
($new_attrib_max->[$i] - $new_attrib_min->[$i]);
}
}
}
##### Pick up final theta value
$line = <FILE>; chop($line);
$new->[$WEIGHTi]->[$length-1] = $line;
close(FILE);
$new;
}
##
## SAVE saves the linear threshold unit in the specified file.
## If the file can't be created for some reason, 0 is returned. Otherwise, 1
## is returned.
##
sub save {
my($self, $file_name) = @_;
my($length) = $self->[$LENGTHi];
if (!open(FILE, ">$file_name")) {
carp "Statistics::LTU::save: $file_name: $!";
return(0);
}
##
## Stamp the file with a version number.
##
print FILE $Statistics::LTU::VERSION, "\n";
## First, print the type, the number of weights and whether or not scaling
## is enabled. Then, print the weights, the minimum value of the
## attribute, and the maximum value of the attribute. Print 1
## attribute/line, just to make it easier to read.
print FILE join(' ', ((ref $self),
$self->[$LENGTHi],
$self->[$SCALINGi],
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi],
$self->[$ORIGIN_RESTRICTIONi]
)),
"\n";
my($i);
for $i (0 .. $length-1) {
printf FILE "%lf %lf %lf ",
$self->[$WEIGHTi]->[$i],
$self->[$WEIGHT_MINi]->[$i],
$self->[$WEIGHT_MAXi]->[$i];
if ($self->[$SCALINGi] == $LTU_SCALING_ON) {
printf FILE "%lf %lf ",
$self->[$ATTRIB_MINi]->[$i],
$self->[$ATTRIB_MAXi]->[$i];
}
print FILE "\n";
}
printf FILE "%lf\n", $self->[$WEIGHTi]->[$length-1];
close(FILE);
1;
}
##
## TEST computes and returns the result of applying the linear threshold
## unit to the instance_vector.
##
sub test {
my($self, $instance_vector) = @_;
my($length) = $self->[$LENGTHi];
my($weight) = $self->[$WEIGHTi];
my($attribute) = $self->[$ATTRIBUTEi];
$self->_scale_attributes($instance_vector);
my($result) = $weight->[$length-1];
my($i);
for $i (0 .. $length-2) {
$result += $attribute->[$i] * $weight->[$i];
}
$result;
}
##
## _SCALE_ATTRIBUTES translates and scales the attributes comprising an
## instance vector. The scaled attributes are stored in the attributes slot
## on the LTU, so that the original instance_vector will not be changed.
##
sub _scale_attributes {
my($self, $instance_vector) = @_;
my($length) = $self->[$LENGTHi];
my($attribute) = $self->[$ATTRIBUTEi];
my($attrib_min) = $self->[$ATTRIB_MINi];
my($attrib_scale) = $self->[$ATTRIB_SCALEi];
##
## If scaling is disabled, just copy the instance attribute values into
## the temporary attribute vector. This isolates the input values from
## the caller's storage and converts to double precision. Otherwise
## translate and scale the attributes. When min=max, only one value has
## been seen for the attribute. In that case, the attribute is mapped
## to the minimum value. It is advantageous to use one of the endpoints,
## instead of the midpoint 0.0, because it lets training begin.
##
my($i);
if ($self->[$SCALINGi] == $LTU_SCALING_OFF) {
for $i (0 .. $#$instance_vector) {
$attribute->[$i] = $instance_vector->[$i];
}
} else {
for $i (0 .. $#$instance_vector) {
$attribute->[$i] = (($instance_vector->[$i] - $attrib_min->[$i]) *
$attrib_scale->[$i]) + $LTU_ATTRIBUTE_MIN;
}
}
##
## A constant attribute must be added to the end of the instance vector
## in order to learn some kinds of functions (e.g. f(x)=mx+b, the function
## for a line in 2D space). The constant attribute enables the training
## function to learn a weight that translates the hyperplane. However,
## it is undesirable for other kinds of functions (e.g. f(x)=mx, which is
## what some preference predicates look like). Decide what to do.
##
if ($self->[$ORIGIN_RESTRICTIONi] == $LTU_ORIGIN_ON) {
$attribute->[$length-1] = 0.0;
} else {
$attribute->[$length-1] = 1.0;
}
}
##
## MAINTAIN_SCALING_FACTORS checks that the specified instance falls within
## the min-max ranges of the specified LTU. If it does, nothing changes.
## If it does not, the min-max ranges and the scaling factors are adjusted
## to accomodate this new instance.
##
sub maintain_scaling_factors {
my($self, $instance_vector) = @_;
##
## If scaling is disabled, don't do anything.
##
return if $self->[$SCALINGi] == $LTU_SCALING_OFF;
my($length) = $self->[$LENGTHi];
my($attrib_min) = $self->[$ATTRIB_MINi];
my($attrib_max) = $self->[$ATTRIB_MAXi];
my($attrib_scale) = $self->[$ATTRIB_SCALEi];
my($i);
for $i (0 .. $length-2) {
if ($attrib_min->[$i] > $attrib_max->[$i]) {
##
## Initialization.
##
$attrib_min->[$i] = $attrib_max->[$i] =
$instance_vector->[$i];
} elsif ($instance_vector->[$i] > $attrib_max->[$i]) {
$attrib_max->[$i] = $instance_vector->[$i];
$attrib_scale->[$i] =
1.0 / ($attrib_max->[$i] - $attrib_min->[$i]);
} elsif ($instance_vector->[$i] < $self->[$ATTRIB_MINi]->[$i]) {
$attrib_min->[$i] = $instance_vector->[$i];
$attrib_scale->[$i] =
1.0 / ($attrib_max->[$i] - $attrib_min->[$i]);
}
}
}
##
## SET_ORIGIN_RESTRICTION
##
sub set_origin_restriction {
my($self, $value) = @_;
if (($value == $LTU_ORIGIN_OFF) or ($value == $LTU_ORIGIN_ON)) {
$self->[$ORIGIN_RESTRICTIONi] = $value;
} else {
carp "$value is an unacceptable value for the origin restriction\n";
}
}
##
## LTU_WEIGHTS returns a ref to copy of the LTU weights.
##
sub weights {
my($self) = @_;
[@{$self->[$WEIGHTi]}];
}
##
## UPDATE_WEIGHT_MIN_MAX updates an LTU's weight min/max's and its
## cycles_since_weight_change field. Although it is simple, it is
## implemented as a separate routine because each training routine
## should perform this function before returning the LTU to the caller.
##
sub update_weight_min_max {
my($self) = @_;
my($boundary_changed) = 0;
my($weight) = $self->[$WEIGHTi];
my($weight_max) = $self->[$WEIGHT_MAXi];
my($weight_min) = $self->[$WEIGHT_MINi];
my($i);
for $i (0 .. $self->[$LENGTHi]-2) {
if ($weight_max->[$i] < $weight->[$i]) {
$weight_max->[$i] = $weight->[$i];
$boundary_changed = 1;
} elsif ($weight_min->[$i] > $weight->[$i]) {
$weight_min->[$i] = $weight->[$i];
$boundary_changed = 1;
}
}
if ($boundary_changed) {
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi] = 0;
} else {
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi]++;
}
}
##
## $LTU->correctly_classifies($instance, $value)
## Returns 1 iff $instance is on the same side of Statistics::LTU::THRESHOLD
## as $value is.
##
sub correctly_classifies {
my($self, $instance, $desired_value) = @_;
my($actual_value) = $self->test($instance);
((($actual_value < $LTU_THRESHOLD) and
($desired_value < $LTU_THRESHOLD)) or
(($actual_value >= $LTU_THRESHOLD) and
($desired_value >= $LTU_THRESHOLD)));
}
##
## ltu->eval_on_set(examples)
##
## Evaluates an LTU on a set of example, returning 4 integers:
## True negatives, false positives, false negatives and true positives.
##
## Argument is a ref to a list of examples.
## Each example is a ref to an array of [Vector, Value].
## Each Vector is a feature vector.
##
## Example:
## @Results = $LTU->eval_on_set([[[1,4,-2],1], [[1,-2,2],-1]]);
## ($TN, $FP, $FN, $TP) = @Results;
##
sub eval_on_set {
my($self, $Examples) = @_;
my($TN, $FP, $FN, $TP) = (0,0,0,0);
my($example, $Instance, $DesiredValue, $ActualValue);
foreach $example (@{$Examples}) {
($Instance, $DesiredValue) = @{$example};
my($ActualValue) = $self->test($Instance);
if ($ActualValue >= $LTU_THRESHOLD) {
if ($DesiredValue >= $LTU_THRESHOLD) {
$TP++;
} else {
$FP++;
}
} elsif ($DesiredValue >= $LTU_THRESHOLD) {
$FN++;
} else {
$TN++;
}
}
($TN, $FP, $FN, $TP);
}
###
### Specific LTU types built upon LTU.
### 1. The absolute correction rule.
package Statistics::LTU::ACR;
@Statistics::LTU::ACR::ISA = qw( Statistics::LTU );
use Statistics::LTU;
##
## ACR::TRAIN trains the specified linear threshold unit on a particular
## instance_vector. It returns 1 if the linear threshold unit already
## classified the instance_vector correctly, otherwise it returns 0.
## The training rule is taken from Nilsson's "Learning Machines" book.
##
sub train {
my($self, $instance_vector, $desired_value) = @_;
my($length) = $self->[$LENGTHi];
##
## Only train the linear threshold unit if it does not classify correctly.
##
my($actual_value) = $self->test($instance_vector);
die "\$Statistics::LTU::LTU_THRESHOLD undefined in ACR" unless defined($Statistics::LTU::LTU_THRESHOLD);
return(1) if ((($actual_value < $Statistics::LTU::LTU_THRESHOLD) &&
($desired_value < $Statistics::LTU::LTU_THRESHOLD)) ||
(($actual_value >= $Statistics::LTU::LTU_THRESHOLD) &&
($desired_value >= $Statistics::LTU::LTU_THRESHOLD)));
## The scale factor can only be changed when the weights are being
## changed, because a change to the scale factor invalidates the current
## set of weights.
$self->maintain_scaling_factors($instance_vector);
$self->_scale_attributes($instance_vector);
##
## Decide how much to adjust the weights by. The absolute correction rule
## (which provides the fastest learning) requires the dot product of the
## instance vector, so do that first.
##
my($dot_product) = 0;
foreach (@{$self->[$ATTRIBUTEi]}) {
$dot_product += $_ * $_;
}
my($delta) = $dot_product==0 ? .1
: 1.0 + (int(abs($actual_value) / $dot_product));
$delta = -$delta if $desired_value < $Statistics::LTU::LTU_THRESHOLD;
##
## Now, adjust the weights. The last weight is handled differently,
## because it is always applied to the constant 1.
##
my($weight) = $self->[$WEIGHTi];
my($attribute) = $self->[$ATTRIBUTEi];
my($i);
for $i (0 .. $length-1) {
$weight->[$i] += $delta * $attribute->[$i];
}
$self->update_weight_min_max;
0;
}
###
### Recursive Least Squares linear threshold units
###
package Statistics::LTU::RLS;
use Statistics::LTU;
use Carp;
@Statistics::LTU::RLS::Inherit::ISA = @Statistics::LTU::RLS::ISA =
qw( Statistics::LTU );
sub new {
my($type, $nvars, $scaling) = @_;
# I don't know how to get inheritance to work automatically here
my($self) = new Statistics::LTU($nvars, $scaling);
my($length) = $nvars + 1;
##
## Allocate temporary space for the RLS rule.
##
my($p) = [ (0.0) x ($length * $length) ];
$self->[$RLS_Pi] = $p;
$self->[$RLS_TMP_1i] = [ (0.0) x $length ];
$self->[$RLS_TMP_2i] = [ (0.0) x $length ];
## [Used to be rls_init_p]
## Initialize the p matrix to an arbitrarily large value along the
## diagonal, and zero everywhere else. Suggested in the algorithm, p 27.
##
## Young says that "large diagonal elements [for the p matrix] (say 10^6
## in general) will yield convergence and performance comensurate with the
## stage-wise solution of the same problem" (p 27). I have found that
## values larger than 10^6 sometimes converge to more accurate weights;
## however, values that are too large (e.g. 2.0 * 10^15) produce less
## accurate weights. The value defined below is intended to be as large
## as is possible without adversely affecting performance. It was
## determined empirically.
##
my($DIAGONAL_VALUE) = 10.0e+6;
my($i, $j);
for $i (0 .. $length-1) {
$p->[$i * $length + $i] = $DIAGONAL_VALUE;
for $j ($i+1 .. $length-1) {
$p->[$i * $length + $j] = 0.0;
$p->[$j * $length + $i] = 0.0;
}
}
bless $self;
}
sub copy {
my($self) = @_;
my($new) = $self->Statistics::LTU::RLS::Inherit::copy($self);
##
## Copy the RLS data. (The tmp_1 and tmp_2 vectors
## aren't copied because their contents are temporary.)
##
$new->[$RLS_Pi] = [@{$self->[$RLS_Pi]}];
bless $new;
}
##
## TRAIN trains the specified linear threshold unit on a particular
## instance_vector. The training rule is taken from Young's "Recursive
## Estimation and Time-Series Analysis" book, pp26-27.
## Return value is undefined.
sub train {
my($self, $instance_vector, $desired_value) = @_;
##
## First, translate and scale the attributes. The scale factor can only
## be changed when the weights are being changed, because a change to the
## scale factor invalidates the current set of weights.
##
$self->maintain_scaling_factors($instance_vector);
$self->_scale_attributes($instance_vector);
##
## Now, update the weights. This is done in two parts,
## updating the P matrix and A vector, respectively.
##
$self->rls_update_p;
$self->rls_update_a($desired_value);
$self->update_weight_min_max;
undef;
}
##
## RLS_UPDATE_P implements equation II(1) on p26. This is the first half of
## the RLS algorithm.
##
## The terminology matches what is in the book. The algorithm computes
## new values for array p at time k, based upon the vector x at time k and
## the array p at time k-1.
##
## The temporary arrays pkm1_xk and xkT_pkm1 are provided by the caller, to
## eliminate the overhead of creating and destroying temporary arrays.
##
sub rls_update_p {
my($self) = @_;
my($p) = $self->[$RLS_Pi];
my($length) = $self->[$LENGTHi];
my($xk) = $self->[$ATTRIBUTEi];
my($pkm1_xk) = $self->[$RLS_TMP_1i];
my($xkT_pkm1) = $self->[$RLS_TMP_2i];
my($i, $j);
##
## Multiply pkm1 (the array p at time k-1) by xk (x at time k).
##
for ($i=0; $i<$length; $i++) {
$pkm1_xk->[$i] = 0.0;
for ($j=0; $j<$length; $j++) {
$pkm1_xk->[$i] += $p->[$i * $length + $j] * $xk->[$j];
}
}
##
## Get the scalar value [1 + xkT_pkm1_xk] ^ -1. Call it scalar_value.
## This can't be done until after pkm1_xk has been computed.
##
my($scalar_value) = 1.0;
for ($i=0; $i<$length; $i++) {
$scalar_value += $pkm1_xk->[$i] * $xk->[$i];
}
$scalar_value = 1.0 / $scalar_value;
##
## Fold the scalar_value into pkm1_xk. This is more efficient than doing
## it later, because pkm1_xk is an nx1 array and later arrays will be nxn.
##
for $i (0 .. $length-1) {
$pkm1_xk->[$i] *= $scalar_value;
}
##
## Multiply xkT (x at time k, transposed) by pkm1 (the array p
## at time k-1).
##
for $i (0 .. $length-1) {
$xkT_pkm1->[$i] = 0.0;
for $j (0 .. $length-1) {
$xkT_pkm1->[$i] += $xk->[$j] * $p->[$j * $length + $i];
}
}
##
## Multiply pkm1_xk by xkT_pkm1. The result is used to update p, so it
## does not need to be stored explicitly.
##
for $i (0 .. $length-1) {
for $j (0 .. $length-1) {
$p->[$i * $length + $j] -= $pkm1_xk->[$i] * $xkT_pkm1->[$j];
}
}
}
##
## RLS_UPDATE_A implements equation II(2) on p26. This is the second half of
## the RLS algorithm.
##
sub rls_update_a {
my($self, $yk) = @_;
my($a) = $self->[$WEIGHTi];
my($p) = $self->[$RLS_Pi];
my($length) = $self->[$LENGTHi];
my($xk) = $self->[$ATTRIBUTEi];
##
## Multiply xkT (x at time k, transposed) by akm1 (a at time k-1). The
## result is a scalar value. Subtract yk (y at time k).
##
my($scalar_value) = 0.0 - $yk;
my($i, $j);
for ($i=0 ; $i<$length ; $i++) {
$scalar_value += $xk->[$i] * $a->[$i];
}
##
## Multiply pk (p at time k) by xk (x at time k). This is what the
## algorithm calls Kk. Matrix Kk is used to update a, so it does not
## need to be stored explicitly.
##
my($sum_of_products);
for $i (0 .. $length-1) {
$sum_of_products = 0.0;
for $j (0 .. $length-1) {
$sum_of_products += $p->[$i * $length + $j] * $xk->[$j];
}
$a->[$i] -= $sum_of_products * $scalar_value;
}
}
##
## RESTORE restores a linear threshold unit from the specified file.
##
sub restore {
my($class, $file_name) = @_;
if (!open(FILE, "<$file_name")) {
carp("restore: $file_name: $!\n");
return(0);
}
##
## Check that the LTU is consistent with this version.
##
my($ltu_version);
$ltu_version = <FILE>;
chop($ltu_version);
if ($ltu_version < $read_LTU_files_back_to) {
carp "restore: $file_name written with LTU version $ltu_version.\n";
carp "This is version $Statistics::LTU::VERSION, which only understands\n";
carp "LTU files back to version $read_LTU_files_back_to\n";
carp "LTU not restored!\n";
return(0);
}
## First, read the type, the number of weights, and whether or not scaling
## is enabled. Then, create a linear threshold unit. Note that the
## number of variables in the training instances is one less than the
## number of weights (one weight is a constant).
##
my($line);
$line = <FILE>;
chop($line);
my($type, $length, $scaling, $cycles_since_weight_change, $origin) =
split(/ /, $line);
my($new) = $type->new($length-1, $scaling);
$new->[$CYCLES_SINCE_WEIGHT_CHANGEi] = $cycles_since_weight_change;
$new->[$ORIGIN_RESTRICTIONi] = $origin;
##
## Now, read the weights, the minimum value of the attributes, and the
## maximum value of the attributes, from the file, and store them in the
## linear threshold unit.
my($new_weight) = $new->[$WEIGHTi];
my($new_weight_min) = $new->[$WEIGHT_MINi];
my($new_weight_max) = $new->[$WEIGHT_MAXi];
my($new_attrib_min) = $new->[$ATTRIB_MINi];
my($new_attrib_max) = $new->[$ATTRIB_MAXi];
my($new_attrib_scale) = $new->[$ATTRIB_SCALEi];
my($i, @fields);
for $i (0 .. $length-1) {
$line = <FILE>; chop($line);
@fields = split(/ /, $line);
($new_weight->[$i], $new_weight_min->[$i], $new_weight_max->[$i]) =
@fields;
if ($scaling == $LTU_SCALING_ON) {
($new_attrib_min->[$i], $new_attrib_max->[$i]) = @fields[3,4];
if ($new_attrib_max->[$i] > $new_attrib_min->[$i]) {
$new_attrib_scale->[$i] = 1.0 /
($new_attrib_max->[$i] - $new_attrib_min->[$i]);
}
}
}
##### Pick up final theta value
$line = <FILE>; chop($line);
$new->[$WEIGHTi]->[$length-1] = $line;
##
## Read information specific to RLS LTU's.
##
$line = <FILE>; chop($line);
$new->[$RLS_Pi] = [(split(/ /, $line))];
close(FILE);
$new;
}
##
## SAVE saves the linear threshold unit in the specified file.
## If the file can't be created for some reason, 0 is returned. Otherwise, 1
## is returned.
##
sub save {
my($self, $file_name) = @_;
my($length) = $self->[$LENGTHi];
if (!open(FILE, ">$file_name")) {
carp("Statistics::LTU::save: $file_name: $!");
return(0);
}
##
## Stamp the file with a version number.
##
print FILE $Statistics::LTU::VERSION, "\n";
## First, print the type, the number of weights and whether or not scaling
## is enabled. Then, print the weights, the minimum value of the
## attribute, and the maximum value of the attribute. Print 1
## attribute/line, just to make it easier to read.
print FILE join(' ', ((ref $self),
$self->[$LENGTHi],
$self->[$SCALINGi],
$self->[$CYCLES_SINCE_WEIGHT_CHANGEi],
$self->[$ORIGIN_RESTRICTIONi]
)),
"\n";
my($i);
for $i (0 .. $length-1) {
printf FILE "%lf %lf %lf ",
$self->[$WEIGHTi]->[$i],
$self->[$WEIGHT_MINi]->[$i],
$self->[$WEIGHT_MAXi]->[$i];
if ($self->[$SCALINGi] == $LTU_SCALING_ON) {
printf FILE "%lf %lf ",
$self->[$ATTRIB_MINi]->[$i],
$self->[$ATTRIB_MAXi]->[$i];
}
print FILE "\n";
}
printf FILE "%lf\n", $self->[$WEIGHTi]->[$length-1];
##
## Write out information specific to RLS LTU's.
##
print FILE join(' ', @{$self->[$RLS_Pi]}), "\n";
close(FILE);
1;
}
package Statistics::LTU::LMS;
use Statistics::LTU;
@Statistics::LTU::LMS::ISA = qw( Statistics::LTU );
##
## TRAIN trains the specified linear threshold unit on a particular
## instance_vector. It returns 1 if the linear threshold unit already
## classified the instance_vector correctly, otherwise it returns 0.
## The training rule is the least-mean-square (LMS) rule, devised by
## Widrow and Hoff in 1960. The advantage of the LMS rule over absolute
## correction or fixed increment is that it tends to minimize the
## mean-squared error, even when the classes are not linearly separable.
## See the AI Handbook or Duda & Hart for more information.
##
sub train {
my($self, $instance_vector, $desired_value, $rho) = @_;
my($length) = $self->[$LENGTHi];
my($weight) = $self->[$WEIGHTi];
my($attribute) = $self->[$ATTRIBUTEi];
##
## Make sure that rho makes sense. If it doesn't, default to 0.2. This
## number was chosen empirically, on the basis of limited experimentation.
##
$rho = 0.2 if !defined($rho) or $rho <= 0.0;
##
## Only train the linear threshold unit if it does not classify correctly.
##
my($actual_value) = $self->test($instance_vector);
return(1) if ((($actual_value < $Statistics::LTU::LTU_THRESHOLD) &&
($desired_value < $Statistics::LTU::LTU_THRESHOLD)) ||
(($actual_value >= $Statistics::LTU::LTU_THRESHOLD) &&
($desired_value >= $Statistics::LTU::LTU_THRESHOLD)));
## The scale factor can only be changed when the weights are being
## changed, because a change to the scale factor invalidates the current
## set of weights.
$self->maintain_scaling_factors($instance_vector);
$self->_scale_attributes($instance_vector);
##
## Decide how much to adjust the weights by. If the actual_value is 0,
## then the least-mean square rule won't change the weights (it multiplies
## by 0). Therefore, use the fixed-increment rule (which is slower, but
## also guaranteed to converge, if convergence is possible) when the
## actual value is 0. As far as I know, this modification of the LMS rule
## is original (and harmless).
##
my($delta) =
($actual_value == 0.0) ?
$rho * $desired_value
: $rho * (- $actual_value);
##
## Now, adjust the weights. The last weight is handled differently,
## because it is always applied to the constant 1.
##
my($i);
for $i (0 .. $length-1) {
$weight->[$i] += $delta * $attribute->[$i];
}
$self->update_weight_min_max;
0;
}
package Statistics::LTU::TACR;
use Statistics::LTU;
@Statistics::LTU::TACR::ISA = qw( Statistics::LTU );
##
## TRAIN trains the specified linear threshold unit on a particular
## instance_vector. It returns 1 if the linear threshold unit already
## classified the instance_vector correctly, otherwise it returns 0.
## The training rule is the Thermal Absolute Correction Rule, taken from
## Frean's "Learning in Single Perceptrons" dissertation.
##
sub train {
my($self, $instance_vector, $desired_value, $temp, $rate) = @_;
my($length) = $self->[$LENGTHi];
##
## Only train the linear threshold unit if it does not classify correctly.
##
my($actual_value) = $self->test($instance_vector);
return(1) if ((($actual_value < $Statistics::LTU::LTU_THRESHOLD) &&
($desired_value < $Statistics::LTU::LTU_THRESHOLD)) ||
(($actual_value >= $Statistics::LTU::LTU_THRESHOLD) &&
($desired_value >= $Statistics::LTU::LTU_THRESHOLD)));
##
## If either temp or rate is 0, then no weight adjustment takes place.
## This shouldn't occur, but if it does, handle it easily and quickly.
## Note that not handling it allows a divide by zero later.
##
return(0) if ($temp <= 0) or ($rate <= 0);
##
## The scale factor can only be changed when the weights are being
## changed, because a change to the scale factor invalidates the current
## set of weights.
##
$self->maintain_scaling_factors($instance_vector);
$self->_scale_attributes($instance_vector);
##
## Decide how much to adjust the weights by. The absolute correction rule
## (which provides the fastest learning) requires the dot product of the
## instance vector, so do that first.
##
my($attribute) = $self->[$ATTRIBUTEi];
my($dot_product) = 0.0;
my($i);
for $i (0 .. $length-1) {
$dot_product += ($attribute->[$i] * $attribute->[$i]);
}
my($delta) = 1.0 + int(abs($actual_value) / $dot_product);
$delta = -$delta if $desired_value < $Statistics::LTU::LTU_THRESHOLD;
##
## This is the thermal part of the rule.
##
$delta *= $rate * exp((- abs($actual_value) / $temp));
##
## Now, adjust the weights. The last weight is handled differently,
## because it is always applied to the constant 1.
##
my($weight) = $self->[$WEIGHTi];
for $i (0 .. $length-1) {
$weight->[$i] += ($delta * $attribute->[$i]);
}
$self->update_weight_min_max;
0;
}
{ package main; eval join('',<DATA>) || die $@ unless caller(); }
1;##### End of LTU.pm
__END__
#
# Test code
#
package main;
use Statistics::LTU;
srand;
@LTUs = (new Statistics::LTU::ACR(2, 1),
new Statistics::LTU::RLS(2, 1),
new Statistics::LTU::TACR(2, 1),
new Statistics::LTU::LMS(2, 1)
);
# Create examples
my($x, $y, $class);
for (1 .. 20) {
$x = rand; $y = rand;
$class = (($x-.5) > $y) ? 1 : -1;
push(@::Examples, [[$x,$y],$class]);
}
my($ltu, $save_name, $ltu_restored, $ltu_copied, $tolerance);
$tolerance = 0.0001;
my(@OtherArgs);
my($temp, $rate, $rho);
$temp = 0.1;
$rate = 0.01;
$rho = 0.1;
foreach $ltu (@::LTUs) {
$ltu->set_origin_restriction(0);
if (ref($ltu) =~ /TACR/) {
@OtherArgs = ($temp, $rate);
} elsif (ref($ltu) =~ /LMS/) {
@OtherArgs = ($rho);
} else {
@OtherArgs = ();
}
my($example);
foreach $example (@::Examples) {
$ltu->maintain_scaling_factors($example->[0]);
}
for (1 .. 10) {
print "\n\nITERATION $_\n";
foreach $example (@::Examples) {
$ltu->train($example->[0], $example->[1], @OtherArgs);
}
$ltu->print;
}
$save_name = ref($ltu) . ".saved";
$ltu->save($save_name);
$ltu_restored = $ltu->restore($save_name);
$ltu_copied = $ltu->copy;
foreach $example (@::Examples) {
if (abs($ltu->test($example->[0]) - $ltu_restored->test($example->[0]))
> $tolerance) {
warn "Original and restored LTUs disagree!";
$ltu->print;
$ltu_restored->print;
die "SAVE/RESTORE TEST FAILED!";
}
if (abs($ltu->test($example->[0]) - $ltu_copied->test($example->[0]))
> $tolerance) {
warn "Original and copied LTUs disagree!";
$ltu->print;
$ltu_copied->print;
die "COPY TEST FAILED!";
}
}
$ltu->destroy;
$ltu_restored->destroy;
$ltu_copied->destroy;
}
print "Tests passed\n";
1;