@@ -39,5 +39,5 @@
}
},
"release_status" : "stable",
- "version" : "2.02"
+ "version" : "2.03"
}
@@ -21,4 +21,4 @@ requires:
Graphics::GnuplotIF: 1.6
Math::GSL: 0.32
Math::Random: 0.71
-version: 2.02
+version: 2.03
@@ -6,7 +6,7 @@ Examples directory:
cluster_and_visualize.pl
- The more time you spend with this script the more comfortable you will
+ The more time you spend with this script, the more comfortable you will
become with the use of this module. The script file contains a large
comment block that talks about the SIX LOCATIONS in the script where
you have to make decisions about how to use the module.
@@ -15,9 +15,14 @@
##
## 1) First choose which data file you want to use for clustering
##
+##
## 2) Next, choose the data mask to apply to the columns of the data file. The
## position of the letter `N' in the mask indicates the column that
-## contains a symbolic name for each data record.
+## contains a symbolic name for each data record. If the symbolic name for
+## each data record is in the first column and you want to cluster 3D data
+## that is in the next three columns, your data mask will be N111. On the
+## other hand, if for the same data file, you want to carry out 2D
+## clustering on the last two columns, your data mask will be N011.
##
## 3) Next, you need to decide how many clusters you want the program to return.
## If you want the program to figure out on its own how many clusters to
@@ -50,7 +55,8 @@ my $datafile = "mydatafile2.dat"; # contains 2 well separated clusters,
#my $datafile = "mydatafile3.dat"; # contains 2 clusters, 2D data
-# Mask:
+# Mask: (For emphasis, this is a slightly more detailed repetition of the comment
+# made above in Item 2)
# The mask tells the module which columns of the data file are to be used for
# clustering, which columns are to be ignored, and which column contains a symbolic
@@ -36,11 +36,12 @@ my $datafile = "mydatafile2.dat"; # contains 2 clusters, 3D da
# The mask tells the module which columns of the data file are to be used for
# clustering, which columns are to be ignored and which column contains the symbolic
-# ID tag for a data point. If the ID is in column 1 and you are clustering 3D data,
-# the mast would be "N111". Note the first character in the mask in this case is `N'
-# for "Name". If, on the other hand, you wanted to ignore the first data coordinate
-# for clustering, the mask would be "N011". The symbolic ID can be in any column ---
-# you just have to place the character `N' at the right place:
+# ID tag for a data point. If the ID is in the first column and you are clustering
+# 3D data in the next three columns, the mask would be "N111". Note the first
+# character in the mask in this case is `N' for "Name". If, on the other hand, you
+# wanted to ignore the first data coordinate for clustering, the mask would be
+# "N011". The symbolic ID can be in any column --- you just have to place the
+# character `N' at the right place:
my $mask = "N111";
#my $mask = "N11";
@@ -1,6 +1,6 @@
#!/usr/bin/perl -w
-use lib '../blib/lib', '../blib/arch';
+#use lib '../blib/lib', '../blib/arch';
## which_cluster_for_new_data.pl
@@ -18,7 +18,7 @@ use Graphics::GnuplotIF;
use Math::GSL::Matrix;
-our $VERSION = '2.02';
+our $VERSION = '2.03';
# from Perl docs:
my $_num_regex = '^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$';
@@ -1443,6 +1443,7 @@ sub get_index_at_value {
foreach my $i (0..@array-1) {
return $i if $value == $array[$i];
}
+ return -1;
}
# This routine is really not necessary in light of the new `~~' operator in Perl.
@@ -1583,11 +1584,9 @@ sub fisher_yates_shuffle {
}
sub variance_normalization {
- print "Normalizing data with respect to variances\n";
my %data_hash = %{shift @_};
my @all_data_points = values %data_hash;
my $dimensions = @{$all_data_points[0]};
-
my @data_projections;
foreach my $data_point (@all_data_points) {
my $i = 0;
@@ -1945,6 +1944,9 @@ Algorithm::KMeans - for clustering multidimensional data
=head1 CHANGES
+Version 2.03 incorporates minor code cleanup. The main implementation of the module
+remains unchanged.
+
Version 2.02 downshifts the version of Perl that is required for this module. The
module should work with versions 5.10 and higher of Perl. The implementation code
for the module remains unchanged.
@@ -2148,7 +2150,7 @@ separation between the means than a consequence of the intra-cluster variability
=back
-=head2 Constructor Parameters:
+=head2 Constructor Parameters
=over 8
@@ -2464,10 +2466,10 @@ the string 'KMeans' in the subject line.
Download the archive from CPAN in any directory of your choice. Unpack the archive
with a command that on a Linux machine would look like:
- tar zxvf Algorithm-KMeans-2.02.tar.gz
+ tar zxvf Algorithm-KMeans-2.03.tar.gz
This will create an installation directory for you whose name will be
-C<Algorithm-KMeans-2.02>. Enter this directory and execute the following commands
+C<Algorithm-KMeans-2.03>. Enter this directory and execute the following commands
for a standard install of the module if you have root privileges:
perl Makefile.PL
@@ -2475,7 +2477,7 @@ for a standard install of the module if you have root privileges:
make test
sudo make install
-if you do not have root privileges, you can carry out a non-standard install the
+If you do not have root privileges, you can carry out a non-standard install of the
module in any directory of your choice by:
perl Makefile.PL prefix=/some/other/directory/