The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
META.json 11
META.yml 11
examples/README 11
examples/cluster_and_visualize.pl 28
examples/find_best_K_and_cluster.pl 56
examples/which_cluster_for_new_data.pl 11
lib/Algorithm/KMeans.pm 79
7 files changed (This is a version diff) 1827
@@ -39,5 +39,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : "2.02"
+   "version" : "2.03"
 }
@@ -21,4 +21,4 @@ requires:
   Graphics::GnuplotIF: 1.6
   Math::GSL: 0.32
   Math::Random: 0.71
-version: 2.02
+version: 2.03
@@ -6,7 +6,7 @@ Examples directory:
 
         cluster_and_visualize.pl
 
-    The more time you spend with this script the more comfortable you will
+    The more time you spend with this script, the more comfortable you will
     become with the use of this module. The script file contains a large
     comment block that talks about the SIX LOCATIONS in the script where
     you have to make decisions about how to use the module.
@@ -15,9 +15,14 @@
 ##
 ##      1) First choose which data file you want to use for clustering
 ##
+##
 ##      2) Next, choose the data mask to apply to the columns of the data file.  The
 ##           position of the letter `N' in the mask indicates the column that
-##           contains a symbolic name for each data record.
+##           contains a symbolic name for each data record.  If the symbolic name for
+##           each data record is in the first column and you want to cluster 3D data
+##           that is in the next three columns, your data mask will be N111.  On the
+##           other hand, if for the same data file, you want to carry out 2D
+##           clustering on the last two columns, your data mask will be N011.
 ##
 ##      3) Next, you need to decide how many clusters you want the program to return.
 ##           If you want the program to figure out on its own how many clusters to 
@@ -50,7 +55,8 @@ my $datafile = "mydatafile2.dat";          # contains 2 well separated clusters,
 #my $datafile = "mydatafile3.dat";         # contains 2 clusters, 2D data
 
 
-# Mask:
+# Mask: (For emphasis, this is a slightly more detailed repetition of the comment
+# made above in Item 2)
 
 # The mask tells the module which columns of the data file are to be used for
 # clustering, which columns are to be ignored, and which column contains a symbolic
@@ -36,11 +36,12 @@ my $datafile = "mydatafile2.dat";                   # contains 2 clusters, 3D da
 
 # The mask tells the module which columns of the data file are to be used for
 # clustering, which columns are to be ignored and which column contains the symbolic
-# ID tag for a data point.  If the ID is in column 1 and you are clustering 3D data,
-# the mast would be "N111".  Note the first character in the mask in this case is `N'
-# for "Name".  If, on the other hand, you wanted to ignore the first data coordinate
-# for clustering, the mask would be "N011".  The symbolic ID can be in any column ---
-# you just have to place the character `N' at the right place:
+# ID tag for a data point.  If the ID is in the first column and you are clustering
+# 3D data in the next three columns, the mask would be "N111".  Note the first
+# character in the mask in this case is `N' for "Name".  If, on the other hand, you
+# wanted to ignore the first data coordinate for clustering, the mask would be
+# "N011".  The symbolic ID can be in any column --- you just have to place the
+# character `N' at the right place:
 
 my $mask = "N111";
 #my $mask = "N11";
@@ -1,6 +1,6 @@
 #!/usr/bin/perl -w
 
-use lib '../blib/lib', '../blib/arch';
+#use lib '../blib/lib', '../blib/arch';
 
 
 ##  which_cluster_for_new_data.pl
@@ -18,7 +18,7 @@ use Graphics::GnuplotIF;
 use Math::GSL::Matrix;
 
 
-our $VERSION = '2.02';
+our $VERSION = '2.03';
 
 # from Perl docs:
 my $_num_regex =  '^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$'; 
@@ -1443,6 +1443,7 @@ sub get_index_at_value {
     foreach my $i (0..@array-1) {
         return $i if $value == $array[$i];
     }
+    return -1;
 }
 
 # This routine is really not necessary in light of the new `~~' operator in Perl.
@@ -1583,11 +1584,9 @@ sub fisher_yates_shuffle {
 }
 
 sub variance_normalization {
-    print "Normalizing data with respect to variances\n";
     my %data_hash = %{shift @_};
     my @all_data_points = values %data_hash;
     my $dimensions = @{$all_data_points[0]};
-
     my @data_projections;
     foreach my $data_point (@all_data_points) {
         my $i = 0;
@@ -1945,6 +1944,9 @@ Algorithm::KMeans - for clustering multidimensional data
 
 =head1 CHANGES
 
+Version 2.03 incorporates minor code cleanup.  The main implementation of the module
+remains unchanged.
+
 Version 2.02 downshifts the version of Perl that is required for this module.  The
 module should work with versions 5.10 and higher of Perl.  The implementation code
 for the module remains unchanged.
@@ -2148,7 +2150,7 @@ separation between the means than a consequence of the intra-cluster variability
 
 =back
 
-=head2 Constructor Parameters:
+=head2 Constructor Parameters
 
 =over 8
 
@@ -2464,10 +2466,10 @@ the string 'KMeans' in the subject line.
 Download the archive from CPAN in any directory of your choice.  Unpack the archive
 with a command that on a Linux machine would look like:
 
-    tar zxvf Algorithm-KMeans-2.02.tar.gz
+    tar zxvf Algorithm-KMeans-2.03.tar.gz
 
 This will create an installation directory for you whose name will be
-C<Algorithm-KMeans-2.02>.  Enter this directory and execute the following commands
+C<Algorithm-KMeans-2.03>.  Enter this directory and execute the following commands
 for a standard install of the module if you have root privileges:
 
     perl Makefile.PL
@@ -2475,7 +2477,7 @@ for a standard install of the module if you have root privileges:
     make test
     sudo make install
 
-if you do not have root privileges, you can carry out a non-standard install the
+If you do not have root privileges, you can carry out a non-standard install of the
 module in any directory of your choice by:
 
     perl Makefile.PL prefix=/some/other/directory/