The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

use lib '../blib/lib', '../blib/arch';


##   evaluate_training_data2.pl

##  This script is for testing the class discriminatory
##  power of the training data contained in the training
##  files `training.csv', `training2.csv', and
##  `training3.csv'.

##  The three training files mentioned above contain two
##  Gaussian classes with increasing degrees of overlap
##  between them.

##  Through the class EvalTrainingData as shown below, this
##  script runs a 10-fold cross-validation test on the
##  training data.  This test divides all of the training
##  data into ten parts, with nine parts used for training a
##  decision tree and one part used for testing its ability
##  to classify correctly. This selection of nine parts for
##  training and one part for testing is carried out in all
##  of the ten different possible ways.

##  A script like this can also be used to test the
##  appropriateness of your choices for the constructor
##  parameters entropy_threshold, max_depth_desired, and
##  symbolic_to_numeric_cardinality_threshold.

use strict;
use Algorithm::DecisionTree;

##  Name of the CSV file to evaluate.  An optional first command-line
##  argument overrides the default, so that any of the training files
##  named in the header comment can be evaluated without editing this
##  script, e.g.:
##
##      perl evaluate_training_data2.pl training2.csv
##
##  With no argument (or an empty one) the script evaluates
##  `training3.csv', exactly as it did when the name was hard-coded.
my $training_datafile
    = ( @ARGV && length $ARGV[0] ) ? $ARGV[0] : "training3.csv";

##  Construct the evaluator.  The column settings and the three tuning
##  parameters (entropy_threshold, max_depth_desired, and
##  symbolic_to_numeric_cardinality_threshold) are the values to
##  experiment with when judging your constructor choices.
##  NOTE(review): the column indices are assumed to suit every file
##  passed in; confirm when using a CSV with a different layout.
my $eval_data = EvalTrainingData->new(
    training_datafile                         => $training_datafile,
    csv_class_column_index                    => 1,
    csv_columns_for_features                  => [2,3],
    entropy_threshold                         => 0.01,
    max_depth_desired                         => 3,
    symbolic_to_numeric_cardinality_threshold => 10,
);

##  Load the data first, then run the cross-validation test described
##  in the header comment.
$eval_data->get_training_data();
$eval_data->evaluate_training_data();