#!/usr/bin/env perl

##  randomized_trees_for_classifying_one_test_sample_1.pl

##  This script demonstrates using the RandomizedTreesForBigData class for
##  solving a data classification problem when there is a significant
##  disparity between the populations of the training samples for the
##  different classes.  You need to set the following two parameters
##  in the call to the constructor for the 'needle-in-a-haystack' logic
##  to work:
##
##              looking_for_needles_in_haystack
##              how_many_trees

use strict;
use warnings;
use Algorithm::RandomizedTreesForBigData;

##  NOTE: The database file mentioned below is proprietary and is NOT
##        included in the module package.
#my $training_datafile = "/home/kak/DecisionTree_data/AtRisk/AtRiskModel_File_modified.csv";
#my $training_datafile = "try_50.csv";
my $training_datafile = "try_rand_150.csv";

my $rt = Algorithm::RandomizedTreesForBigData->new(
                              training_datafile => $training_datafile,
                              csv_class_column_index => 48,
                              csv_columns_for_features => [24,32,33,34,41],
                              entropy_threshold => 0.01,
                              max_depth_desired => 8,
                              symbolic_to_numeric_cardinality_threshold => 10,
                              how_many_trees => 5,
                              looking_for_needles_in_haystack => 1,
         );

print "\nReading the training data ...\n";
$rt->get_training_data_for_N_trees();

##   COMMENT OUT the following statement if you do NOT want to see the training data
##   used for each tree:
$rt->show_training_data_for_all_trees();


print "\nCalculating first order probabilities...\n";
$rt->calculate_first_order_probabilities();

print "\nCalculating class priors...\n";
$rt->calculate_class_priors();

print "\nConstructing all decision trees ....\n";
$rt->construct_all_decision_trees();

##   COMMENT OUT the following statement if you do NOT want to see each decision tree
##   displayed individually:
$rt->display_all_decision_trees();

print "\nReading the test sample....\n";
my $test_sample  = ['SATV = 110',
                    'SATM = 130',
                    'SATW = 180',
                    'HSGPA = 1.5'];

print "\nClassify the test sample with each decision tree....\n";
$rt->classify_with_all_trees( $test_sample );

##   COMMENT OUT the following statement if you do NOT want to see the classification results
##   produced by each tree separately:
$rt->display_classification_results_for_all_trees();

print "\n\nWill now calculate the majority decision from all trees:\n";
my $decision = $rt->get_majority_vote_classification();
print "\nMajority vote decision: $decision\n";