StoppingCriteria.cpp
Go to the documentation of this file.
1 #include <shark/Data/Csv.h>
2 #include <shark/Models/LinearModel.h>//single dense layer
3 #include <shark/Models/ConcatenatedModel.h>//for stacking layers to a feed forward neural network
4 #include <shark/Algorithms/GradientDescent/Rprop.h> //Optimization algorithm
5 #include <shark/ObjectiveFunctions/Loss/CrossEntropy.h> //Loss used for training
6 #include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h> //The real loss for testing.
7 #include <shark/Algorithms/Trainers/OptimizationTrainer.h> // Trainer wrapping iterative optimization
8 #include <shark/Algorithms/StoppingCriteria/MaxIterations.h> //A simple stopping criterion that stops after a fixed number of iterations
9 #include <shark/Algorithms/StoppingCriteria/TrainingError.h> //Stops when the algorithm seems to converge
10 #include <shark/Algorithms/StoppingCriteria/GeneralizationQuotient.h> //Uses the validation error to track the progress
11 #include <shark/Algorithms/StoppingCriteria/ValidatedStoppingCriterion.h> //Adds the validation error to the value of the point
12 
13 #include <iostream>
14 
15 using namespace shark;
16 using namespace std;
17 
18 //this program demonstrates the effect of different stopping criteria on the performance of a neural network.
19 template<class T>
20 double experiment(
22  AbstractStoppingCriterion<T> & stoppingCriterion,
23  ClassificationDataset const& trainingset,
24  ClassificationDataset const& testset
25 ){
26  initRandomUniform(network,-0.1,0.1);
27 
28  //The Cross Entropy maximises the activation of the cth output neuron
29  // compared to all other outputs for a sample with class c.
30  CrossEntropy loss;
31 
32  //we use IRpropPlus for network optimization
33  IRpropPlus optimizer;
34 
35  //create an optimization trainer and train the model
36  OptimizationTrainer<AbstractModel<RealVector, RealVector>,unsigned int > trainer(&loss, &optimizer, &stoppingCriterion);
37  trainer.train(network, trainingset);
38 
39  //evaluate the performance on the test set using the classification loss we choose 0.5 as threshold since Logistic neurons have values between 0 and 1.
40 
42  Data<RealVector> predictions = network(testset.inputs());
43  return loss01(testset.labels(),predictions);
44 }
45 int main(){
46  //load the diabetes dataset shuffle its entries and split it in training, validation and test set.
48  importCSV(data, "data/diabetes.csv",LAST_COLUMN, ',');
49  data.shuffle();
50  ClassificationDataset test = splitAtElement(data,static_cast<std::size_t>(0.75*data.numberOfElements()));
51  ClassificationDataset validation = splitAtElement(data,static_cast<std::size_t>(0.66*data.numberOfElements()));
52 
55  ConcatenatedModel<RealVector> network = layer1 >> layer2;
56 
57  //simple stopping criterion which allows for n iterations (here n = 10,100,500)
58  MaxIterations<> maxIterations(10);
59  double resultMaxIterations1 = experiment(network, maxIterations,data,test);
60  maxIterations.setMaxIterations(100);
61  double resultMaxIterations2 = experiment(network, maxIterations,data,test);
62  maxIterations.setMaxIterations(500);
63  double resultMaxIterations3 = experiment(network, maxIterations,data,test);
64 
65  TrainingError<> trainingError(10,1.e-5);
66  double resultTrainingError = experiment(network, trainingError,data,test);
67 
68  //for the validated stopping criteria we need to define an error function using the validation set
69  CrossEntropy loss;
70  ErrorFunction validationFunction(validation,&network,&loss);
71  GeneralizationQuotient<> generalizationQuotient(10,0.1);
72  ValidatedStoppingCriterion validatedLoss(&validationFunction,&generalizationQuotient);
73  double resultGeneralizationQuotient = experiment(network, validatedLoss,data,test);
74 
75  //print the results
76  cout << "RESULTS: " << endl;
77  cout << "======== \n" << endl;
78  cout << "10 iterations : " << resultMaxIterations1 << endl;
79  cout << "100 iterations : " << resultMaxIterations2 << endl;
80  cout << "500 iterations : " << resultMaxIterations3 << endl;
81  cout << "training Error : " << resultTrainingError << endl;
82  cout << "generalization Quotient : " << resultGeneralizationQuotient << endl;
83 }