CSvmMaxLikelihoodMS.cpp
#include <shark/Data/Dataset.h>
// additional Shark headers required by the classes used below
// (the include list was abbreviated in this listing; these paths
// are restored as assumptions from the classes the code uses):
#include <shark/Data/DataDistribution.h>
#include <shark/Data/CVDatasetTools.h>
#include <shark/Data/Statistics.h>
#include <shark/Models/Normalizer.h>
#include <shark/Models/Kernels/ArdKernel.h>
#include <shark/Algorithms/Trainers/NormalizeComponentsUnitVariance.h>
#include <shark/Algorithms/Trainers/CSvmTrainer.h>
#include <shark/Algorithms/GradientDescent/Rprop.h>
#include <shark/ObjectiveFunctions/SvmLogisticInterpretation.h>
#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h>

using namespace std;
using namespace shark;


// define the basic dimensionality of the problem
unsigned int useful_dim = 5;
unsigned int noise_dim = 5;
unsigned int total_dim = useful_dim + noise_dim;

RealVector run_one_trial( bool verbose ) {

	// set up the classification problem from a DataDistribution
	PamiToy problem( useful_dim, noise_dim );
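	// The PamiToy distribution carries all class information in the first
	// useful_dim input coordinates; the remaining noise_dim coordinates are
	// uninformative. A good ARD solution should therefore assign noticeably
	// larger kernel weights to the first five dimensions than to the last five.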

	// construct training and test sets from the problem distribution
	unsigned int train_size = 500;
	unsigned int test_size = 5000;
	ClassificationDataset train = problem.generateDataset( train_size );
	ClassificationDataset test = problem.generateDataset( test_size );

	// normalize data as usual
	Normalizer<> normalizer;
	NormalizeComponentsUnitVariance<> normalizationTrainer(false);
	normalizationTrainer.train( normalizer, train.inputs() );
	train = transformInputs( train, normalizer );
	test = transformInputs( test, normalizer );

	// set up the ARD kernel
	DenseARDKernel kernel( total_dim, 0.1 ); //for now with an arbitrary value for gamma (gets properly initialized later)
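	// Note: DenseARDKernel stores its bandwidths under a squared encoding,
	// gamma_i = p_i^2, so that the externally visible parameters p_i remain
	// unconstrained during gradient-based optimization. This is why the
	// entries of kernel.parameterVector() get squared further below whenever
	// the true gamma values are reported.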

	// set up partitions for cross-validation
	unsigned int num_folds = 5;
	CVFolds<ClassificationDataset> cv_folds = createCVIID( train, num_folds );
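	// createCVIID splits the (i.i.d.) training data into num_folds folds;
	// each fold serves once as the validation set while the remaining folds
	// form the training set of the corresponding SVM run.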

	// set up the learning machine
	bool log_enc_c = true; //use log encoding for the regularization parameter C
	QpStoppingCondition stop(1e-12); //use a very conservative stopping criterion for the individual SVM runs
	SvmLogisticInterpretation<> mlms( cv_folds, &kernel, log_enc_c, &stop ); //the main class for this tutorial
	//SvmLogisticInterpretation<> mlms( cv_folds, &kernel, log_enc_c ); //also possible without stopping criterion
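	// SvmLogisticInterpretation implements the maximum-likelihood model
	// selection (MLMS) criterion: for each cross-validation fold it trains an
	// SVM, fits a sigmoid to the SVM outputs on the held-out data, and returns
	// the negative cross-validated log-likelihood (NCLL) of the validation
	// labels. The criterion is differentiable with respect to all kernel
	// parameters and C, which is what enables the gradient-based search below.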

	// set up a starting point for the optimization process
	RealVector start( total_dim+1 );
	if ( log_enc_c ) start( total_dim ) = 0.0; else start( total_dim ) = 1.0; //start at C = 1.0
	for ( unsigned int k=0; k<total_dim; k++ )
		start(k) = 0.5 / total_dim;

	// for illustration purposes, we also evaluate the model selection criterion a single time at the starting point
	double start_value = mlms.eval( start );

	if ( verbose ) {
		std::cout << "Value of model selection criterion at starting point: " << start_value << std::endl << std::endl;
		std::cout << " -------------------------------------------------------------------------------- " << std::endl;
		std::cout << " ----------- Beginning gradient-based optimization of MLMS criterion ------------ " << std::endl;
		std::cout << " -------------------------------------------------------------------------------- " << std::endl << std::endl;
	}

	// set up the optimizer
	IRpropPlus rprop;
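	// IRpropPlus is the improved Rprop variant with weight-backtracking. It
	// adapts one step size per coordinate from the signs of successive partial
	// derivatives only, which makes it insensitive to the scale of the gradient.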
	double stepsize = 0.1;
	double stop_delta = 1e-3;
	mlms.init();
	rprop.init( mlms, start, stepsize );
	unsigned int its = 50;

	// start the optimization loop
	for (unsigned int i=0; i<its; i++) {
		rprop.step( mlms );
		if ( verbose )
			std::cout << "iteration " << i << ": current NCLL = " << rprop.solution().value << " at parameter: " << rprop.solution().point << std::endl;
		if ( rprop.maxDelta() < stop_delta ) {
			if ( verbose ) std::cout << " Rprop quit because of small progress " << std::endl;
			break;
		}
	}

	if ( verbose ) {
		std::cout << std::endl;
		std::cout << " -------------------------------------------------------------------------------- " << std::endl;
		std::cout << " ----------- Done with gradient-based optimization of MLMS criterion ------------ " << std::endl;
		std::cout << " -------------------------------------------------------------------------------- " << std::endl << std::endl;
	}
	if ( verbose ) std::cout << std::endl << std::endl << " EVALUATION of hyperparameters found:" << std::endl << std::endl << std::endl;

	double C_reg; //will hold the regularization parameter
	double test_error_v1, train_error_v1; //will hold errors determined via method 1
	double test_error_v2, train_error_v2; //will hold errors determined via method 2

	// BEGIN POSSIBILITY ONE OF HYPERPARAMETER COPY
	if ( verbose ) std::cout << std::endl << " Possibility 1: copy kernel parameters via eval() and C by hand..." << std::endl << std::endl;

	// copy final parameters, variant one
	double end_value = mlms.eval( rprop.solution().point ); //this at the same time copies the most recent parameters from rprop to the kernel
	C_reg = ( log_enc_c ? exp( rprop.solution().point(total_dim) ) : rprop.solution().point(total_dim) ); //ATTENTION: mind the encoding
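	// With log_enc_c set, the optimizer searches over log(C) rather than C
	// itself, so that C stays positive without constraining the search; the
	// exp() above undoes that encoding before handing C to the trainer.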

	if ( verbose ) {
		std::cout << " Value of model selection criterion at final point: " << end_value << std::endl;
		std::cout << " Done optimizing the SVM hyperparameters. The final parameters (true/unencoded) are:" << std::endl << std::endl;
		std::cout << " C = " << C_reg << std::endl;
		for ( unsigned int i=0; i<total_dim; i++ )
			std::cout << " gamma(" << i << ") = " << kernel.parameterVector()(i)*kernel.parameterVector()(i) << std::endl;
		std::cout << std::endl << " (as also given by kernel.gammaVector() : " << kernel.gammaVector() << " ) " << std::endl;
	}

	// construct and train the final learner
	KernelClassifier<RealVector> svm_v1; //the final SVM classifier
	CSvmTrainer<RealVector> trainer_v1( &kernel, C_reg, true, log_enc_c ); //encoding does not really matter in this case because it does not affect the ctor
	if ( verbose ) {
		std::cout << std::endl << std::endl << " Used mlms.eval(...) to copy kernel.parameterVector() " << kernel.parameterVector() << std::endl;
		std::cout << " into trainer_v1.parameterVector() " << trainer_v1.parameterVector() << std::endl;
		std::cout << " , where C (the last parameter) was set manually to " << trainer_v1.C() << std::endl << std::endl << std::endl;
	}
	trainer_v1.train( svm_v1, train ); //the kernel has the right parameters, and we copied C, so we are good to go

	// evaluate the final trained classifier on training and test set
	ZeroOneLoss<unsigned int> loss_v1; //0-1 loss for computing classification errors
	Data<unsigned int> output_v1; //predicted class labels
	output_v1 = svm_v1( train.inputs() );
	train_error_v1 = loss_v1.eval( train.labels(), output_v1 );
	output_v1 = svm_v1( test.inputs() );
	test_error_v1 = loss_v1.eval( test.labels(), output_v1 );
	if ( verbose ) {
		std::cout << " training error via possibility 1: " << train_error_v1 << std::endl;
		std::cout << " test error via possibility 1: " << test_error_v1 << std::endl << std::endl << std::endl;
	}
	// END POSSIBILITY ONE OF HYPERPARAMETER COPY

	// BEGIN POSSIBILITY TWO OF HYPERPARAMETER COPY
	if ( verbose ) std::cout << std::endl << " Possibility 2: copy best parameters via solution().point..." << std::endl << std::endl;

	KernelClassifier<RealVector> svm_v2; //a second SVM classifier, trained from the copied parameters
	CSvmTrainer<RealVector> trainer_v2( &kernel, 0.1, true, log_enc_c ); //ATTENTION: must be constructed with the same log-encoding preference; the value 0.1 for C is a dummy, overwritten below
	trainer_v2.setParameterVector( rprop.solution().point ); //copy best hyperparameters to the SVM trainer

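	// The trainer's parameterVector() is the concatenation of the kernel
	// parameters and the (encoded) regularization parameter C, i.e., it has
	// the same layout as the optimizer's search point. A single
	// setParameterVector() call therefore transfers all hyperparameters at once.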
	if ( verbose ) {
		std::cout << " Copied rprop.solution().point = " << rprop.solution().point << std::endl;
		std::cout << " into trainer_v2.parameterVector(), now = " << trainer_v2.parameterVector() << std::endl << std::endl << std::endl;
	}

	trainer_v2.train( svm_v2, train );

	// evaluate the final trained classifier on training and test set
	ZeroOneLoss<unsigned int> loss_v2; //0-1 loss for computing classification errors
	Data<unsigned int> output_v2; //predicted class labels
	output_v2 = svm_v2( train.inputs() );
	train_error_v2 = loss_v2.eval( train.labels(), output_v2 );
	output_v2 = svm_v2( test.inputs() );
	test_error_v2 = loss_v2.eval( test.labels(), output_v2 );
	if ( verbose ) {
		std::cout << " training error via possibility 2: " << train_error_v2 << std::endl;
		std::cout << " test error via possibility 2: " << test_error_v2 << std::endl << std::endl << std::endl;
		std::cout << std::endl << "That's all folks - we are done!" << std::endl;
	}
	// END POSSIBILITY TWO OF HYPERPARAMETER COPY

	// copy the best parameters, as well as the performance values, into the result vector:
	RealVector final_params(total_dim+3);
	final_params(total_dim) = C_reg;
	for ( unsigned int i=0; i<total_dim; i++ )
		final_params(i) = rprop.solution().point(i)*rprop.solution().point(i); //square to obtain the true gamma values
	final_params(total_dim+1) = train_error_v1;
	final_params(total_dim+2) = test_error_v1;
	return final_params;

}


int main() {

	// run one trial with output
	run_one_trial( true );
	std::cout << "\nNOW REPEAT WITH 100 TRIALS: we now do the exact same thing multiple times in a row and note the average kernel weights. Please wait." << std::endl << std::endl;

	// run several trials without output, and average the results
	unsigned int num_trials = 100;
	Data<RealVector> many_results( num_trials, RealVector(total_dim+3) ); //each element holds the hyperparameters and errors of one trial
	for ( unsigned int i=0; i<num_trials; i++ ) {
		many_results.element(i) = run_one_trial(false);
		std::cout << "." << std::flush;
	}
	std::cout << "\n" << std::endl;

	// compute the mean and variance of the hyperparameters and errors over all trials
	RealVector overall_mean, overall_variance;
	meanvar( many_results, overall_mean, overall_variance );
	for ( unsigned int i=0; i<total_dim+1; i++ ) {
		std::cout << "avg-param(" << i << ") = " << overall_mean(i) << " +- " << overall_variance(i) << std::endl; //note: the second number is the variance, not the standard deviation
	}
	std::cout << std::endl << "avg-error-train = " << overall_mean(total_dim+1) << " +- " << overall_variance(total_dim+1) << std::endl;
	std::cout << "avg-error-test  = " << overall_mean(total_dim+2) << " +- " << overall_variance(total_dim+2) << std::endl;

}