DeepNetworkTrainingRBM.cpp
//the model and the deep network
#include <shark/Models/LinearModel.h>//single dense layer
#include <shark/Models/ConcatenatedModel.h>//for stacking layers
#include <shark/Unsupervised/RBM/BinaryRBM.h> // model for unsupervised pre-training

//training the model
#include <shark/ObjectiveFunctions/ErrorFunction.h>//the error function for the supervised training of the network
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h> // squared loss used for unsupervised pre-training
#include <shark/ObjectiveFunctions/Loss/CrossEntropy.h> // loss used for supervised training
#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h> // loss used for evaluation of performance
#include <shark/ObjectiveFunctions/Regularizer.h> //L1 and L2 regularisation
#include <shark/Algorithms/GradientDescent/SteepestDescent.h> //optimizer: simple gradient descent
#include <shark/Algorithms/GradientDescent/Rprop.h> //optimizer for supervised fine-tuning

using namespace std;
using namespace shark;
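
//This example pre-trains two layers of binary RBMs with one-step contrastive
//divergence (CD-1), stacks them together with a linear output layer into a
//feed-forward network and fine-tunes the whole network on a supervised
//classification task.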

//our artificial problem: noisy 4x4 binary patterns, written either row-wise (label 1)
//or column-wise (label 0) into a 16-dimensional vector
LabeledData<RealVector,unsigned int> createProblem(){
    std::vector<RealVector> data(320,RealVector(16));
    std::vector<unsigned int> label(320);
    RealVector line(4);
    for(std::size_t k = 0; k != 10; ++k){
        for(size_t x=0; x != 16; x++) {
            for(size_t j=0; j != 4; j++) {
                bool val = (x & (1<<j)) > 0;
                line(j) = val;
                if(random::coinToss(random::globalRng,0.3))//flip the bit with probability 0.3 to make the problem noisy
                    line(j) = !val;
            }

            //label 1: every row of the 4x4 image is the pattern
            for(int i=0; i != 4; i++) {
                subrange(data[x+k*16],i*4 ,i*4 + 4) = line;
            }
            //label 0: every column of the 4x4 image is the pattern
            for(int i=0; i != 4; i++) {
                for(int l=0; l<4; l++) {
                    data[x+k*16+160](l*4 + i) = line(l);
                }
            }
            label[x+k*16] = 1;
            label[x+k*16+160] = 0;
        }
    }
    return createLabeledDataFromRange(data,label);
}

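//trainRBM trains a single binary RBM on unlabeled data with CD-1 and returns it;
//each call below produces one pre-trained hidden layer of the deep network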
//training of an RBM
BinaryRBM trainRBM(
    UnlabeledData<RealVector> const& data,//the data to train with
    std::size_t numHidden,//number of hidden features of the RBM
    std::size_t iterations, //number of iterations to optimize
    double regularisation,//strength of the regularisation
    double learningRate // learning rate of steepest descent
){
    //create rbm with simple binary units using the global random number generator
    std::size_t inputs = dataDimension(data);
    BinaryRBM rbm(random::globalRng);
    rbm.setStructure(inputs,numHidden);
    initRandomUniform(rbm,-0.1*std::sqrt(1.0/inputs),0.1*std::sqrt(1.0/inputs));//initialize weights uniformly

    //create the gradient estimator for the rbm
    //we want a simple vanilla CD-1
    BinaryCD estimator(&rbm);
    TwoNormRegularizer regularizer;
    //regularisation is the strength of the two-norm penalty; 0.0 means no regularisation, choose >= 0.0
    estimator.setRegularizer(regularisation,&regularizer);
    estimator.setK(1);//number of sampling steps
    estimator.setData(data);//the data used for optimization

    //create and configure optimizer
    SteepestDescent optimizer;
    optimizer.setLearningRate(learningRate);//learning rate of the algorithm

    //now we train the rbm
    unsigned int numIterations = iterations;//iterations for training
    estimator.init();
    optimizer.init(estimator);
    for(unsigned int iteration = 0; iteration != numIterations; ++iteration) {
        optimizer.step(estimator);
    }
    rbm.setParameterVector(optimizer.solution().point);
    return rbm;
}

int main()
{
    //model parameters
    std::size_t numHidden1 = 8;
    std::size_t numHidden2 = 8;
    //unsupervised hyper parameters
    double unsupRegularisation = 0.001;
    double unsupLearningRate = 0.1;
    std::size_t unsupIterations = 10000;
    //supervised hyper parameters
    double regularisation = 0.0001;
    std::size_t iterations = 200;

    //create the data set and split it into training and test
    LabeledData<RealVector,unsigned int> data = createProblem();
    data.shuffle();
    LabeledData<RealVector,unsigned int> test = splitAtElement(data,static_cast<std::size_t>(0.5*data.numberOfElements()));

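    //greedy layer-wise pre-training: the first RBM is trained on the raw inputs,
    //the second on the feature activations produced by the first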
    //train the first hidden layer
    std::cout<<"pre-training first layer"<<std::endl;
    BinaryRBM rbm1 = trainRBM(
        data.inputs(),numHidden1,
        unsupIterations,unsupRegularisation,unsupLearningRate
    );

    //compute the mapping of the data onto the features of the first hidden layer
    rbm1.evaluationType(true,true);//evaluate the direction visible->hidden and return mean activations instead of samples
    UnlabeledData<RealVector> intermediateData=rbm1(data.inputs());

    //train the next layer
    std::cout<<"pre-training second layer"<<std::endl;
    BinaryRBM rbm2 = trainRBM(
        intermediateData,numHidden2,
        unsupIterations,unsupRegularisation,unsupLearningRate
    );

    //build three layer neural network from the pre-trained RBMs:
    //each sigmoid layer gets the weight matrix and hidden bias of its RBM
    LinearModel<RealVector,LogisticNeuron> layer1(rbm1.weightMatrix(),rbm1.hiddenNeurons().bias());
    LinearModel<RealVector,LogisticNeuron> layer2(rbm2.weightMatrix(),rbm2.hiddenNeurons().bias());
    LinearModel<RealVector> output(layer2.outputShape(),numberOfClasses(data));
    initRandomNormal(output,0.01);
    auto network = layer1 >> layer2 >> output;

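    //the pre-trained weights only provide a starting point; the whole stacked
    //network is now fine-tuned on the labels with a supervised objective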
    //create the supervised problem: cross entropy loss with one norm regularisation
    CrossEntropy loss;
    ErrorFunction error(data, &network, &loss);
    OneNormRegularizer regularizer(error.numberOfVariables());
    error.setRegularizer(regularisation,&regularizer);

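    //IRprop+ adapts an individual step size for each weight from the sign of its
    //gradient, so no learning rate has to be tuned for the supervised phase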
    //optimize the model
    std::cout<<"training supervised model"<<std::endl;
    IRpropPlusFull optimizer;
    error.init();
    optimizer.init(error);
    for(std::size_t i = 0; i != iterations; ++i){
        optimizer.step(error);
        std::cout<<i<<" "<<optimizer.solution().value<<std::endl;
    }
    network.setParameterVector(optimizer.solution().point);

    //evaluation
    ZeroOneLoss<unsigned int, RealVector> loss01;//classification error based on the arg max of the network output
    Data<RealVector> predictionTrain = network(data.inputs());
    cout << "classification error,train: " << loss01.eval(data.labels(), predictionTrain) << endl;

    Data<RealVector> prediction = network(test.inputs());
    cout << "classification error,test: " << loss01.eval(test.labels(), prediction) << endl;

}