FFNet.h
1 /*!
2  *
3  *
4  * \brief Implements a Feed-Forward multilayer perceptron
5  *
6  *
7  *
8  * \author O. Krause
9  * \date 2010-2014
10  *
11  *
12  * \par Copyright 1995-2017 Shark Development Team
13  *
14  * <BR><HR>
15  * This file is part of Shark.
16  * <http://shark-ml.org/>
17  *
18  * Shark is free software: you can redistribute it and/or modify
19  * it under the terms of the GNU Lesser General Public License as published
20  * by the Free Software Foundation, either version 3 of the License, or
21  * (at your option) any later version.
22  *
23  * Shark is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26  * GNU Lesser General Public License for more details.
27  *
28  * You should have received a copy of the GNU Lesser General Public License
29  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
30  *
31  */
32 #ifndef SHARK_MODELS_FFNET_H
33 #define SHARK_MODELS_FFNET_H
34 
35 #include <shark/Models/AbstractModel.h>
36 #include <shark/Models/Neurons.h>
37 #include <boost/serialization/vector.hpp>
38 
39 namespace shark{
40 
41 struct FFNetStructures{
42  enum ConnectionType{
43  Normal, //< Layerwise connectivity without shortcuts
44  InputOutputShortcut, //< Normal with additional shortcuts from the input to the output neurons
45  Full //< Every layer is fully connected to all neurons in all previous layers
46  };
47 };
48 
49 //! \brief Offers the functions to create and to work with a feed-forward network.
50 //!
51 //! A feed-forward network consists of several layers. Each layer consists of a linear
52 //! function with optional bias whose response is modified by a (nonlinear) activation function.
53 //! Starting from the input layer, the output of every layer is the input of the next.
54 //! The two template arguments govern the activation functions of the network.
55 //! The activation functions are typically sigmoidal.
56 //! All hidden layers share one activation function, while the output layer can be chosen to use
57 //! a different one, for example to allow the last output to be unbounded, in which case a
58 //! linear output function is used.
59 //! It is not possible to use arbitrary activation functions; only Neurons following the structure
60 //! in Models/Neurons.h are supported. In particular, the derivative of the activation function
61 //! must have the form f'(x) = g(f(x)).
62 //!
63 //! This network class allows for several different topologies. The layer-wise structure
64 //! outlined above is the default one, but the network also allows for shortcuts. Most typically,
65 //! an input-output shortcut is used, that is, a shortcut that connects the input neurons directly
66 //! with the output using linear weights. But a fully connected structure is also possible, where
67 //! every layer is fed as input to every successive layer instead of only the next one.
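// A minimal usage sketch (illustrative only; it assumes the LogisticNeuron and LinearNeuron
// types from Models/Neurons.h, but any neuron types following that structure work the same way):
//
//   FFNet<LogisticNeuron, LinearNeuron> network;
//   network.setStructure(2, 5, 1);                    // 2 inputs, 5 hidden units, 1 output
//   RealVector params(network.numberOfParameters());  // fill e.g. with small random values
//   network.setParameterVector(params);
//   boost::shared_ptr<State> state = network.createState();
//   RealMatrix inputs(100, 2);                        // one pattern per row
//   RealMatrix outputs;
//   network.eval(inputs, outputs, *state);            // outputs becomes a 100 x 1 matrix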
68 template<class HiddenNeuron,class OutputNeuron>
69 class FFNet :public AbstractModel<RealVector,RealVector>
70 {
71  struct InternalState: public State{
72  //! \brief Used to store the current results of the activation
73  //! function for all neurons for the last batch of patterns \f$x\f$.
74  //!
75  //! There is one value for input+hidden+output units for every element of the batch.
76  //! For every value, the following holds:
77  //! Given a network with \f$M\f$ neurons, including
78  //! \f$c\f$ input and \f$n\f$ output neurons the single
79  //! values for \f$z\f$ are given as:
80  //! <ul>
81  //! <li>\f$z_i = x_i,\ \mbox{for\ } 0 \leq i < c\f$</li>
82  //! <li>\f$z_i = g_{hidden}(x),\ \mbox{for\ } c \leq i < M - n\f$</li>
83  //! <li>\f$z_i = y_{i-M+n} = g_{output}(x),\ \mbox{for\ } M - n \leq
84  //! i < M\f$</li>
85  //! </ul>
86  RealMatrix responses;
87 
88  void resize(std::size_t neurons, std::size_t patterns){
89  responses.resize(neurons,patterns);
90  }
91  };
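 // For illustration: for a setStructure(2, 5, 1) network evaluated on a batch of 10 patterns,
 // responses is an 8 x 10 matrix; rows 0-1 hold the inputs, rows 2-6 the hidden responses
 // and row 7 the output responses.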
92 
93 
94 public:
95 
96  //! Creates an empty feed-forward network. After the constructor is called,
97  //! one version of the #setStructure methods needs to be called
98  //! to define the network topology.
99  FFNet()
100  :m_numberOfNeurons(0),m_inputNeurons(0),m_outputNeurons(0){
101  m_features|=HAS_FIRST_PARAMETER_DERIVATIVE;
102  m_features|=HAS_FIRST_INPUT_DERIVATIVE;
103  }
104 
105  //! \brief From INameable: return the class name.
106  std::string name() const
107  { return "FFNet"; }
108 
109  //! \brief Number of input neurons.
110  std::size_t inputSize()const{
111  return m_inputNeurons;
112  }
113  //! \brief Number of output neurons.
114  std::size_t outputSize()const{
115  return m_outputNeurons;
116  }
117  //! \brief Total number of neurons, that is inputs+hidden+outputs.
118  std::size_t numberOfNeurons()const{
119  return m_numberOfNeurons;
120  }
121  //! \brief Total number of hidden neurons.
122  std::size_t numberOfHiddenNeurons()const{
123  return numberOfNeurons() - inputSize() -outputSize();
124  }
125 
126  //! \brief Returns the matrices for every layer used by eval.
127  std::vector<RealMatrix> const& layerMatrices()const{
128  return m_layerMatrix;
129  }
130 
131  //! \brief Returns the weight matrix of the i-th layer.
132  RealMatrix const& layerMatrix(std::size_t layer)const{
133  return m_layerMatrix[layer];
134  }
135 
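 //! \brief Sets the weight matrix and bias vector of the layerNumber-th layer and updates
 //! the backpropagation matrices accordingly.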
136  void setLayer(std::size_t layerNumber, RealMatrix const& m, RealVector const& bias){
137  SIZE_CHECK(m.size1() == bias.size());
138  SIZE_CHECK(m.size1() == m_layerMatrix[layerNumber].size1());
139  SIZE_CHECK(m.size2() == m_layerMatrix[layerNumber].size2());
140  m_layerMatrix[layerNumber] = m;
141  std::size_t start = 0;
142  for(std::size_t i = 0; i != layerNumber; ++i){
143  start += m_layerMatrix[i].size1();
144  }
145  noalias(subrange(m_bias,start,start+bias.size())) = bias;
146  //set backprop matrices
147  setParameterVector(parameterVector());
148  }
149 
150  //! \brief Returns the matrices for every layer used by backpropagation.
151  std::vector<RealMatrix> const& backpropMatrices()const{
152  return m_backpropMatrix;
153  }
154 
155  //! \brief Returns the direct shortcuts between input and output neurons.
156  //!
157  //! This matrix is empty if the network has no explicit input-output shortcut.
158  RealMatrix const& inputOutputShortcut() const{
159  return m_inputOutputShortcut;
160  }
161 
162  /// \brief Returns the activation function of the hidden units.
163  HiddenNeuron const& hiddenActivationFunction()const{
164  return m_hiddenNeuron;
165  }
166  /// \brief Returns the activation function of the output units.
167  OutputNeuron const& outputActivationFunction()const{
168  return m_outputNeuron;
169  }
170 
171  /// \brief Returns the activation function of the hidden units.
172  HiddenNeuron& hiddenActivationFunction(){
173  return m_hiddenNeuron;
174  }
175  /// \brief Returns the activation function of the output units.
176  OutputNeuron& outputActivationFunction(){
177  return m_outputNeuron;
178  }
179 
180  //! \brief Returns the bias values for hidden and output units.
181  //!
182  //! This is either empty or a vector of size numberOfNeurons()-inputSize().
183  //! The first entry is the bias value of the first hidden unit, while the last outputSize() entries
184  //! are the bias values of the output units.
185  const RealVector& bias()const{
186  return m_bias;
187  }
188 
189  ///\brief Returns the portion of the bias vector of the i-th layer.
190  RealVector bias(std::size_t layer)const{
191  std::size_t start = 0;
192  for(std::size_t i = 0; i != layer; ++i){
193  start +=layerMatrices()[i].size1();
194  }
195  return subrange(m_bias,start,start+layerMatrices()[layer].size1());
196  }
197 
198  //! \brief Returns the total number of parameters of the network.
199  std::size_t numberOfParameters()const{
200  std::size_t numParams = m_inputOutputShortcut.size1()*m_inputOutputShortcut.size2();
201  numParams += bias().size();
202  for(std::size_t i = 0; i != layerMatrices().size(); ++i){
203  numParams += layerMatrices()[i].size1()*layerMatrices()[i].size2();
204  }
205  return numParams;
206  }
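 // Worked example for illustration: a setStructure(2, 5, 1) network with Normal connectivity,
 // bias and no shortcut has 5*2 (hidden layer matrix) + 1*5 (output layer matrix) + 6 (bias) = 21 parameters.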
207 
208  //! Returns the vector of used parameters inside the weight matrices.
209  RealVector parameterVector() const{
210  RealVector parameters(numberOfParameters());
211  std::size_t pos = 0;
212  for(auto const& mat: m_layerMatrix){
213  auto vec = to_vector(mat);
214  noalias(subrange(parameters,pos,pos+vec.size())) = vec;
215  pos += vec.size();
216  }
217  noalias(subrange(parameters,pos,parameters.size())) = m_bias | to_vector(m_inputOutputShortcut);
218  return parameters;
219  }
220  //! Uses the values inside the parameter vector to set the weights inside the weight matrices.
221  void setParameterVector(RealVector const& newParameters){
222  //set the forward propagation weights
223  std::size_t pos = 0;
224  for(auto& mat: m_layerMatrix){
225  auto vec = to_vector(mat);
226 
227  noalias(vec) = subrange(newParameters,pos,pos+vec.size());
228  pos += vec.size();
229  }
230  noalias(m_bias) = subrange(newParameters,pos, pos + m_bias.size());
231  noalias(to_vector(m_inputOutputShortcut)) = subrange(newParameters,pos + m_bias.size(), newParameters.size());
232 
233  //we also have to update the backpropagation weights
234  //this is more or less an inversion: for every connection i->j of a neuron i with a neuron j,
235  //the backpropagation matrix has an entry j->i.
236 
237  // We start with all neurons in layer i, look at all layers j > i and check whether
238  // they are connected; in that case we transpose the part of the matrix which connects
239  // layer j with layer i and copy it into the backprop matrix.
240  // We assume here that either all neurons in layer j are connected to all neurons in layer i
241  // or that there are no connections at all between the layers.
242  std::size_t layeriStart = 0;
243  for(std::size_t layeri = 0; layeri != m_layerMatrix.size(); ++layeri){
244  std::size_t columni = 0;
245  std::size_t neuronsi = inputSize();
246  if(layeri > 0)
247  neuronsi = m_layerMatrix[layeri-1].size1();
248 
249  std::size_t layerjStart = layeriStart + neuronsi;
250  for(std::size_t layerj = layeri; layerj != m_layerMatrix.size(); ++layerj){
251  std::size_t neuronsj = m_layerMatrix[layerj].size1();
252  //only process, if layer j has connections with layer i
253  if(layerjStart-m_layerMatrix[layerj].size2() <= layeriStart){
254 
255  //Start of the weight columns to layer i in layer j.
256  //parantheses are important to protect against underflow
257  std::size_t weightStartj = layeriStart -(layerjStart - m_layerMatrix[layerj].size2());
258  noalias(columns(m_backpropMatrix[layeri],columni,columni+neuronsj))
259  = trans(columns(m_layerMatrix[layerj],weightStartj,weightStartj+neuronsi));
260  }
261  columni += neuronsj;
262  layerjStart += neuronsj;
263  }
264  layeriStart += neuronsi;
265  }
266  }
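 // For illustration: in a Normal network with layer sizes {2,3,1}, layerMatrices() holds a 3x2 and
 // a 1x3 matrix, while backpropMatrices() holds their transposed counterparts of size 2x3 and 3x1,
 // which is exactly the relation the loop above maintains.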
267 
268  //! \brief Returns the output of all neurons after the last call of eval
269  //!
270  //! \param state last result of eval
271  //! \return Output value of the neurons.
272  RealMatrix const& neuronResponses(State const& state)const{
273  InternalState const& s = state.toState<InternalState>();
274  return s.responses;
275  }
276 
277  boost::shared_ptr<State> createState()const{
278  return boost::shared_ptr<State>(new InternalState());
279  }
280 
281  ///\brief Returns the response of the i-th layer given the input of that layer.
282  ///
283  /// This is useful if only a portion of the network needs to be evaluated.
284  /// Be aware that this only works correctly for networks without shortcuts.
285  void evalLayer(std::size_t layer,RealMatrix const& patterns,RealMatrix& outputs)const{
286  std::size_t numPatterns = patterns.size1();
287  std::size_t numOutputs = m_layerMatrix[layer].size1();
288  outputs.resize(numPatterns,numOutputs);
289  outputs.clear();
290 
291  //calculate activation. first compute the linear part and the optional bias and then apply
292  // the non-linearity
293  noalias(outputs) = prod(patterns,trans(layerMatrix(layer)));
294  if(!bias().empty()){
295  noalias(outputs) += repeat(bias(layer),numPatterns);
296  }
297  // if this is the last layer, use output neuron response
298  if(layer < m_layerMatrix.size()-1) {
299  noalias(outputs) = m_hiddenNeuron(outputs);
300  }
301  else {
302  noalias(outputs) = m_outputNeuron(outputs);
303  }
304  }
305 
306  ///\brief Returns the response of the i-th layer given the input of that layer.
307  ///
308  /// This is useful if only a portion of the network needs to be evaluated.
309  /// Be aware that this only works correctly for networks without shortcuts.
310  Data<RealVector> evalLayer(std::size_t layer, Data<RealVector> const& patterns)const{
311  int batches = (int) patterns.numberOfBatches();
312  Data<RealVector> result(batches);
313  SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i){
314  evalLayer(layer,patterns.batch(i),result.batch(i));
315  }
316  return result;
317  }
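 // Illustrative use (assuming an FFNet instance 'network' and a data set 'inputs'):
 //
 //   Data<RealVector> hiddenResponses = network.evalLayer(0, inputs); // responses of the first hidden layer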
318 
319  void eval(RealMatrix const& patterns,RealMatrix& output, State& state)const{
320  InternalState& s = state.toState<InternalState>();
321  std::size_t numPatterns = patterns.size1();
322  //initialize the input layer using the patterns.
323  s.resize(numberOfNeurons(),numPatterns);
324  s.responses.clear();
325  noalias(rows(s.responses,0,m_inputNeurons)) = trans(patterns);
326  std::size_t beginNeuron = m_inputNeurons;
327 
328  for(std::size_t layer = 0; layer != m_layerMatrix.size();++layer){
329  RealMatrix const& weights = m_layerMatrix[layer];
330  //number of rows of the layer is also the number of neurons
331  std::size_t endNeuron = beginNeuron + weights.size1();
332  //some subranges of vectors
333  //inputs are the last n neurons, where n is the number of columns of the matrix
334  auto const input = rows(s.responses,beginNeuron - weights.size2(),beginNeuron);
335  //the neurons responses
336  auto responses = rows(s.responses,beginNeuron,endNeuron);
337 
338  //calculate activation. first compute the linear part and the optional bias and then apply
339  // the non-linearity
340  noalias(responses) = prod(weights,input);
341  if(!bias().empty()){
342  //the bias of the layer is shifted as input units can not have bias.
343  auto bias = subrange(m_bias,beginNeuron-inputSize(),endNeuron-inputSize());
344  noalias(responses) += trans(repeat(bias,numPatterns));
345  }
346  SHARK_CRITICAL_REGION{//beware Dropout Neurons!
347  // if this is the last layer, use output neuron response instead
348  if(layer < m_layerMatrix.size()-1) {
349  noalias(responses) = m_hiddenNeuron(responses);
350  }
351  else {
352  //add shortcuts if necessary
353  if(m_inputOutputShortcut.size1() != 0){
354  noalias(responses) += prod(m_inputOutputShortcut,trans(patterns));
355  }
356  noalias(responses) = m_outputNeuron(responses);
357  }
358  }
359  //go to the next layer
360  beginNeuron = endNeuron;
361  }
362  //Sanity check
363  SIZE_CHECK(beginNeuron == m_numberOfNeurons);
364 
365  //copy output layer into output
366  output.resize(numPatterns,m_outputNeurons);
367  noalias(output) = trans(rows(s.responses,m_numberOfNeurons-outputSize(),m_numberOfNeurons));
368  }
370 
371  void weightedParameterDerivative(
372  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, RealVector& gradient
373  )const{
374  SIZE_CHECK(coefficients.size2() == m_outputNeurons);
375  SIZE_CHECK(coefficients.size1() == patterns.size1());
376  std::size_t numPatterns=patterns.size1();
377 
378  //initialize delta using coefficients and clear the rest. also don't compute the delta for
379  // the input neurons as they are not needed.
380  RealMatrix delta(numberOfNeurons(),numPatterns,0.0);
381  auto outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
382  noalias(outputDelta) = trans(coefficients);
383 
384  computeDelta(delta,state,false);
385  computeParameterDerivative(delta,state,gradient);
386 
387  }
388 
389  void weightedInputDerivative(
390  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, BatchInputType& inputDerivative
391  )const{
392  SIZE_CHECK(coefficients.size2() == m_outputNeurons);
393  SIZE_CHECK(coefficients.size1() == patterns.size1());
394  std::size_t numPatterns=patterns.size1();
395 
396  //initialize delta using coefficients and clear the rest
397  //we compute the full set of delta values here. the delta values of the inputs are the inputDerivative
398  RealMatrix delta(numberOfNeurons(),numPatterns,0.0);
399  auto outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
400  noalias(outputDelta) = trans(coefficients);
401 
402  computeDelta(delta,state,true);
403  inputDerivative.resize(numPatterns,inputSize());
404  noalias(inputDerivative) = trans(rows(delta,0,inputSize()));
405  }
406 
407  virtual void weightedDerivatives(
408  BatchInputType const & patterns,
409  BatchOutputType const & coefficients,
410  State const& state,
411  RealVector& parameterDerivative,
412  BatchInputType& inputDerivative
413  )const{
414  SIZE_CHECK(coefficients.size2() == m_outputNeurons);
415  SIZE_CHECK(coefficients.size1() == patterns.size1());
416  std::size_t numPatterns = patterns.size1();
417 
418 
419  //compute full delta and thus the input derivative
420  RealMatrix delta(numberOfNeurons(),numPatterns,0.0);
421  auto outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
422  noalias(outputDelta) = trans(coefficients);
423 
424  computeDelta(delta,state,true);
425  inputDerivative.resize(numPatterns,inputSize());
426  noalias(inputDerivative) = trans(rows(delta,0,inputSize()));
427 
428  //reuse delta to compute the parameter derivative
429  computeParameterDerivative(delta,state,parameterDerivative);
430  }
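 // Illustrative sketch of the usual forward/backward cycle; 'inputs' and 'targets' are
 // placeholders and the squared-error coefficients are just one possible choice:
 //
 //   boost::shared_ptr<State> state = network.createState();
 //   RealMatrix outputs;
 //   network.eval(inputs, outputs, *state);        // forward pass, fills the state
 //   RealMatrix coefficients = outputs - targets;  // derivative of 0.5*||outputs-targets||^2
 //   RealVector gradient;
 //   network.weightedParameterDerivative(inputs, coefficients, *state, gradient);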
431 
432  //! \brief Calculates the derivative for the special case, when error terms for all neurons of the network exist.
433  //!
434  //! This is useful when the hidden neurons need to meet additional requirements.
435  //! The value of delta is changed during computation and holds the results of the backpropagation steps.
436  //! The format is such that the rows of delta are the neurons and the columns the patterns.
437  void weightedParameterDerivativeFullDelta(
438  RealMatrix const& patterns, RealMatrix& delta, State const& state, RealVector& gradient
439  )const{
440  InternalState const& s = state.toState<InternalState>();
441  SIZE_CHECK(delta.size1() == m_numberOfNeurons);
442  SIZE_CHECK(delta.size2() == patterns.size1());
443  SIZE_CHECK(s.responses.size2() == patterns.size1());
444 
445  computeDelta(delta,state,false);
446  //now compute the parameter derivative from the delta values
447  computeParameterDerivative(delta,state,gradient);
448  }
449 
450  //! \brief Creates a connection matrix for a network.
451  //!
452  //! Automatically creates a network with several layers, with
453  //! the numbers of neurons for each layer defined by \em layers.
454  //! \em layers must have at least size 2; a size of exactly 2 results in a network with no hidden layers.
455  //! The first and last values correspond to the number of inputs and outputs, respectively.
456  //!
457  //! The network supports three different types of connection models:
458  //! FFNetStructures::Normal corresponds to a layerwise connection between consecutive
459  //! layers. FFNetStructures::InputOutputShortcut additionally adds a shortcut between
460  //! input and output neurons. FFNetStructures::Full connects every layer to every following
461  //! layer; this also includes the shortcuts for input and output neurons. Additionally,
462  //! a bias term can be used.
463  //!
464  //! While Normal and Full only use the layer matrices, InputOutputShortcut also uses
465  //! the corresponding shortcut matrix (be aware that in the case of only one hidden layer,
466  //! the shortcut between input and output leads to the same network as Full - in that case
467  //! the Full topology is chosen for optimization reasons).
468  //!
469  //! \param layers contains the numbers of neurons for each layer of the network.
470  //! \param connectivity type of connection used between layers
471  //! \param biasNeuron if set to \em true, bias connections for
472  //! all neurons (except the input neurons)
473  //! will be created.
474  void setStructure(
475  std::vector<size_t> const& layers,
476  FFNetStructures::ConnectionType connectivity = FFNetStructures::Normal,
477  bool biasNeuron = true
478  ){
479  SIZE_CHECK(layers.size() >= 2);
480  m_layerMatrix.resize(layers.size()-1);//we don't model the input layer
481  m_backpropMatrix.resize(layers.size()-1);//we don't model the output layer
482 
483  //small optimization for networks with only 3 layers
484  //in this case, we don't need an explicit shortcut as we can integrate it into
485  //the big matrices
486  if(connectivity == FFNetStructures::InputOutputShortcut && layers.size() ==3)
487  connectivity = FFNetStructures::Full;
488 
489 
490  m_inputNeurons = layers.front();
491  m_outputNeurons = layers.back();
492  m_numberOfNeurons = 0;
493  for(std::size_t i = 0; i != layers.size(); ++i){
494  m_numberOfNeurons += layers[i];
495  }
496  if(biasNeuron){
497  m_bias.resize(m_numberOfNeurons - m_inputNeurons);
498  }
499 
500  if(connectivity == FFNetStructures::Full){
501  //connect to all previous layers.
502  std::size_t numNeurons = layers[0];
503  for(std::size_t i = 0; i != m_layerMatrix.size(); ++i){
504  m_layerMatrix[i].resize(layers[i+1],numNeurons);
505  m_backpropMatrix[i].resize(layers[i],m_numberOfNeurons-numNeurons);
506  numNeurons += layers[i+1];
507 
508  }
509  m_inputOutputShortcut.resize(0,0);
510  }else{
511  //only connect with the previous layer
512  for(std::size_t i = 0; i != m_layerMatrix.size(); ++i){
513  m_layerMatrix[i].resize(layers[i+1],layers[i]);
514  m_backpropMatrix[i].resize(layers[i],layers[i+1]);
515  }
516 
517  //create a shortcut from input to output when desired
518  if(connectivity == FFNetStructures::InputOutputShortcut){
519  m_inputOutputShortcut.resize(m_outputNeurons,m_inputNeurons);
520  }
521  }
522  }
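 // Illustrative call of this overload (layer sizes chosen arbitrarily, 'network' is an FFNet instance):
 //
 //   std::vector<size_t> layers(4);
 //   layers[0] = 4; layers[1] = 10; layers[2] = 10; layers[3] = 2; // 4 inputs, two hidden layers, 2 outputs
 //   network.setStructure(layers, FFNetStructures::InputOutputShortcut, true);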
523 
524  //! \brief Creates a connection matrix for a network with a
525  //! single hidden layer
526  //!
527  //! Automatically creates a network with
528  //! three different layers: An input layer with \em in input neurons,
529  //! an output layer with \em out output neurons and one hidden layer
530  //! with \em hidden neurons, respectively.
531  //!
532  //! \param in number of input neurons.
533  //! \param hidden number of neurons in the hidden layer.
534  //! \param out number of output neurons.
535  //! \param connectivity Type of connectivity between the layers
536  //! \param bias if set to \em true, bias connections for
537  //! all neurons (except the input neurons)
538  //! will be created.
539  void setStructure(
540  std::size_t in,
541  std::size_t hidden,
542  std::size_t out,
543  FFNetStructures::ConnectionType connectivity = FFNetStructures::Normal,
544  bool bias = true
545  ){
546  std::vector<size_t> layer(3);
547  layer[0] = in;
548  layer[1] = hidden;
549  layer[2] = out;
550  setStructure(layer, connectivity, bias);
551  }
552 
553  //! \brief Creates a connection matrix for a network with two
554  //! hidden layers.
555  //!
556  //! Automatically creates a network with
557  //! four different layers: An input layer with \em in input neurons,
558  //! an output layer with \em out output neurons and two hidden layers
559  //! with \em hidden1 and \em hidden2 hidden neurons, respectively.
560  //!
561  //! \param in number of input neurons.
562  //! \param hidden1 number of neurons of the first hidden layer.
563  //! \param hidden2 number of neurons of the second hidden layer.
564  //! \param out number of output neurons.
565  //! \param connectivity Type of connectivity between the layers
566  //! \param bias if set to \em true, bias connections for
567  //! all neurons (except the input neurons)
568  //! will be created.
569  void setStructure(
570  std::size_t in,
571  std::size_t hidden1,
572  std::size_t hidden2,
573  std::size_t out,
574  FFNetStructures::ConnectionType connectivity = FFNetStructures::Normal,
575  bool bias = true
576  ){
577  std::vector<size_t> layer(4);
578  layer[0] = in;
579  layer[1] = hidden1;
580  layer[2] = hidden2;
581  layer[3] = out;
582  setStructure(layer, connectivity, bias);
583  }
584 
585  //! From ISerializable, reads a model from an archive
586  void read( InArchive & archive ){
587  archive>>m_inputNeurons;
588  archive>>m_outputNeurons;
589  archive>>m_numberOfNeurons;
590  archive>>m_layerMatrix;
591  archive>>m_backpropMatrix;
592  archive>>m_inputOutputShortcut;
593  archive>>m_bias;
594  }
595 
596  //! From ISerializable, writes a model to an archive
597  void write( OutArchive & archive ) const{
598  archive<<m_inputNeurons;
599  archive<<m_outputNeurons;
600  archive<<m_numberOfNeurons;
601  archive<<m_layerMatrix;
602  archive<<m_backpropMatrix;
603  archive<<m_inputOutputShortcut;
604  archive<<m_bias;
605  }
606 
607 
608 private:
609 
610  void computeDelta(
611  RealMatrix& delta, State const& state, bool computeInputDelta
612  )const{
613  SIZE_CHECK(delta.size1() == numberOfNeurons());
614  InternalState const& s = state.toState<InternalState>();
615 
616  //initialize output neurons using coefficients
617  auto outputDelta = rows(delta,delta.size1()-outputSize(),delta.size1());
618  auto outputResponse = rows(s.responses,delta.size1()-outputSize(),delta.size1());
619  noalias(outputDelta) *= m_outputNeuron.derivative(outputResponse);
620 
621  //iterate backwards using the backprop matrix and propagate the errors to get the needed delta values
622  //we stop once we have filled all delta values. Thus we might not need to compute all layers.
623 
624  //last neuron of the current layer that we need to compute
625  //we don't need to (and cannot) compute the values of the output neurons as they are given from the outside
626  std::size_t endNeuron = delta.size1()-outputSize();
627  std::size_t layer = m_backpropMatrix.size()-1;
628  std::size_t endIndex = computeInputDelta? 0: inputSize();
629  while(endNeuron > endIndex){
630 
631  RealMatrix const& weights = m_backpropMatrix[layer];
632  std::size_t beginNeuron = endNeuron - weights.size1();//first neuron of the current layer
633  //get the delta and response values of this layer
634  auto layerDelta = rows(delta,beginNeuron,endNeuron);
635  auto layerDeltaInput = rows(delta,endNeuron,endNeuron+weights.size2());
636  auto layerResponse = rows(s.responses,beginNeuron,endNeuron);
637 
638  noalias(layerDelta) += prod(weights,layerDeltaInput);//add the values to the maybe non-empty delta part
639  if(layer != 0){
640  noalias(layerDelta) *= m_hiddenNeuron.derivative(layerResponse);
641  }
642  //go a layer backwards
643  endNeuron=beginNeuron;
644  --layer;
645  }
646 
647  //add the shortcut deltas if necessary
648  if(inputOutputShortcut().size1() != 0)
649  noalias(rows(delta,0,inputSize())) += prod(trans(inputOutputShortcut()),outputDelta);
650  }
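 // Note: the loop above implements the usual backpropagation recursion. For a hidden neuron i with
 // stored response \f$ z_i \f$ it computes \f$ \delta_i = g_{hidden}'(z_i)\sum_j w_{ji}\delta_j \f$,
 // where j runs over the neurons fed by i; the derivative is evaluated from the stored response,
 // as required by the f'(x) = g(f(x)) convention.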
651 
652  void computeParameterDerivative(RealMatrix const& delta, State const& state, RealVector& gradient)const{
653  SIZE_CHECK(delta.size1() == numberOfNeurons());
654  InternalState const& s = state.toState<InternalState>();
655  // calculate error gradient
656  //todo: take network structure into account to prevent checking all possible weights...
657  gradient.resize(numberOfParameters());
658  std::size_t pos = 0;
659  std::size_t layerStart = inputSize();
660  for(std::size_t layer = 0; layer != layerMatrices().size(); ++layer){
661  //obtain input, delta and gradients for the current layer
662  std::size_t layerRows = layerMatrices()[layer].size1();
663  std::size_t layerColumns = layerMatrices()[layer].size2();
664  std::size_t params = layerRows*layerColumns;
665  auto gradMatrix = to_matrix(subrange(gradient,pos,pos+params),layerRows,layerColumns);
666  auto deltaLayer = rows(delta,layerStart,layerStart+layerRows);
667  auto inputLayer = rows(s.responses,layerStart-layerColumns,layerStart);
668  noalias(gradMatrix) = prod(deltaLayer, trans(inputLayer));
669 
670  pos += params;
671  layerStart += layerRows;
672  }
673  //check whether we need the bias derivative
674  if(!bias().empty()){
675  //calculate bias derivative
676  for (std::size_t neuron = m_inputNeurons; neuron < m_numberOfNeurons; neuron++){
677  gradient(pos) = sum(row(delta,neuron));
678  pos++;
679  }
680  }
681  //compute shortcut derivative
682  if(inputOutputShortcut().size1() != 0){
683  std::size_t params = inputSize()*outputSize();
684  auto gradMatrix = to_matrix(subrange(gradient,pos,pos+params),outputSize(),inputSize());
685  auto deltaLayer = rows(delta,delta.size1()-outputSize(),delta.size1());
686  auto inputLayer = rows(s.responses,0,inputSize());
687  noalias(gradMatrix) = prod(deltaLayer, trans(inputLayer));
688  }
689 
690  }
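 // Note: every weight gradient computed above is the batch sum
 // \f$ \partial E/\partial w_{ij} = \sum_p \delta_i^{(p)} z_j^{(p)} \f$, i.e.
 // prod(deltaLayer, trans(inputLayer)) accumulates delta times input response over all patterns,
 // and each bias gradient is the corresponding row sum of delta.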
691 
692 
693  //! \brief Number of all network neurons.
694  //!
695  //! This is the total number of neurons in the network, i.e.
696  //! input, hidden and output neurons.
697  std::size_t m_numberOfNeurons;
698  std::size_t m_inputNeurons;
699  std::size_t m_outputNeurons;
700 
701  //! \brief represents the connection matrix using a layered structure for forward propagation
702  //!
703  //! a layer is made of neurons with consecutive indices which are not
704  //! connected with each other. In other words, if there exists a k with i<k<j such
705  //! that C(i,k) = 1 or C(k,j) = 1 or C(j,i) = 1, then the neurons i,j are not in the same layer.
706  //! This is the forward view, meaning that each layer holds the weights which are used to calculate
707  //! the activation of the neurons of the layer.
708  std::vector<RealMatrix> m_layerMatrix;
709 
710  //! \brief optional matrix directly connecting input to output
711  //!
712  //! This is only filled when the network has an input-output shortcut but not a full layer connection.
713  RealMatrix m_inputOutputShortcut;
714 
715  //!\brief represents the backward view of the network as a layered structure.
716  //!
717  //! This is the backward view of the network, which is used for the backpropagation step. Every
718  //! matrix contains the weights of the neurons which are activated by the layer.
719  std::vector<RealMatrix> m_backpropMatrix;
720 
721  //! bias weights of the neurons
722  RealVector m_bias;
723 
724  //!Type of hidden neuron. See Models/Neurons.h for a few choices
725  HiddenNeuron m_hiddenNeuron;
726  //! Type of output neuron. See Models/Neurons.h for a few choices
727  OutputNeuron m_outputNeuron;
728 };
729 
730 
731 }
732 #endif