Autoencoder.h
Go to the documentation of this file.
1 /*!
2  * \brief Implements the autoencoder
3  *
4  * \author O. Krause
5  * \date 2010-2014
6  *
7  *
8  * \par Copyright 1995-2017 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://shark-ml.org/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef SHARK_MODELS_AUTOENCODER_H
29 #define SHARK_MODELS_AUTOENCODER_H
30 
32 #include <shark/Models/Neurons.h>
33 #include <boost/serialization/vector.hpp>
34 
35 namespace shark{
36 
/// \brief Implements the autoencoder.
///
/// The model computes
/// \f[ f(x) = \sigma_2(W_2\sigma_1(W_1 x+b_1)+b_2)\f]
/// where \f$ W_1, W_2, b_1 \f$ and \f$ b_2 \f$ are the weights and
/// \f$\sigma_1\f$ and \f$ \sigma_2\f$ are the activation functions for hidden and output units.
///
/// See TiedAutoencoder for the tied-weights version where \f$ W_2=W_1^T \f$.
template<class HiddenNeuron,class OutputNeuron>
class Autoencoder :public AbstractModel<RealVector,RealVector>
{
	/// Per-batch activations cached by eval() and reused by the
	/// weighted*Derivative() methods for backpropagation.
	struct InternalState: public State{
		RealMatrix hiddenResponses; ///< hidden-layer activations, one pattern per row
		RealMatrix outputResponses; ///< output-layer activations, one pattern per row
	};
52 
53 
54 public:
58  }
59 
60  //! \brief From INameable: return the class name.
61  std::string name() const{
62  return "Autoencoder";
63  }
64 
65  //! \brief Number of input neurons.
66  std::size_t inputSize()const{
67  return outputSize();
68  }
69  //! \brief Number of output neurons.
70  std::size_t outputSize()const{
71  return outputBias().size();
72  }
73 
74  //! \brief Total number of hidden neurons.
75  std::size_t numberOfHiddenNeurons()const{
76  return encoderMatrix().size1();
77  }
78 
79  /// \brief Returns the hidden bias weight vector.
80  RealVector const& hiddenBias()const{
81  return m_hiddenBias;
82  }
83 
84  /// \brief Returns the hidden bias weight vector.
85  RealVector& hiddenBias(){
86  return m_hiddenBias;
87  }
88 
89  /// \brief Returns the output bias weight vector.
90  RealVector const& outputBias()const{
91  return m_outputBias;
92  }
93  /// \brief Returns the output bias weight vector.
94  RealVector& outputBias(){
95  return m_outputBias;
96  }
97 
98  /// \brief Weight matrix for the direction input->hidden.
99  RealMatrix const& encoderMatrix()const{
100  return m_encoderMatrix;
101  }
102  /// \brief Weight matrix for the direction input->hidden.
103  RealMatrix& encoderMatrix(){
104  return m_encoderMatrix;
105  }
106  /// \brief Weight matrix for the direction hidden->output
107  ///
108  RealMatrix const& decoderMatrix()const{
109  return m_decoderMatrix;
110  }
111  /// \brief Weight matrix for the direction hidden->output
112  RealMatrix& decoderMatrix(){
113  return m_decoderMatrix;
114  }
115 
116  //! \brief Returns the total number of parameters of the network.
117  std::size_t numberOfParameters()const{
119  }
120 
121  //! returns the vector of used parameters inside the weight matrix
122  RealVector parameterVector() const{
123  return to_vector(m_encoderMatrix) | to_vector(m_decoderMatrix) | m_hiddenBias | m_outputBias;
124  }
125  //! uses the values inside the parametervector to set the used values inside the weight matrix
126  void setParameterVector(RealVector const& newParameters){
127  SIZE_CHECK(newParameters.size() == numberOfParameters());
128 
129  std::size_t endWeights1 = m_encoderMatrix.size1() * m_encoderMatrix.size2();
130  std::size_t endWeights2 = endWeights1 + m_decoderMatrix.size1() * m_decoderMatrix.size2();
131  std::size_t endBias = endWeights2 + m_hiddenBias.size();
132  noalias(to_vector(m_encoderMatrix)) = subrange(newParameters,0,endWeights1);
133  noalias(to_vector(m_decoderMatrix)) = subrange(newParameters,endWeights1,endWeights2);
134  noalias(m_hiddenBias) = subrange(newParameters,endWeights2,endBias);
135  noalias(m_outputBias) = subrange(newParameters,endBias,endBias+m_outputBias.size());
136  }
137 
138  //! \brief Returns the output of all neurons after the last call of eval
139  //!
140  //! \param state last result of eval
141  //! \return Output value of the neurons.
142  RealMatrix const& hiddenResponses(State const& state)const{
143  InternalState const& s = state.toState<InternalState>();
144  return s.hiddenResponses;
145  }
146 
147  /// \brief Returns the activation function of the hidden units.
148  HiddenNeuron const& hiddenActivationFunction()const{
149  return m_hiddenNeuron;
150  }
151  /// \brief Returns the activation function of the output units.
152  OutputNeuron const& outputActivationFunction()const{
153  return m_outputNeuron;
154  }
155 
156  /// \brief Returns the activation function of the hidden units.
157  HiddenNeuron& hiddenActivationFunction(){
158  return m_hiddenNeuron;
159  }
160  /// \brief Returns the activation function of the output units.
161  OutputNeuron& outputActivationFunction(){
162  return m_outputNeuron;
163  }
164 
165  boost::shared_ptr<State> createState()const{
166  return boost::shared_ptr<State>(new InternalState());
167  }
168 
169  void evalLayer(std::size_t layer,RealMatrix const& patterns,RealMatrix& outputs)const{
170  SIZE_CHECK(layer < 2);
171  std::size_t numPatterns = patterns.size1();
172 
173  if(layer == 0){//input->hidden
174  SIZE_CHECK(patterns.size2() == encoderMatrix().size2());
175  std::size_t numOutputs = encoderMatrix().size1();
176  outputs.resize(numPatterns,numOutputs);
177  outputs.clear();
178  noalias(outputs) = prod(patterns,trans(encoderMatrix())) + repeat(hiddenBias(),numPatterns);
179  noalias(outputs) = m_hiddenNeuron(outputs);
180  }
181  else{//hidden->output
182  SIZE_CHECK(patterns.size2() == decoderMatrix().size2());
183  std::size_t numOutputs = decoderMatrix().size1();
184  outputs.resize(numPatterns,numOutputs);
185  outputs.clear();
186  noalias(outputs) = prod(patterns,trans(decoderMatrix())) + repeat(outputBias(),numPatterns);
187  noalias(outputs) = m_outputNeuron(outputs);
188  }
189  }
190 
191  ///\brief Returns the response of the i-th layer given the input of that layer.
192  ///
193  /// this is usefull if only a portion of the network needs to be evaluated
194  /// be aware that this only works without shortcuts in the network
195  Data<RealVector> evalLayer(std::size_t layer, Data<RealVector> const& patterns)const{
196  SIZE_CHECK(layer < 2);
197  int batches = (int) patterns.numberOfBatches();
198  Data<RealVector> result(batches);
199  SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i){
200  evalLayer(layer,patterns.batch(i),result.batch(i));
201  }
202  return result;
203  }
204 
	/// \brief Propagates a dataset through the encoder (layer 0: input->hidden).
	Data<RealVector> encode(Data<RealVector> const& patterns)const{
		return evalLayer(0,patterns);
	}

	/// \brief Propagates a dataset through the decoder (layer 1: hidden->output).
	Data<RealVector> decode(Data<RealVector> const& patterns)const{
		return evalLayer(1,patterns);
	}
212 
213  template<class Label>
216  )const{
217  return LabeledData<RealVector,Label>(encode(data.inputs()),data.labels());
218  }
219 
220  template<class Label>
223  )const{
224  return LabeledData<RealVector,Label>(decode(data.inputs()),data.labels());
225  }
226 
227 
228  void eval(RealMatrix const& patterns,RealMatrix& output, State& state)const{
229  InternalState& s = state.toState<InternalState>();
230  evalLayer(0,patterns,s.hiddenResponses);//propagate input->hidden
231  evalLayer(1,s.hiddenResponses,s.outputResponses);//propagate hidden->output
232  output = s.outputResponses;
233  }
235 
237  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, RealVector& gradient
238  )const{
239  SIZE_CHECK(coefficients.size2() == outputSize());
240  SIZE_CHECK(coefficients.size1() == patterns.size1());
241 
242  RealMatrix outputDelta = coefficients;
243  RealMatrix hiddenDelta;
244  computeDelta(state,outputDelta,hiddenDelta);
245  computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,gradient);
246  }
247 
249  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, BatchInputType& inputDerivative
250  )const{
251  SIZE_CHECK(coefficients.size2() == outputSize());
252  SIZE_CHECK(coefficients.size1() == patterns.size1());
253 
254  RealMatrix outputDelta = coefficients;
255  RealMatrix hiddenDelta;
256  computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
257  }
258 
259  virtual void weightedDerivatives(
260  BatchInputType const & patterns,
261  BatchOutputType const & coefficients,
262  State const& state,
263  RealVector& parameterDerivative,
264  BatchInputType& inputDerivative
265  )const{
266  SIZE_CHECK(coefficients.size2() == outputSize());
267  SIZE_CHECK(coefficients.size1() == patterns.size1());
268 
269  RealMatrix outputDelta = coefficients;
270  RealMatrix hiddenDelta;
271  computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
272  computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,parameterDerivative);
273  }
274 
276  std::size_t in,std::size_t hidden
277  ){
278  m_encoderMatrix.resize(hidden,in);
279  m_decoderMatrix.resize(in,hidden);
280  m_hiddenBias.resize(hidden);
281  m_outputBias.resize(in);
282  }
283 
	//! From ISerializable, reads a model from an archive
	//! (members are restored in the same order write() stores them)
	void read( InArchive & archive ){
		archive>>m_encoderMatrix;
		archive>>m_decoderMatrix;
		archive>>m_hiddenBias;
		archive>>m_outputBias;
	}
291 
	//! From ISerializable, writes a model to an archive
	//! (order must stay in sync with read())
	void write( OutArchive & archive ) const{
		archive<<m_encoderMatrix;
		archive<<m_decoderMatrix;
		archive<<m_hiddenBias;
		archive<<m_outputBias;
	}
299 
300 
301 private:
302 
303  void computeDelta(
304  State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta
305  )const{
306  InternalState const& s = state.toState<InternalState>();
307 
308  noalias(outputDelta) *= m_outputNeuron.derivative(s.outputResponses);
309  hiddenDelta.resize(outputDelta.size1(),numberOfHiddenNeurons());
310  noalias(hiddenDelta) = prod(outputDelta,decoderMatrix());
311  noalias(hiddenDelta) *= m_hiddenNeuron.derivative(s.hiddenResponses);
312  }
313 
	/// \brief As computeDelta above, but additionally backpropagates the hidden
	/// delta through the encoder weights to obtain the delta w.r.t. the inputs.
	void computeDelta(
		State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta, RealMatrix& inputDelta
	)const{
		computeDelta(state,outputDelta,hiddenDelta);
		inputDelta.resize(outputDelta.size1(),inputSize());
		noalias(inputDelta) = prod(hiddenDelta,encoderMatrix());
	}
321 
322  void computeParameterDerivative(
323  RealMatrix const& patterns, RealMatrix const& outputDelta, RealMatrix const& hiddenDelta,
324  State const& state, RealVector& gradient
325  )const{
326  InternalState const& s = state.toState<InternalState>();
327  std::size_t hiddenParams = inputSize()*numberOfHiddenNeurons();
328  std::size_t numHidden = numberOfHiddenNeurons();
329  gradient.resize(numberOfParameters());
330  auto gradEncoder = to_matrix(subrange(gradient,0,hiddenParams),numHidden,inputSize());
331  auto gradDecoder = to_matrix(subrange(gradient,hiddenParams,2*hiddenParams),outputSize(),numHidden);
332  noalias(gradDecoder) = prod(trans(outputDelta),s.hiddenResponses);
333  noalias(gradEncoder) = prod(trans(hiddenDelta),patterns);
334 
335  std::size_t hiddenBiasPos = 2*hiddenParams;
336  std::size_t outputBiasPos = hiddenBiasPos+numHidden;
337  subrange(gradient,hiddenBiasPos,outputBiasPos) = sum_rows(hiddenDelta);
338  subrange(gradient,outputBiasPos,outputBiasPos+inputSize()) = sum_rows(outputDelta);
339  }
340 
	//! weight matrix between input and hidden layer (hidden x input)
	RealMatrix m_encoderMatrix;
	//! weight matrix between hidden and output layer (input x hidden)
	RealMatrix m_decoderMatrix;
	//! bias weights of the hidden neurons
	RealVector m_hiddenBias;
	//! bias weights of the visible neurons
	RealVector m_outputBias;

	//! Type of hidden neuron. See Models/Neurons.h for a few choices
	HiddenNeuron m_hiddenNeuron;
	//! Type of output neuron. See Models/Neurons.h for a few choices
	OutputNeuron m_outputNeuron;
};
355 
356 
357 }
358 #endif