TiedAutoencoder.h
/*!
 * \brief Implements the autoencoder with tied weights
 *
 * \author O. Krause
 * \date 2010-2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_TIEDAUTOENCODER_H
#define SHARK_MODELS_TIEDAUTOENCODER_H

#include <shark/Models/AbstractModel.h>
#include <shark/Models/Neurons.h>
#include <shark/Models/FFNet.h>
#include <boost/serialization/vector.hpp>
namespace shark{

/// \brief Implements the autoencoder with tied weights.
///
/// The model computes
/// \f[ f(x) = \sigma_2(W^T\sigma_1(Wx+b_1)+b_2) \f]
/// where \f$ W \f$ is the weight matrix, \f$ b_1 \f$ and \f$ b_2 \f$ are the bias vectors, and
/// \f$ \sigma_1 \f$ and \f$ \sigma_2 \f$ are the activation functions of the hidden and output units.
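///
/// A minimal usage sketch (an illustration, not part of this file; it assumes the
/// LogisticNeuron and LinearNeuron types from Models/Neurons.h and a toy initialization):
/// \code
/// TiedAutoencoder<LogisticNeuron, LinearNeuron> model;
/// model.setStructure(784, 64);                          // 784 visible units, 64 hidden units
/// RealVector params(model.numberOfParameters(), 0.01);  // toy constant initialization
/// model.setParameterVector(params);
/// Data<RealVector> codes          = model.encode(inputs); // input->hidden
/// Data<RealVector> reconstruction = model.decode(codes);  // hidden->output
/// \endcode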
template<class HiddenNeuron,class OutputNeuron>
class TiedAutoencoder :public AbstractModel<RealVector,RealVector>
{
	struct InternalState: public State{
		RealMatrix hiddenResponses;
		RealMatrix outputResponses;
	};


public:
	TiedAutoencoder(){
		// the model offers first derivatives w.r.t. its parameters and inputs
		// (see weightedParameterDerivative and weightedInputDerivative below)
		m_features |= HAS_FIRST_PARAMETER_DERIVATIVE;
		m_features |= HAS_FIRST_INPUT_DERIVATIVE;
	}

	//! \brief From INameable: return the class name.
	std::string name() const{
		return "TiedAutoencoder";
	}

	//! \brief Number of input neurons.
	std::size_t inputSize()const{
		return outputSize();
	}
	//! \brief Number of output neurons.
	std::size_t outputSize()const{
		return outputBias().size();
	}

	//! \brief Total number of hidden neurons.
	std::size_t numberOfHiddenNeurons()const{
		return encoderMatrix().size1();
	}

	/// \brief Returns the hidden bias weight vector.
	RealVector const& hiddenBias()const{
		return m_hiddenBias;
	}

	/// \brief Returns the hidden bias weight vector.
	RealVector& hiddenBias(){
		return m_hiddenBias;
	}

	/// \brief Returns the output bias weight vector.
	RealVector const& outputBias()const{
		return m_outputBias;
	}
	/// \brief Returns the output bias weight vector.
	RealVector& outputBias(){
		return m_outputBias;
	}

	/// \brief Weight matrix for the direction input->hidden.
	RealMatrix const& encoderMatrix()const{
		return m_weightMatrix;
	}
	/// \brief Weight matrix for the direction input->hidden.
	RealMatrix& encoderMatrix(){
		return m_weightMatrix;
	}
	/// \brief Weight matrix for the direction hidden->output.
	///
	/// For tied autoencoders, this is the transpose of the encoder matrix.
	blas::matrix_transpose<RealMatrix const> decoderMatrix()const{
		return trans(m_weightMatrix);
	}
	/// \brief Weight matrix for the direction hidden->output.
	///
	/// For tied autoencoders, this is the transpose of the encoder matrix.
	blas::matrix_transpose<RealMatrix> decoderMatrix(){
		return trans(m_weightMatrix);
	}

	//! \brief Returns the total number of parameters of the network.
	std::size_t numberOfParameters()const{
		// tied weights: W plus the two bias vectors
		return inputSize()*numberOfHiddenNeurons()+numberOfHiddenNeurons()+outputSize();
	}

	//! Returns all parameters of the model as a single vector.
	RealVector parameterVector() const{
		return to_vector(m_weightMatrix) | m_hiddenBias | m_outputBias;
	}
	//! Sets the weight matrix and the bias vectors from the values in the parameter vector.
	void setParameterVector(RealVector const& newParameters){
		SIZE_CHECK(newParameters.size() == numberOfParameters());
		std::size_t endWeights = m_weightMatrix.size1() * m_weightMatrix.size2();
		std::size_t endBias = endWeights + m_hiddenBias.size();
		noalias(to_vector(m_weightMatrix)) = subrange(newParameters,0,endWeights);
		noalias(m_hiddenBias) = subrange(newParameters,endWeights,endBias);
		noalias(m_outputBias) = subrange(newParameters,endBias,endBias+m_outputBias.size());
	}
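	// Parameter layout used by parameterVector/setParameterVector above
	// (a descriptive note, matching the subranges computed in setParameterVector):
	//   [ W (row-major, hidden x input) | hidden bias | output bias ]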

	/// \brief Returns the activation function of the hidden units.
	HiddenNeuron const& hiddenActivationFunction()const{
		return m_hiddenNeuron;
	}
	/// \brief Returns the activation function of the output units.
	OutputNeuron const& outputActivationFunction()const{
		return m_outputNeuron;
	}

	/// \brief Returns the activation function of the hidden units.
	HiddenNeuron& hiddenActivationFunction(){
		return m_hiddenNeuron;
	}
	/// \brief Returns the activation function of the output units.
	OutputNeuron& outputActivationFunction(){
		return m_outputNeuron;
	}

	//! \brief Returns the output of the hidden neurons after the last call of eval.
	//!
	//! \param state last result of eval
	//! \return Output value of the hidden neurons.
	RealMatrix const& hiddenResponses(State const& state)const{
		InternalState const& s = state.toState<InternalState>();
		return s.hiddenResponses;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new InternalState());
	}

	//! \brief Computes the response of the given layer (0: input->hidden, 1: hidden->output) for a batch of patterns.
	void evalLayer(std::size_t layer,RealMatrix const& patterns,RealMatrix& outputs)const{
		SIZE_CHECK(layer < 2);
		std::size_t numPatterns = patterns.size1();

		if(layer == 0){//input->hidden
			SIZE_CHECK(patterns.size2() == encoderMatrix().size2());
			std::size_t numOutputs = encoderMatrix().size1();
			outputs.resize(numPatterns,numOutputs);
			noalias(outputs) = prod(patterns,trans(encoderMatrix())) + repeat(hiddenBias(),numPatterns);
			noalias(outputs) = m_hiddenNeuron(outputs);
		}
		else{//hidden->output
			SIZE_CHECK(patterns.size2() == decoderMatrix().size2());
			std::size_t numOutputs = decoderMatrix().size1();
			outputs.resize(numPatterns,numOutputs);
			noalias(outputs) = prod(patterns,trans(decoderMatrix())) + repeat(outputBias(),numPatterns);
			noalias(outputs) = m_outputNeuron(outputs);
		}
	}

	///\brief Returns the response of the i-th layer given the input of that layer.
	///
	/// This is useful if only a portion of the network needs to be evaluated.
	/// Be aware that this only works for networks without shortcuts.
	Data<RealVector> evalLayer(std::size_t layer, Data<RealVector> const& patterns)const{
		SIZE_CHECK(layer < 2);
		int batches = (int) patterns.numberOfBatches();
		Data<RealVector> result(batches);
		SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i){
			evalLayer(layer,patterns.batch(i),result.batch(i));
		}
		return result;
	}

	/// \brief Evaluates the encoder part of the network, i.e. the mapping input->hidden.
	Data<RealVector> encode(Data<RealVector> const& patterns)const{
		return evalLayer(0,patterns);
	}

	/// \brief Evaluates the decoder part of the network, i.e. the mapping hidden->output.
	Data<RealVector> decode(Data<RealVector> const& patterns)const{
		return evalLayer(1,patterns);
	}

	/// \brief Encodes the inputs of a labeled dataset, keeping the labels.
	template<class Label>
	LabeledData<RealVector,Label> encode(
		LabeledData<RealVector,Label> const& data
	)const{
		return LabeledData<RealVector,Label>(encode(data.inputs()),data.labels());
	}

	/// \brief Decodes the inputs of a labeled dataset, keeping the labels.
	template<class Label>
	LabeledData<RealVector,Label> decode(
		LabeledData<RealVector,Label> const& data
	)const{
		return LabeledData<RealVector,Label>(decode(data.inputs()),data.labels());
	}
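
	// Usage sketch for the labeled overloads above (trainingSet is an assumed
	// LabeledData<RealVector,unsigned int> variable; any label type works):
	//   LabeledData<RealVector,unsigned int> features = model.encode(trainingSet);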

	void eval(RealMatrix const& patterns,RealMatrix& output, State& state)const{
		InternalState& s = state.toState<InternalState>();
		evalLayer(0,patterns,s.hiddenResponses);//propagate input->hidden
		evalLayer(1,s.hiddenResponses,s.outputResponses);//propagate hidden->output
		output = s.outputResponses;
	}
	// make the remaining eval overloads of the base class visible
	using AbstractModel<RealVector,RealVector>::eval;

	void weightedParameterDerivative(
		BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, RealVector& gradient
	)const{
		SIZE_CHECK(coefficients.size2() == outputSize());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		RealMatrix outputDelta = coefficients;
		RealMatrix hiddenDelta;
		computeDelta(state,outputDelta,hiddenDelta);
		computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,gradient);
	}

	void weightedInputDerivative(
		BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, BatchInputType& inputDerivative
	)const{
		SIZE_CHECK(coefficients.size2() == outputSize());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		RealMatrix outputDelta = coefficients;
		RealMatrix hiddenDelta;
		computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
	}

	virtual void weightedDerivatives(
		BatchInputType const & patterns,
		BatchOutputType const & coefficients,
		State const& state,
		RealVector& parameterDerivative,
		BatchInputType& inputDerivative
	)const{
		SIZE_CHECK(coefficients.size2() == outputSize());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		RealMatrix outputDelta = coefficients;
		RealMatrix hiddenDelta;
		computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
		computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,parameterDerivative);
	}

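	// A minimal sketch of computing a parameter gradient by hand. The coefficient
	// matrix would normally come from the derivative of a loss; here the derivative
	// of the squared reconstruction error, outputs - batch, is used for illustration
	// (batch is an assumed RealMatrix of input patterns):
	//   boost::shared_ptr<State> state = model.createState();
	//   RealMatrix outputs;
	//   model.eval(batch, outputs, *state);
	//   RealMatrix coefficients = outputs - batch; // d/df of 0.5*||f(x)-x||^2
	//   RealVector gradient;
	//   model.weightedParameterDerivative(batch, coefficients, *state, gradient);
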
	//! \brief Sets the structure of the network: the number of inputs/outputs and the number of hidden neurons.
	void setStructure(
		std::size_t in,std::size_t hidden
	){
		m_weightMatrix.resize(hidden,in);
		m_hiddenBias.resize(hidden);
		m_outputBias.resize(in);
	}

	//! From ISerializable, reads a model from an archive
	void read( InArchive & archive ){
		archive>>m_weightMatrix;
		archive>>m_hiddenBias;
		archive>>m_outputBias;
	}

	//! From ISerializable, writes a model to an archive
	void write( OutArchive & archive ) const{
		archive<<m_weightMatrix;
		archive<<m_hiddenBias;
		archive<<m_outputBias;
	}


private:

	// multiplies the output delta with the derivative of the output activation and
	// backpropagates it through the decoder (W^T) to obtain the hidden delta
	void computeDelta(
		State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta
	)const{
		InternalState const& s = state.toState<InternalState>();

		noalias(outputDelta) *= m_outputNeuron.derivative(s.outputResponses);
		hiddenDelta.resize(outputDelta.size1(),numberOfHiddenNeurons());
		noalias(hiddenDelta) = prod(outputDelta,decoderMatrix());
		noalias(hiddenDelta) *= m_hiddenNeuron.derivative(s.hiddenResponses);
	}

	// additionally backpropagates the hidden delta through the encoder (W) to the inputs
	void computeDelta(
		State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta, RealMatrix& inputDelta
	)const{
		computeDelta(state,outputDelta,hiddenDelta);
		inputDelta.resize(outputDelta.size1(),inputSize());
		noalias(inputDelta) = prod(hiddenDelta,encoderMatrix());
	}

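	// Since the weight matrix W is used twice, in the encoder (Wx + b_1) and,
	// transposed, in the decoder (W^T h + b_2), the chain rule yields two additive
	// contributions to its gradient. In batch notation with patterns X, hidden
	// responses H and the deltas computed above:
	//   dE/dW = H^T * Delta_output + Delta_hidden^T * X
	// which are exactly the two prod() terms accumulated below.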
	void computeParameterDerivative(
		RealMatrix const& patterns, RealMatrix const& outputDelta, RealMatrix const& hiddenDelta,
		State const& state, RealVector& gradient
	)const{
		InternalState const& s = state.toState<InternalState>();
		std::size_t hiddenParams = inputSize()*numberOfHiddenNeurons();
		std::size_t numHidden = numberOfHiddenNeurons();
		gradient.resize(numberOfParameters());
		auto gradEncoder = to_matrix(subrange(gradient,0,hiddenParams),numHidden,inputSize());
		noalias(gradEncoder) = prod(trans(s.hiddenResponses),outputDelta);
		noalias(gradEncoder) += prod(trans(hiddenDelta),patterns);

		std::size_t hiddenBiasPos = hiddenParams;
		std::size_t outputBiasPos = hiddenBiasPos+numHidden;
		subrange(gradient,hiddenBiasPos,outputBiasPos) = sum_rows(hiddenDelta);
		subrange(gradient,outputBiasPos,outputBiasPos+inputSize()) = sum_rows(outputDelta);
	}

	//! Weight matrix between input and hidden layer; its transpose connects hidden->output.
	RealMatrix m_weightMatrix;
	//! bias weights of the hidden neurons
	RealVector m_hiddenBias;
	//! bias weights of the output neurons
	RealVector m_outputBias;

	//! Type of hidden neuron. See Models/Neurons.h for a few choices
	HiddenNeuron m_hiddenNeuron;
	//! Type of output neuron. See Models/Neurons.h for a few choices
	OutputNeuron m_outputNeuron;
};


}
#endif