TruncatedExponentialLayer.h
/*!
 *
 *
 * \brief -
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_TRUNCATEDEXPONENTIALLAYER_H
#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_TRUNCATEDEXPONENTIALLAYER_H

#include <shark/Core/ISerializable.h>
#include <shark/Core/IParameterizable.h>
#include <shark/LinAlg/Base.h>
#include <shark/Data/BatchInterfaceAdaptStruct.h>
#include <shark/Unsupervised/RBM/StateSpaces/RealSpace.h>
#include <shark/Core/OpenMP.h>
#include <shark/Core/Random.h>
namespace shark{
namespace detail{
template<class VectorType>
struct TruncatedExponentialSufficientStatistics{
    VectorType lambda;
    VectorType expMinusLambda;

    TruncatedExponentialSufficientStatistics(std::size_t numberOfNeurons)
    :lambda(numberOfNeurons), expMinusLambda(numberOfNeurons){}
    TruncatedExponentialSufficientStatistics(){}
};
}


/// \cond

//auto generate the batch interface for the TruncatedExponentialSufficientStatistics
template<class VectorType>
struct Batch< detail::TruncatedExponentialSufficientStatistics<VectorType> >{
    SHARK_CREATE_BATCH_INTERFACE(
        detail::TruncatedExponentialSufficientStatistics<VectorType>,
        (VectorType, lambda)(VectorType, expMinusLambda)
    )
};
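
//Note: SHARK_CREATE_BATCH_INTERFACE (from BatchInterfaceAdaptStruct.h) is assumed
//to generate, as for the other neuron layers, a batch type in which every
//VectorType member becomes a matrix; row i then holds the statistics of the
//i-th element of the batch.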

/// \endcond

///\brief A layer of truncated exponential neurons.
///
/// Truncated exponential distributions arise when the state space of the binary neurons is extended to the
/// real numbers in [0,1]. The conditional distribution of the state of these neurons, given the states of the
/// connected layer, is an exponential distribution restricted to [0,1].
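///
/// \par Example
/// A minimal usage sketch; the RBM class template, BinaryLayer, random::rng_type
/// and setStructure are assumed to be available as in the rest of the Shark RBM
/// module (illustrative only, not part of this header):
/// \code
/// #include <shark/Unsupervised/RBM/RBM.h>
/// #include <shark/Unsupervised/RBM/Neuronlayers/BinaryLayer.h>
///
/// //RBM with truncated exponential visible units and binary hidden units
/// typedef RBM<TruncatedExponentialLayer, BinaryLayer, random::rng_type> TruncExpRBM;
/// TruncExpRBM rbm;
/// rbm.setStructure(6,4); //6 visible and 4 hidden units
/// \endcode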
class TruncatedExponentialLayer : public ISerializable, public IParameterizable<>{
private:
    RealVector m_bias;
public:
    ///the state space of this neuron is real valued, so it can't be enumerated
    typedef RealSpace StateSpace;

    ///\brief Stores lambda, the defining parameter of the statistics, and also exp(-lambda) since it is used regularly.
    typedef detail::TruncatedExponentialSufficientStatistics<RealVector> SufficientStatistics;
    ///\brief Sufficient statistics of a batch of data.
    typedef Batch<SufficientStatistics>::type StatisticsBatch;

    /// \brief Returns the bias values of the units.
    const RealVector& bias()const{
        return m_bias;
    }
    /// \brief Returns the bias values of the units.
    RealVector& bias(){
        return m_bias;
    }

    ///\brief Resizes this neuron layer.
    ///
    ///@param newSize number of neurons in the layer
    void resize(std::size_t newSize){
        m_bias.resize(newSize);
    }

    ///\brief Returns the number of neurons of this layer.
    std::size_t size()const{
        return m_bias.size();
    }

    /// \brief Takes the input of the neuron and calculates the statistics required to sample from the conditional distribution
    ///
    /// @param input the batch of inputs of the neuron
    /// @param statistics sufficient statistics holding lambda and exp(-lambda) for every neuron
    /// @param beta the inverse temperature of the RBM (typically 1) for the whole batch
    template<class Input, class BetaVector>
    void sufficientStatistics(Input const& input, StatisticsBatch& statistics,BetaVector const& beta)const{ // \todo: reconsider the names here as well
        SIZE_CHECK(input.size2() == size());
        SIZE_CHECK(statistics.lambda.size2() == size());
        SIZE_CHECK(input.size1() == statistics.lambda.size1());

        for(std::size_t i = 0; i != input.size1(); ++i){
            noalias(row(statistics.lambda,i)) = -(row(input,i)+m_bias)*beta(i);
        }
        noalias(statistics.expMinusLambda) = exp(-statistics.lambda);
    }
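
    //With lambda_i = -beta*(input_i + b_i), the conditional density of unit i is
    //proportional to exp(-lambda_i * x) on [0,1]; exp(-lambda) is cached in
    //expMinusLambda because both sample() and expectedPhiValue() reuse it.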


    /// \brief Samples from the truncated exponential distribution using either Gibbs- or flip-the-state sampling given the sufficient statistics
    /// (i.e. the parameter lambda and the value of exp(-lambda))
    ///
    ///The truncated exponential distribution is defined as
    ///\f[ p(x) = \lambda \frac{e^{-\lambda x}}{1 - e^{-\lambda}}\f]
    ///with x being in the range [0,1].
    ///
    /// For alpha = 0, Gibbs sampling is performed. That is, the next state for neuron i is drawn directly from the conditional distribution of the i-th neuron.
    /// In the case of alpha = 1, flip-the-state sampling is performed, which takes the last state into account and deterministically tries to jump
    /// into states with higher probability. THIS IS NOT IMPLEMENTED YET and alpha is ignored!
    ///
    /// @param statistics sufficient statistics for the batch to be computed
    /// @param state the state matrix that will hold the sampled states
    /// @param alpha factor changing from Gibbs to flip-the-state sampling, 0 <= alpha <= 1
    /// @param rng the random number generator used for sampling
    template<class Matrix, class Rng>
    void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
        SIZE_CHECK(statistics.lambda.size2() == size());
        SIZE_CHECK(statistics.lambda.size1() == state.size1());
        SIZE_CHECK(statistics.lambda.size2() == state.size2());

        SHARK_CRITICAL_REGION{
            for(std::size_t i = 0; i != state.size1();++i){
                for(std::size_t j = 0; j != state.size2();++j){
                    state(i,j) = random::truncExp(rng,statistics.lambda(i,j),1.0,1.0 - statistics.expMinusLambda(i,j));
                }
            }
        }
        (void)alpha;//TODO: USE ALPHA
    }
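
    //A sketch of what drawing one state amounts to, assuming random::truncExp
    //uses inverse-transform sampling (the call above is taken from this file;
    //the two lines below are an illustration, not its actual implementation):
    //
    //  double u = random::uni(rng,0.0,1.0);                          //u ~ Uniform(0,1)
    //  double x = -std::log(1.0 - u*(1.0 - expMinusLambda))/lambda;  //invert the CDF
    //
    //where the truncated exponential has CDF F(x) = (1-exp(-lambda*x))/(1-exp(-lambda)).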

    /// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
    /// i.e. calculates the value of the phi-function used in the interaction term.
    ///
    /// @param state the state matrix of the neuron layer
    /// @return the value of the phi-function
    template<class Matrix>
    Matrix const& phi(Matrix const& state)const{
        SIZE_CHECK(state.size2() == size());
        return state;
    }



    /// \brief Returns the conditional expectation of the phi-function given the state of the connected layer.
    ///
    /// @param statistics the sufficient statistics of the layer
    RealMatrix expectedPhiValue(StatisticsBatch const& statistics)const{
        SIZE_CHECK(statistics.lambda.size2() == size());
        SIZE_CHECK(statistics.expMinusLambda.size2() == size());
        std::size_t batchSize=statistics.lambda.size1();
        RealMatrix mean(batchSize,size());

        for(std::size_t i = 0; i != batchSize;++i){
            for(std::size_t j = 0; j != size();++j){
                double expML=statistics.expMinusLambda(i,j);
                mean(i,j) = 1.0/statistics.lambda(i,j)-expML/(1.0 - expML);
            }
        }
        return mean;
    }
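
    //Derivation of the formula above: for p(x) = lambda*exp(-lambda*x)/(1-exp(-lambda))
    //on [0,1], integration by parts gives
    //  E[x] = 1/lambda - exp(-lambda)/(1 - exp(-lambda)),
    //which is exactly what the inner loop computes element-wise.
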
    /// \brief Returns the mean of the conditional distribution.
    /// @param statistics the sufficient statistics defining the conditional distribution
    RealMatrix mean(StatisticsBatch const& statistics)const{
        return expectedPhiValue(statistics);
    }

    /// \brief Returns the energy term this neuron adds to the energy function.
    ///
    /// @param state the state of the neuron layer
    /// @param beta the inverse temperature of the i-th state
    /// @return the energy term of the neuron layer
    template<class Matrix, class BetaVector>
    RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
        SIZE_CHECK(state.size2() == size());
        SIZE_CHECK(state.size1() == beta.size());
        //for every element i of the batch, the following computes the equivalent of
        //beta(i)*inner_prod(row(state,i),m_bias)

        RealVector energies = beta * prod(state,m_bias);
        return energies;
    }
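
    //Worked example: for a single batch element with state = (0.5, 1.0),
    //bias = (2, -1) and beta = 1, the returned value is 1*(0.5*2 + 1.0*(-1)) = 0.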

    ///\brief Integrates over the terms of the Energy function which depend on the state of this layer and returns the logarithm of the result.
    ///
    ///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
    ///It calculates the part which depends on the neurons which are to be marginalized out.
    ///(In the case of the exponential hidden neuron, this is the term \f$ \log \int_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$).
    ///
    /// @param inputs the inputs of the neurons they get from the other layer
    /// @param beta the inverse temperature of the RBM
    /// @return the logarithm of the unnormalized marginal probability of the connected layer
    template<class Input>
    double logMarginalize(const Input& inputs,double beta) const{
        SIZE_CHECK(inputs.size() == size());
        double lnResult = 0;
        for(std::size_t i = 0; i != size(); ++i){
            double a = (inputs(i)+m_bias(i))*beta;
            //calculates log( (exp(a)-1)/a ). the argument of the log is always positive, since for a > 0 exp(a) > 1 and for a < 0 exp(a) < 1.
            //for a = 0 the result is 1 and log(1) = 0,
            //so we calculate log( (exp(a)-1)/a ) = log|exp(a)-1| - log|a|.
            //for a > 0 we use log|exp(a)-1| = log(exp(a)-1) = a+log(1-exp(-a)), which is numerically more stable if a is big;
            //for a < 0, log|exp(a)-1| = log(1-exp(a)) is fine.
            if( a > 1.e-50){
                lnResult += a+std::log(1.0 - std::exp(-a));
                lnResult -= std::log(a);
            }
            else if( a < -1.e-50){
                lnResult += std::log(1.0 - std::exp(a));
                lnResult -= std::log(-a);
            }
            //for |a| <= 1.e-50 the factor (exp(a)-1)/a is 1 up to machine precision, so nothing is added
        }
        return lnResult;
    }
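
    //Quick numerical check (illustrative): a single unit with input 0, bias b = 2
    //and beta = 1 gives a = 2, so the marginalization factor is
    //(exp(2)-1)/2 ~ 3.1945 and logMarginalize returns log(3.1945) ~ 1.1614.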


    ///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
    /// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
    ///
    ///This function takes a batch of samples and extracts the required information out of it.
    ///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
    ///@param samples the samples from which the information can be extracted
    template<class Vector, class SampleBatch>
    void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples )const{
        SIZE_CHECK(derivative.size() == size());
        //the expected state of a truncated exponential neuron is its conditional mean
        sum_rows(mean(samples.statistics),derivative);
    }


    ///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
    ///
    ///This function takes a batch of samples and extracts the required information out of it.
    ///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
    ///@param samples the sample from which the information can be extracted
    template<class Vector, class SampleBatch>
    void parameterDerivative(Vector& derivative, SampleBatch const& samples)const{
        SIZE_CHECK(derivative.size() == size());
        sum_rows(samples.state,derivative);
    }

    /// \brief Returns the vector with the parameters associated with the neurons in the layer.
    RealVector parameterVector()const{
        return m_bias;
    }

    /// \brief Sets the vector with the parameters associated with the neurons in the layer.
    void setParameterVector(RealVector const& newParameters){
        m_bias = newParameters;
    }

    /// \brief Returns the number of the parameters associated with the neurons in the layer.
    std::size_t numberOfParameters()const{
        return size();
    }

    /// \brief Reads the bias parameters from an archive.
    void read( InArchive & archive ){
        archive >> m_bias;
    }
    /// \brief Writes the bias parameters to an archive.
    void write( OutArchive & archive ) const{
        archive << m_bias;
    }
};

}
#endif