GaussianLayer.h
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief Defines GaussianLayer, a layer of Gaussian neurons with unit variance.
5  *
6  * \author -
7  * \date -
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_GAUSSIANLAYER_H
31 #define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_GAUSSIANLAYER_H
32 
33 #include <shark/LinAlg/Base.h>
35 #include <shark/Core/Random.h>
38 #include <shark/Core/Math.h>
40 #include <shark/Core/OpenMP.h>
41 namespace shark{
42 
43 ///\brief A layer of Gaussian neurons.
44 ///
45 /// For a Gaussian neuron/variable the conditional probability distribution of the
46 /// state of the variable given the state of the other layer is given by a Gaussian
47 /// distribution with the input of the neuron as mean and unit variance.
48 class GaussianLayer : public ISerializable, public IParameterizable<>{
49 private:
50  RealVector m_bias; ///the bias terms associated with the neurons
51 public:
52  ///the state space of this neuron is binary
54 
55  ///\brief The sufficient statistics for the Guassian Layer stores the mean of the neuron and the inverse temperature
56  typedef RealVector SufficientStatistics;
57  ///\brief Sufficient statistics of a batch of data.
59 
60  /// \brief Returns the bias values of the units.
61  const RealVector& bias()const{
62  return m_bias;
63  }
64  /// \brief Returns the bias values of the units.
65  RealVector& bias(){
66  return m_bias;
67  }
68 
69  ///\brief Resizes this neuron layer.
70  ///
71  ///@param newSize number of neurons in the layer
72  void resize(std::size_t newSize){
73  m_bias.resize(newSize);
74  }
75 
76  ///\brief Returns the number of neurons of this layer.
77  std::size_t size()const{
78  return m_bias.size();
79  }
80 
81  /// \brief Takes the input of the neuron and estimates the expectation of the response of the neuron.
82  ///
83  /// @param input the batch of inputs of the neuron
84  /// @param statistics sufficient statistics containing the mean of the resulting Gaussian distribution
85  /// @param beta the inverse Temperature of the RBM (typically 1) for the whole batch
86  template<class Input, class BetaVector>
87  void sufficientStatistics(Input const& input, StatisticsBatch& statistics,BetaVector const& beta)const{ // \todo: auch hier noch mal namen ueberdenken
88  SIZE_CHECK(input.size2() == size());
89  SIZE_CHECK(statistics.size2() == size());
90  SIZE_CHECK(input.size1() == statistics.size1());
91 
92  for(std::size_t i = 0; i != input.size1(); ++i){
93  noalias(row(statistics,i)) = row(input,i)*beta(i)+m_bias;
94  }
95  }
96 
97 
98  /// \brief Given a the precomputed statistics (the mean of the Gaussian), the elements of the vector are sampled.
99  /// This happens either with Gibbs-Sampling or Flip-the-State sampling.
100  /// For alpha= 0 gibbs sampling is performed. That is the next state for neuron i is directly taken from the conditional distribution of the i-th neuron.
101  /// In the case of alpha=1, flip-the-state sampling is performed, which takes the last state into account and tries to do deterministically jump
102  /// into states with higher probability. THIS IS NOT IMPLEMENTED YET and alpha is ignored!
103  ///
104  ///
105  /// @param statistics sufficient statistics containing the mean of the conditional Gaussian distribution of the neurons
106  /// @param state the state matrix that will hold the sampled states
107  /// @param alpha factor changing from gibbs to flip-the state sampling. 0<=alpha<=1
108  /// @param rng the random number generator used for sampling
109  template<class Matrix, class Rng>
110  void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
111  SIZE_CHECK(statistics.size2() == size());
112  SIZE_CHECK(statistics.size1() == state.size1());
113  SIZE_CHECK(statistics.size2() == state.size2());
114 
116  for(std::size_t i = 0; i != state.size1();++i){
117  for(std::size_t j = 0; j != state.size2();++j){
118  state(i,j) = random::gauss(rng,statistics(i,j), 1.0);
119  }
120  }
121  }
122  (void) alpha;
123  }
124 
125  /// \brief Computes the log of the probability of the given states in the conditional distribution
126  ///
127  /// Currently it is only possible to compute the case with alpha=0
128  ///
129  /// @param statistics the statistics of the conditional distribution
130  /// @param state the state to check
131  template<class Matrix>
132  RealVector logProbability(StatisticsBatch const& statistics, Matrix const& state) const{
133  SIZE_CHECK(statistics.size2() == size());
134  SIZE_CHECK(statistics.size1() == state.size1());
135  SIZE_CHECK(statistics.size2() == state.size2());
136 
137  RealVector logProbabilities(state.size1(),1.0);
138  for(std::size_t s = 0; s != state.size1();++s){
139  for(std::size_t i = 0; i != state.size2();++i){
140  logProbabilities(s) -= 0.5*sqr(statistics(s,i)-state(s,i));
141  }
142  }
143  return logProbabilities;
144  }
145 
146  /// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
147  /// i.e. calculates the value of the phi-function used in the interaction term.
148  /// In the case of Gaussian neurons the phi-function is just the identity.
149  ///
150  /// @param state the state matrix of the neuron layer
151  /// @return the value of the phi-function
152  template<class Matrix>
153  Matrix const& phi(Matrix const& state)const{
154  SIZE_CHECK(state.size2() == size());
155  return state;
156  }
157 
158 
159  /// \brief Returns the expectation of the phi-function.
160  /// @param statistics the sufficient statistics (the mean of the distribution).
161  RealMatrix const& expectedPhiValue(StatisticsBatch const& statistics)const{
162  SIZE_CHECK(statistics.size2() == size());
163  return statistics;
164  }
165  /// \brief Returns the mean given the state of the connected layer, i.e. in this case the mean of the Gaussian
166  ///
167  /// @param statistics the sufficient statistics of the layer for a whole batch
168  RealMatrix const& mean(StatisticsBatch const& statistics)const{
169  SIZE_CHECK(statistics.size2() == size());
170  return statistics;
171  }
172 
173  /// \brief The energy term this neuron adds to the energy function for a batch of inputs.
174  ///
175  /// @param state the state of the neuron layer
176  /// @param beta the inverse temperature of the i-th state
177  /// @return the energy term of the neuron layer
178  template<class Matrix, class BetaVector>
179  RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
180  SIZE_CHECK(state.size2() == size());
181  SIZE_CHECK(state.size1() == beta.size());
182  //the following code does for batches the equivalent thing to:
183  //return beta * inner_prod(m_bias,state) - norm_sqr(state)/2.0;
184 
185  std::size_t batchSize = state.size1();
186  RealVector energies = prod(state,m_bias);
187  noalias(energies) *= beta;
188  for(std::size_t i = 0; i != batchSize; ++i){
189  energies(i) -= norm_sqr(row(state,i))/2.0;
190  }
191  return energies;
192 
193  }
194 
195 
196  ///\brief Sums over all possible values of the terms of the energy function which depend on the this layer and returns the logarithmic result.
197  ///
198  ///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
199  ///This function calculates the part which depends on the neurons which are to be marginalized out.
200  ///(In the case of the binary hidden neuron, this is the term \f$ \log \sum_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$).
201  ///The rest is calculated by the energy function.
202  ///In the general case of a hidden layer, this function calculates \f$ \log \int_h e^(\phi_h(\vec h)^T W \phi_v(\vec v)+f_h(\vec h) ) \f$
203  ///where f_h is the energy term of this.
204  ///
205  /// @param inputs the inputs of the neurons they get from the other layer
206  /// @param beta the inverse temperature of the RBM
207  /// @return the marginal distribution of the connected layer
208  template<class Input>
209  double logMarginalize(const Input& inputs, double beta) const{
210  SIZE_CHECK(inputs.size() == size());
211  double lnResult = 0;
212  double logNormalizationTerm = std::log(SQRT_2_PI) - 0.5 * std::log(beta);
213 
214  for(std::size_t i = 0; i != size(); ++i){
215  lnResult += 0.5 * sqr(inputs(i)+m_bias(i))*beta;
216  lnResult += logNormalizationTerm;
217  }
218  return lnResult;
219  }
220 
221 
222  ///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to it's parameters - the bias weights.
223  /// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
224  ///
225  ///This function takes a batch of samples and extracts the required informations out of it.
226  ///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
227  ///@param samples the samples from which the informations can be extracted
228  template<class Vector, class SampleBatch>
229  void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples )const{
230  SIZE_CHECK(derivative.size() == size());
231  sum_rows(samples.statistics,derivative);
232  }
233 
234  template<class Vector, class SampleBatch, class Vector2 >
235  void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples, Vector2 const& weights )const{
236  SIZE_CHECK(derivative.size() == size());
237  noalias(derivative) += prod(weights,samples.statistics);
238  }
239 
240  ///\brief Calculates the derivatives of the energy term of this neuron layer with respect to it's parameters - the bias weights.
241  ///
242  ///This function takes a batch of samples and extracts the required informations out of it.
243  ///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
244  ///@param samples the sample from which the informations can be extracted
245  template<class Vector, class SampleBatch>
246  void parameterDerivative(Vector& derivative, SampleBatch const& samples)const{
247  SIZE_CHECK(derivative.size() == size());
248  sum_rows(samples.state,derivative);
249  }
250 
251  ///\brief Calculates the derivatives of the energy term of this neuron layer with respect to it's parameters - the bias weights.
252  ///
253  ///This function takes a batch of samples and calculates a weighted derivative
254  ///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
255  ///@param samples the sample from which the informations can be extracted
256  ///@param weights the weights for the single sample derivatives
257  template<class Vector, class SampleBatch, class WeightVector>
258  void parameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights)const{
259  SIZE_CHECK(derivative.size() == size());
260  noalias(derivative) += prod(weights,samples.state);
261  }
262 
263  ///\brief Returns the vector with the parameters associated with the neurons in the layer.
264  RealVector parameterVector()const{
265  return m_bias;
266  }
267 
268  ///\brief Returns the vector with the parameters associated with the neurons in the layer.
269  void setParameterVector(RealVector const& newParameters){
270  m_bias = newParameters;
271  }
272 
273  ///\brief Returns the number of the parameters associated with the neurons in the layer.
274  std::size_t numberOfParameters()const{
275  return size();
276  }
277 
278  /// \brief Reads the bias parameters from an archive.
279  void read( InArchive & archive ){
280  archive >> m_bias;
281  }
282  /// \brief Writes the bias parameters to an archive.
283  void write( OutArchive & archive ) const{
284  archive << m_bias;
285  }
286 };
287 
288 }
289 #endif