BipolarLayer.h
/*!
 *
 * \brief Implements the bipolar (-1,1) state neuron layer
 *
 * \author Asja Fischer
 * \date 2013
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BIPOLARLAYER_H
#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BIPOLARLAYER_H

#include <shark/LinAlg/Base.h>
#include <shark/Unsupervised/RBM/StateSpaces/TwoStateSpace.h>
#include <shark/Core/ISerializable.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Core/Math.h>
#include <shark/Core/Random.h>
#include <shark/Data/BatchInterface.h>
#include <shark/Core/OpenMP.h>
namespace shark{
///\brief Layer of bipolar units taking values in {-1,1}.

///A neuron in a BipolarLayer takes values in {-1,1}. Its conditional probability to be 1,
///given the states of the neurons in the connected layer, is determined by the sigmoid function
///applied to the input it receives from the connected layer.
class BipolarLayer : public ISerializable, public IParameterizable<>{
private:
	///\brief The bias terms associated with the neurons.
	RealVector m_bias;
public:
	///the state space of this neuron is the two-state (bipolar) space {-1,1}
	typedef SymmetricBinarySpace StateSpace;

	///\brief The sufficient statistics for the Bipolar Layer store the probability for a neuron to be one
	typedef RealVector SufficientStatistics;
	///\brief Sufficient statistics of a batch of data.
	typedef Batch<SufficientStatistics>::type StatisticsBatch;
	/// \brief Returns the bias values of the units.
	const RealVector& bias()const{
		return m_bias;
	}

	/// \brief Returns the bias values of the units.
	RealVector& bias(){
		return m_bias;
	}

	///\brief Resizes this neuron layer.
	///
	///@param newSize number of neurons in the layer
	void resize(std::size_t newSize){
		m_bias.resize(newSize);
	}

	///\brief Returns the number of neurons of this layer.
	std::size_t size()const{
		return m_bias.size();
	}

	/// \brief Takes the input of the neuron and calculates the distribution of the neurons.
	/// For bipolar neurons this is identical to the conditional probability for the neuron to be one given the state of the connected layer.
	///
	/// @param input the batch of inputs of the neuron
	/// @param statistics sufficient statistics containing the probabilities of the neurons to be one
	/// @param beta the inverse temperature of the RBM (typically 1) for the whole batch
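	///
	/// A bipolar unit with total input \f$ x_i \f$, bias \f$ c_i \f$ and inverse temperature \f$ \beta \f$ has the
	/// unnormalized state probabilities \f$ e^{\beta(x_i+c_i)} \f$ and \f$ e^{-\beta(x_i+c_i)} \f$, hence
	/// \f$ P(h_i = 1) = \frac{e^{\beta(x_i+c_i)}}{e^{\beta(x_i+c_i)}+e^{-\beta(x_i+c_i)}} = \operatorname{sig}\left(2\beta(x_i+c_i)\right), \f$
	/// which is the expression evaluated row-wise below.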
	template<class Input, class BetaVector>
	void sufficientStatistics(Input const& input, StatisticsBatch& statistics, BetaVector const& beta)const{ // \todo: reconsider the naming here as well
		SIZE_CHECK(input.size2() == size());
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(input.size1() == statistics.size1());

		for(std::size_t i = 0; i != input.size1(); ++i){
			noalias(row(statistics,i)) = sigmoid(2*(row(input,i)+m_bias)*beta(i));
		}
	}

	/// \brief Samples from the distribution using either Gibbs or flip-the-state sampling.
	///
	/// For alpha = 0, Gibbs sampling is performed. That is, the next state for neuron i is directly taken from the conditional distribution of the i-th neuron.
	/// In the case of alpha = 1, flip-the-state sampling is performed, which takes the last state into account and tries to deterministically jump
	/// into states with higher probability. This is counterbalanced by a higher chance to jump back into a lower probability state in later steps.
	/// For alpha between 0 and 1, a mixture of both is performed.
	///
	/// @param statistics sufficient statistics containing the probabilities of the neurons to be one
	/// @param state the state vector that shall hold the sampled states
	/// @param alpha factor changing from Gibbs to flip-the-state sampling, 0 <= alpha <= 1
	/// @param rng the random number generator used for sampling
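	///
	/// In the pure flip-the-state case (alpha = 1) the update is a Metropolis-type move: a unit
	/// currently in state -1 with \f$ p = P(h_i = 1) \f$ flips with probability \f$ \min(1, p/(1-p)) \f$,
	/// and a unit in state 1 flips with probability \f$ \min(1, (1-p)/p) \f$. For 0 < alpha < 1 these
	/// transition probabilities are mixed linearly with the Gibbs probabilities, as implemented below.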
	template<class Matrix, class Rng>
	void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(statistics.size1() == state.size1());
		SIZE_CHECK(statistics.size2() == state.size2());

		SHARK_CRITICAL_REGION{//serialize access to the random number generator between OpenMP threads
			if(alpha == 0.0){//special case: normal Gibbs sampling
				for(std::size_t s = 0; s != state.size1();++s){
					for(std::size_t i = 0; i != state.size2();++i){
						state(s,i) = random::coinToss(rng,statistics(s,i));
						if(state(s,i)==0) state(s,i)=-1.;
					}
				}
			}
			else{//flip-the-state sampling
				for(std::size_t s = 0; s != state.size1(); ++s){
					for (std::size_t i = 0; i != state.size2(); i++) {
						double prob = statistics(s,i);
						if (state(s,i) == -1) {
							if (prob <= 0.5) {
								prob = (1. - alpha) * prob + alpha * prob / (1. - prob);
							} else {
								prob = (1. - alpha) * prob + alpha;
							}
						} else {
							if (prob >= 0.5) {
								prob = (1. - alpha) * prob + alpha * (1. - (1. - prob) / prob);
							} else {
								prob = (1. - alpha) * prob;
							}
						}
						state(s,i) = random::coinToss(rng, prob);
						if(state(s,i)==0) state(s,i)=-1.;
					}
				}
			}
		}
	}

	/// \brief Computes the log of the probability of the given states in the conditional distribution.
	///
	/// Currently only the case alpha = 0 (Gibbs sampling) is supported.
	///
	/// @param statistics the statistics of the conditional distribution
	/// @param state the state to check
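	///
	/// For a single state vector \f$ \vec h \f$ this is
	/// \f$ \log P(\vec h) = \sum_i \left( 1_{\{h_i = 1\}} \log p_i + 1_{\{h_i = -1\}} \log(1 - p_i) \right), \f$
	/// where \f$ p_i \f$ is the stored probability of the i-th neuron to be one.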
	template<class Matrix>
	RealVector logProbability(StatisticsBatch const& statistics, Matrix const& state) const{
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(statistics.size1() == state.size1());
		SIZE_CHECK(statistics.size2() == state.size2());

		RealVector logProbabilities(state.size1(),0.0);
		for(std::size_t s = 0; s != state.size1();++s){
			for(std::size_t i = 0; i != state.size2();++i){
				logProbabilities(s) += (state(s,i) > 0.0)? std::log(statistics(s,i)) : std::log(1-statistics(s,i));
			}
		}
		return logProbabilities;
	}

	/// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
	/// i.e. calculates the value of the phi-function used in the interaction term.
	/// In the case of bipolar neurons the phi-function is just the identity.
	///
	/// @param state the state matrix of the neuron layer
	/// @return the value of the phi-function
	template<class Matrix>
	Matrix const& phi(Matrix const& state)const{
		SIZE_CHECK(state.size2() == size());
		return state;
	}


	/// \brief Returns the conditional expectation of the phi-function given the state of the connected layer.
	///
	/// @param statistics the sufficient statistics of the layer
	RealMatrix expectedPhiValue(StatisticsBatch const& statistics)const{
		//calculation of the expectation: 1*P(h_i=1|v) - 1*(1-P(h_i=1|v)) = 2*P(h_i=1|v)-1
		return 2*statistics - 1;
	}

	/// \brief Returns the mean of the distribution.
	///
	/// @param statistics the sufficient statistics of the layer for a whole batch
	RealMatrix mean(StatisticsBatch const& statistics)const{
		SIZE_CHECK(statistics.size2() == size());
		//calculation of the expectation: 1*P(h_i=1|v) - 1*(1-P(h_i=1|v)) = 2*P(h_i=1|v)-1
		return 2*statistics - 1;
	}


	/// \brief Returns the energy term this neuron adds to the energy function.
	///
	/// @param state the state of the neuron layer
	/// @param beta the vector of inverse temperatures, one entry per state in the batch
	/// @return the energy term of the neuron layer
	template<class Matrix, class BetaVector>
	RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
		SIZE_CHECK(state.size2() == size());

		RealVector energies = beta*prod(state,m_bias);
		return energies;
	}


	///\brief Sums over all possible values of the terms of the energy function which depend on this layer and returns the logarithm of the result.
	///
	///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
	///It calculates the part which depends on the neurons which are to be marginalized out.
	///(In the case of the bipolar hidden neuron, this is the term \f$ \sum_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$).
	///The rest is calculated by the energy function.
	///
	/// @param inputs the inputs the neurons receive from the other layer
	/// @param beta the inverse temperature of the RBM
	/// @return the logarithm of the marginalized energy terms
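	///
	/// With \f$ x_i = \beta(\text{input}_i + c_i) \f$, the sum over the two states of unit i is
	/// \f$ e^{x_i} + e^{-x_i} = e^{|x_i|}\left(1 + e^{-2|x_i|}\right) \f$, so its logarithm is
	/// \f$ |x_i| + \operatorname{softPlus}(-2|x_i|) \f$; this is the numerically stable form computed below,
	/// which avoids overflow for large \f$ |x_i| \f$.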
	template<class Input>
	double logMarginalize(Input const& inputs, double beta) const{
		SIZE_CHECK(inputs.size() == size());
		long double logFactorization = 0;
		for(std::size_t i = 0; i != inputs.size(); ++i){
			long double arg = std::abs((inputs(i)+m_bias(i))*beta);
			logFactorization += softPlus(-2*arg)+arg;
		}
		return logFactorization;
	}


	///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	/// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
	///
	///This function takes a batch of samples and extracts the required information from it.
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the samples from which the information can be extracted
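	///
	///Since \f$ \langle h_i \rangle = 2 P(h_i = 1) - 1 \f$, the accumulated value is
	///\f$ \sum_s \left( 2 p_{s,i} - 1 \right) \f$: a row sum of twice the statistics minus the batch size,
	///which is how the body below computes it.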
	template<class Vector, class SampleBatch>
	void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples )const{
		SIZE_CHECK(derivative.size() == size());
		sumRows(2*samples.statistics,derivative);
		derivative -= samples.size();
	}

	///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	/// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
	///
	///This function takes a batch of samples and weights the results.
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the samples from which the information can be extracted
	///@param weights the weights for all samples
	template<class Vector, class SampleBatch, class WeightVector>
	void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights )const{
		SIZE_CHECK(derivative.size() == size());
		noalias(derivative) += 2*prod(weights,samples.statistics) - sum(weights);
	}


	///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	///
	///This function takes a batch of samples and extracts the required information from it.
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the sample from which the information can be extracted
	template<class Vector, class SampleBatch>
	void parameterDerivative(Vector& derivative, SampleBatch const& samples)const{
		SIZE_CHECK(derivative.size() == size());
		sumRows(samples.state,derivative);
	}

	///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	///
	///This function takes a batch of samples and calculates a weighted derivative.
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the sample from which the information can be extracted
	///@param weights the weights for the single sample derivatives
	template<class Vector, class SampleBatch, class WeightVector>
	void parameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights)const{
		SIZE_CHECK(derivative.size() == size());
		noalias(derivative) += prod(weights,samples.state);
	}

	/// \brief Returns the vector with the parameters associated with the neurons in the layer, i.e. the bias vector.
	RealVector parameterVector()const{
		return m_bias;
	}

	/// \brief Sets the parameters associated with the neurons in the layer, i.e. the bias vector.
	void setParameterVector(RealVector const& newParameters){
		m_bias = newParameters;
	}

	/// \brief Returns the number of parameters associated with the neurons in the layer.
	std::size_t numberOfParameters()const{
		return size();
	}

	/// \brief Reads the bias vector from an archive.
	///
	/// @param archive the archive
	void read( InArchive & archive ){
		archive >> m_bias;
	}

	/// \brief Writes the bias vector to an archive.
	///
	/// @param archive the archive
	void write( OutArchive & archive ) const{
		archive << m_bias;
	}
};
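
/*
 * Usage sketch (illustrative, not part of this header): a BipolarLayer typically serves as the
 * visible and/or hidden layer of an RBM. Assuming Shark's RBM header and default random number
 * generator, a small bipolar-bipolar RBM could be set up as below; the typedef name BipolarRBM
 * is chosen here for illustration only:
 *
 *   #include <shark/Unsupervised/RBM/RBM.h>
 *
 *   using namespace shark;
 *   typedef RBM<BipolarLayer, BipolarLayer, random::rng_type> BipolarRBM;
 *
 *   BipolarRBM rbm(random::globalRng);
 *   rbm.setStructure(16, 8); // 16 visible and 8 hidden bipolar units
 */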

}
#endif