GibbsOperator.h
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief Implements Block Gibbs Sampling
5  *
6  * \author O.Krause
7  * \date 2014
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef SHARK_UNSUPERVISED_RBM_SAMPLING_GIBBSOPERATOR_H
31 #define SHARK_UNSUPERVISED_RBM_SAMPLING_GIBBSOPERATOR_H
32 
33 #include <shark/LinAlg/Base.h>
34 #include "Impl/SampleTypes.h"
35 namespace shark{
36 
37 ///\brief Implements Block Gibbs Sampling related transition operators for various temperatures.
38 ///
39 /// The operator generates transitions from the current state of the neurons of an RBM
40 /// to a new one and thus can be used to produce a Markov chain.
41 /// The Gibbs Operator works by computing the conditional distribution of the hidden given the visible p(h|v) (or
41 /// vice versa) and then samples the new hidden (or visible) state from it.
43 ///
44 /// As an interesting twist, this operator can also be used to implement Flip-The-State sampling using two values alpha_visible
45 /// and alpha_hidden, both being between 0 and 1 (inclusively). For alpha_visible=alpha_hidden=0, pure Gibbs sampling is performed.
46 /// If for one of the layers the value is not 0, a mixture of Gibbs and flip-the-state sampling is performed; 1 equals pure flip-the-state
47 /// sampling.
48 /// The trick of this sampler is that it takes the previous state into account while sampling. If the current state has a low probability,
49 /// the sampler jumps deterministically in another state with higher probability. This is counterbalanced by having a higher chance to jump away from
50 /// this state.
51 template< class RBMType >
53 public:
54  typedef RBMType RBM;
55 
56  ///The operator holds a 'sample' of the visible and hidden neurons.
57  ///Such a sample does not only contain the states of the neurons but all other information
58  ///needed to approximate the gradient
59 
60  ///\brief the type of a concrete sample.
61  typedef detail::GibbsSample<
62  typename RBMType::HiddenType::SufficientStatistics
64  ///\brief the type of a concrete sample.
65  typedef detail::GibbsSample<
66  typename RBMType::VisibleType::SufficientStatistics
68 
69  ///\brief Represents the state of a batch of hidden samples and additional information required by the gradient.
70  ///
71  ///Aside from the hidden state, this structure can also hold the actual values
72  ///of the input, the phi-function and the sufficient statistics
74 
75  ///\brief Represents the state of the visible units and additional information required by the gradient.
76  ///
77  ///Aside from the visible state, this structure can also hold the actual values
78  ///of the hidden input, the phi-function and the sufficient statistics
80 
81  ///\brief Constructs the Operator using an allready defined Distribution to sample from.
82  GibbsOperator(RBM* rbm, double alphaVisible = 0,double alphaHidden = 0)
83  :mpe_rbm(rbm){
84  setAlpha(alphaVisible,alphaHidden);
85  }
86 
87  ///\brief Returns the internal RBM.
88  RBM* rbm()const{
89  return mpe_rbm;
90  }
91 
92 
93  ///\brief Calculates internal data needed for sampling the hidden units as well as requested information for the gradient.
94  ///
95  ///This function calculates the conditional probability distribution p(h|v) with inverse temperature beta for the whole batch of samples
96  ///Be aware that a change of temperature may occur between sampleVisible and precomputeHidden.
97  /// @param hiddenBatch the batch of hidden samples to be created
98  /// @param visibleBatch the batch of visible samples to be created
99  /// @param beta the vector of inverse temperatures
100  template<class BetaVector>
101  void precomputeHidden(HiddenSampleBatch& hiddenBatch, VisibleSampleBatch& visibleBatch, BetaVector const& beta)const{
102  SIZE_CHECK(visibleBatch.size()==hiddenBatch.size());
103  mpe_rbm->energy().inputHidden(hiddenBatch.input, visibleBatch.state);
104  //calculate the sufficient statistics of the hidden units
105  mpe_rbm->hiddenNeurons().sufficientStatistics(hiddenBatch.input,hiddenBatch.statistics, beta);
106  }
107 
108 
109  ///\brief calculates internal data needed for sampling the visible units as well as requested information for the gradient
110  ///
111  ///This function calculates the conditional probability distribution p(v|h) with inverse temperature beta for a whole batch of inputs.
112  ///Be aware that a change of temperature may occur between sampleHidden and precomputeVisible.
113  template<class BetaVector>
114  void precomputeVisible(HiddenSampleBatch& hiddenBatch, VisibleSampleBatch& visibleBatch, BetaVector const& beta)const{
115  SIZE_CHECK(visibleBatch.size()==hiddenBatch.size());
116  mpe_rbm->energy().inputVisible(visibleBatch.input, hiddenBatch.state);
117  //calculate the sufficient statistics of the visible units for every element of the batch
118  mpe_rbm->visibleNeurons().sufficientStatistics(visibleBatch.input,visibleBatch.statistics, beta);
119 
120  }
121 
122  ///\brief Samples a new batch of states of the hidden units using their precomputed statistics.
123  void sampleHidden(HiddenSampleBatch& sampleBatch)const{
124  //sample state of the hidden neurons, input and statistics was allready computed by precompute
125  mpe_rbm->hiddenNeurons().sample(sampleBatch.statistics, sampleBatch.state, m_alphaHidden, mpe_rbm->rng());
126  }
127 
128 
129  ///\brief Samples a new batch of states of the visible units using their precomputed statistics.
130  void sampleVisible(VisibleSampleBatch& sampleBatch)const{
131  //sample state of the visible neurons, input and statistics was allready computed by precompute
132  mpe_rbm->visibleNeurons().sample(sampleBatch.statistics, sampleBatch.state, m_alphaVisible, mpe_rbm->rng());
133  }
134 
135  /// \brief Applies the Gibbs operator a number of times to a given sample.
136  ///
137  /// Performs one complete step for a sample by sampling first the hidden, than the visible and computing the probability of a hidden given the visible unit
138  /// That is, Given a State (v,h), computes p(v|h),draws v and then computes p(h|v) and draws h . this is repeated several times
139  template<class BetaVector>
140  void stepVH(HiddenSampleBatch& hiddenBatch, VisibleSampleBatch& visibleBatch, std::size_t numberOfSteps, BetaVector const& beta){
141  for(unsigned int i=0; i != numberOfSteps; i++){
142  precomputeVisible(hiddenBatch,visibleBatch,beta);
143  sampleVisible(visibleBatch);
144  precomputeHidden(hiddenBatch, visibleBatch,beta);
145  sampleHidden(hiddenBatch);
146  }
147  }
148 
149  ///\brief Creates hidden/visible sample pairs from the states of the visible neurons, i.e. sets the visible units to the given states and samples hidden states based on the states of the visible units.
150  /// This can directly be used to calculate the gradient.
151  ///
152  /// @param hiddenBatch the batch of hidden samples to be created
153  /// @param visibleBatch the batch of visible samples to be created
154  /// @param states the states of the visible neurons in the sample
155  /// @param beta the vector of inverse temperatures
156  template<class States, class BetaVector>
157  void createSample(HiddenSampleBatch& hiddenBatch,VisibleSampleBatch& visibleBatch, States const& states, BetaVector const& beta)const{
158  SIZE_CHECK(batchSize(states)==visibleBatch.size());
159  SIZE_CHECK(hiddenBatch.size()==visibleBatch.size());
160  visibleBatch.state = states;
161 
162  precomputeHidden(hiddenBatch,visibleBatch, beta);
163  sampleHidden(hiddenBatch);
164  }
165 
166  ///\brief Creates hidden/visible sample pairs from the states of the visible neurons, i.e. sets the visible units to the given states and samples hidden states based on the states of the visible units.
167  /// This can directly be used to calculate the gradient.
168  ///
169  /// @param hiddenBatch the batch of hidden samples to be created
170  /// @param visibleBatch the batch of visible samples to be created
171  /// @param states the states of the visible neurons in the sample
172  template<class States>
173  void createSample(HiddenSampleBatch& hiddenBatch,VisibleSampleBatch& visibleBatch, States const& states)const{
174  createSample(hiddenBatch,visibleBatch,states, blas::repeat(1.0,states.size1()));
175  }
176 
177  ///\brief Calculates the Energy of a sample of the visible and hidden neurons created by this chain.
178  ///
179  ///@param hiddenBatch the batch of samples of the hidden neurons
180  ///@param visibleBatch the batch of samples of the visible neurons (holding also the precomputed input of the visibles)
181  ///@return the value of the energy function
182  RealVector calculateEnergy(HiddenSampleBatch const& hiddenBatch, VisibleSampleBatch const& visibleBatch)const{
183  return mpe_rbm->energy().energyFromHiddenInput(
184  hiddenBatch.input,
185  hiddenBatch.state,
186  visibleBatch.state
187  );
188  }
189 
190  void setAlpha(double newAlphaVisible, double newAlphaHidden){
191  SHARK_RUNTIME_CHECK(newAlphaVisible >= 0.0, "alpha >= 0 not fulfilled for the visible layer");
192  SHARK_RUNTIME_CHECK(newAlphaVisible <= 1., "alpha <=1 not fulfilled for the visible layer");
193  SHARK_RUNTIME_CHECK(newAlphaHidden >= 0.0, "alpha >= 0 not fulfilled for the hidden layer");
194  SHARK_RUNTIME_CHECK(newAlphaHidden <= 1., "alpha <=1 not fulfilled for the hidden layer");
195  m_alphaVisible = newAlphaVisible;
196  m_alphaHidden = newAlphaHidden;
197  }
private:
 RBM* mpe_rbm; ///< the RBM sampled from; stored, never deleted here — presumably owned by the caller
 double m_alphaVisible; ///< flip-the-state mixing coefficient of the visible layer, kept in [0,1] by setAlpha
 double m_alphaHidden; ///< flip-the-state mixing coefficient of the hidden layer, kept in [0,1] by setAlpha
202 };
203 
204 
205 }
206 #endif