TemperedMarkovChain.h
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief -
5  *
6  * \author -
7  * \date -
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef SHARK_UNSUPERVISED_RBM_SAMPLING_TEMPEREDMARKOVCHAIN_H
31 #define SHARK_UNSUPERVISED_RBM_SAMPLING_TEMPEREDMARKOVCHAIN_H
32 
33 #include <shark/Data/Dataset.h>
34 #include <shark/Core/Random.h>
36 #include <vector>
37 #include "Impl/SampleTypes.h"
38 namespace shark{
39 
40 
41 //\brief models a set of tempered Markov chains given a TransitionOperator.
42 // e.g. TemperedMarkovChain<GibbsOperator<RBM> > chain, leads to the set of chains
43 // used for parallel tempering.
44 template<class Operator>
46 private:
47  typedef typename Operator::HiddenSample HiddenSample;
48  typedef typename Operator::VisibleSample VisibleSample;
49 public:
50 
51  ///\brief The MarkovChain can't be used to compute several samples at once.
52  ///
53  /// The tempered markov chain ues it's batch capabilities allready to compute the samples for all temperatures
54  /// At the same time. Also it is much more powerfull when all samples are drawn one after another for a higher mixing rate.
55  static const bool computesBatch = false;
56 
57  ///\brief The type of the RBM the operator is working with.
58  typedef typename Operator::RBM RBM;
59 
60  ///\brief A batch of samples containing hidden and visible samples as well as the energies.
62 
63  ///\brief Mutable reference to an element of the batch.
64  typedef typename SampleBatch::reference reference;
65 
66  ///\brief Immutable reference to an element of the batch.
67  typedef typename SampleBatch::const_reference const_reference;
68 
69 private:
70  SampleBatch m_temperedChains;
71  RealVector m_betas;
72  Operator m_operator;
73 
74  void metropolisSwap(reference low, double betaLow, reference high, double betaHigh){
75  RealVector const& baseRate = transitionOperator().rbm()->visibleNeurons().baseRate();
76  double betaDiff = betaLow - betaHigh;
77  double energyDiff = low.energy - high.energy;
78  double baseRateDiff = inner_prod(low.visible.state,baseRate) - inner_prod(high.visible.state,baseRate);
79  double r = betaDiff * energyDiff + betaDiff*baseRateDiff;
80 
81  double z = random::uni(m_operator.rbm()->rng(),0,1);
82  if( r >= 0 || (z > 0 && std::log(z) < r) ){
83  swap(high,low);
84  }
85  }
86 
87 public:
88  TemperedMarkovChain(RBM* rbm):m_operator(rbm){}
89 
90  const Operator& transitionOperator()const{
91  return m_operator;
92  }
93  Operator& transitionOperator(){
94  return m_operator;
95  }
96 
97 
98  /// \brief Sets the number of temperatures and initializes the tempered chains accordingly.
99  ///
100  /// @param temperatures number of temperatures
101  void setNumberOfTemperatures(std::size_t temperatures){
102  std::size_t visibles=m_operator.rbm()->numberOfVN();
103  std::size_t hiddens=m_operator.rbm()->numberOfHN();
104  m_temperedChains = SampleBatch(temperatures,visibles,hiddens);
105  m_betas.resize(temperatures);
106  }
107 
108  /// \brief Sets the number of temperatures and initializes them in a uniform spacing
109  ///
110  /// Temperatures are spaced equally between 0 and 1.
111  /// @param temperatures number of temperatures
112  void setUniformTemperatureSpacing(std::size_t temperatures){
113  setNumberOfTemperatures(temperatures);
114  for(std::size_t i = 0; i != temperatures; ++i){
115  double factor = temperatures - 1.0;
116  setBeta(i,1.0 - i/factor);
117  }
118  }
119 
120 
121  /// \brief Returns the number Of temperatures.
122  std::size_t numberOfTemperatures()const{
123  return m_betas.size();
124  }
125 
126  void setBatchSize(std::size_t batchSize){
127  SHARK_RUNTIME_CHECK(batchSize == 1, "Markov chain can only compute batches of size 1.");
128  }
129  std::size_t batchSize(){
130  return 1;
131  }
132 
133  void setBeta(std::size_t i, double beta){
134  SIZE_CHECK(i < m_betas.size());
135  m_betas(i) = beta;
136  }
137 
138  double beta(std::size_t i)const{
139  SIZE_CHECK(i < m_betas.size());
140  return m_betas(i);
141  }
142 
143  RealVector const& beta()const{
144  return m_betas;
145  }
146 
147  ///\brief Returns the current state of the chain for beta = 1.
148  const_reference sample()const{
149  return const_reference(m_temperedChains,0);
150  }
151  ///\brief Returns the current state of the chain for all beta values.
152  SampleBatch const& samples()const{
153  return m_temperedChains;
154  }
155 
156  /// \brief Returns the current batch of samples of the Markov chain.
157  SampleBatch& samples(){
158  return m_temperedChains;
159  }
160 
161  ///\brief Initializes the markov chain using samples drawn uniformly from the set.
162  ///
163  /// Be aware that the number of chains and the temperatures need to bee specified previously.
164  /// @param dataSet the data set
165  void initializeChain(Data<RealVector> const& dataSet){
166  SHARK_RUNTIME_CHECK(m_temperedChains.size() != 0,"You did not initialize the number of temperatures bevor initializing the chain!");
167  std::size_t visibles = m_operator.rbm()->numberOfVN();
168  RealMatrix sampleData(m_temperedChains.size(),visibles);
169 
170  for(std::size_t i = 0; i != m_temperedChains.size(); ++i){
171  noalias(row(sampleData,i)) = dataSet.element(random::discrete(m_operator.rbm()->rng(),std::size_t(0),dataSet.numberOfElements()-1));
172  }
173  initializeChain(sampleData);
174  }
175 
176  /// \brief Initializes with data points from a batch of points
177  ///
178  /// Be aware that the number of chains and the temperatures need to bee specified previously.
179  /// @param sampleData the data set
180  void initializeChain(RealMatrix const& sampleData){
181  SHARK_RUNTIME_CHECK(m_temperedChains.size() != 0,"You did not initialize the number of temperatures bevor initializing the chain!");
182 
183  m_operator.createSample(m_temperedChains.hidden,m_temperedChains.visible,sampleData,m_betas);
184 
185  m_temperedChains.energy = m_operator.calculateEnergy(
186  m_temperedChains.hidden,
187  m_temperedChains.visible
188  );
189  }
190  //updates the chain using the current sample
191  void step(unsigned int k){
192  for(std::size_t i = 0; i != k; ++i){
193  //do one step of the tempered the Markov chains at the same time
194  m_operator.stepVH(m_temperedChains.hidden, m_temperedChains.visible,1,m_betas);
195 
196  //calculate energy for samples at all temperatures
197  m_temperedChains.energy = m_operator.calculateEnergy(
198  m_temperedChains.hidden,
199  m_temperedChains.visible
200  );
201 
202  //EVEN phase
203  std::size_t elems = m_temperedChains.size();
204  for(std::size_t i = 0; i < elems-1; i+=2){
205  metropolisSwap(
206  reference(m_temperedChains,i),m_betas(i),
207  reference(m_temperedChains,i+1),m_betas(i+1)
208  );
209  }
210  //ODD phase
211  for(std::size_t i = 1; i < elems-1; i+=2){
212  metropolisSwap(
213  reference(m_temperedChains,i),m_betas(i),
214  reference(m_temperedChains,i+1),m_betas(i+1)
215  );
216  }
217  m_operator.rbm()->hiddenNeurons().sufficientStatistics(
218  m_temperedChains.hidden.input,m_temperedChains.hidden.statistics, m_betas
219  );
220  }
221  }
222 };
223 
224 }
225 #endif