MultiChainApproximator.h
Go to the documentation of this file.
1 /*!
2  *
3  *
 * \brief Gradient approximation for RBM training using an ensemble of parallel Markov chains.
5  *
6  * \author -
7  * \date -
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_MULTICHAINAPPROXIMATOR_H
31 #define SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_MULTICHAINAPPROXIMATOR_H
32 
34 #include "Impl/DataEvaluator.h"
35 #include <vector>
36 
37 namespace shark{
38 ///\brief Approximates the gradient by taking samples from an ensemble of Markov chains running in parallel.
39 ///
40 ///The advantage is, that every chain can produce samples of a different mode of the distribution.
///The disadvantage is, however, that mixing is slower, so a higher number of sampling steps between subsequent samples
///needs to be chosen.
43 template<class MarkovChainType>
45 public:
46  typedef typename MarkovChainType::RBM RBM;
47 
	// NOTE(review): the constructor signature line (original line 48, presumably
	// `MultiChainApproximator(RBM* rbm)`) was lost in extraction — confirm against upstream.
	: mpe_rbm(rbm),m_chainOperator(rbm),m_k(1),m_samples(0),m_numBatches(0),m_regularizer(0){
		SHARK_ASSERT(rbm != NULL); // a valid RBM is mandatory; there is no default
		setBatchSize(500); // default chain batch size; forced to 1 if the chain type cannot batch
		// NOTE(review): original lines 53-55 are missing here (likely feature-flag
		// registration for the objective function) — confirm against upstream.
	}
57 
58  /// \brief From INameable: return the class name.
59  std::string name() const
60  { return "MultiChainApproximator"; }
61 
62  void setK(unsigned int k){
63  m_k = k;
64  }
65  void setNumberOfSamples(std::size_t samples){
66  m_samples = samples;
67  }
68  void setBatchSize(std::size_t batchSize){
69  m_batchSize = batchSize;
70  if(!MarkovChainType::computesBatch)
71  m_batchSize=1;
72  }
73 
	/// \brief Returns a mutable reference to the Markov chain operator used for sampling.
	MarkovChainType& chain(){
		return m_chainOperator;
	}
	/// \brief Returns a read-only reference to the Markov chain operator used for sampling.
	MarkovChainType const& chain() const{
		return m_chainOperator;
	}
80 
	/// \brief Returns the number of batches of the dataset that are used in every iteration.
	///
	/// If it is less than the total number of batches, the batches are chosen at random. If it is 0, all batches are used.
	std::size_t numBatches()const{
		return m_numBatches;
	}
87 
	/// \brief Returns a reference to the number of batches of the dataset that are used in every iteration.
	///
	/// If it is less than the total number of batches, the batches are chosen at random. If it is 0, all batches are used.
	std::size_t& numBatches(){
		return m_numBatches;
	}
94 
	// NOTE(review): the enclosing signature (original line 95, presumably
	// `void setData(UnlabeledData<typename RBM::VectorType> const& data){`)
	// was lost in extraction — confirm against upstream.
		m_data = data; // keep a copy of the training data for the empirical average

		//construct a gradient object to get the information about which values of the samples are needed
		typename RBM::GradientType grad(mpe_rbm);

		//if the number of samples is 0 = unset, set it to the number of points in the data set
		if(!m_samples){
			// NOTE(review): the body of this if (original line 103, presumably
			// `m_samples = m_data.numberOfElements();`) was lost in extraction — confirm upstream.
		}

		//calculate the number of batches (rounding up so all samples are covered)
		std::size_t batches = m_samples / m_batchSize;
		if(m_samples - batches*m_batchSize != 0){
			++batches;
		}
		m_chains.resize(batches);

		//swap every sample batch from the vector into the operator, initialize it and shift it back out.
		for(std::size_t i = 0; i != batches;++i){
			swap(m_chains[i],m_chainOperator.samples());
			// last batch may be smaller than m_batchSize
			std::size_t currentBatchSize = std::min(m_samples-i*m_batchSize, m_batchSize);
			m_chainOperator.setBatchSize(currentBatchSize);
			m_chainOperator.initializeChain(m_data);
			swap(m_chains[i],m_chainOperator.samples());
		}
	}
122 
	// NOTE(review): the enclosing signature (original line 123, presumably
	// `SearchPointType proposeStartingPoint()const {`) was lost in extraction — confirm upstream.
		return mpe_rbm->parameterVector(); // start optimization from the RBM's current parameters
	}
126 
	/// \brief Returns the dimensionality of the search space: the number of RBM parameters.
	std::size_t numberOfVariables()const{
		return mpe_rbm->numberOfParameters();
	}
130 
131  void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
132  m_regularizer = regularizer;
133  m_regularizationStrength = factor;
134  }
135 
	/// \brief Approximates the gradient of the negative log-likelihood at the given parameter vector.
	///
	/// The derivative is the difference between the model expectation of the energy gradient
	/// (estimated from the parallel Markov chains) and the empirical expectation over the data.
	/// The function value itself is not computed, so NaN is returned.
	///
	/// \param parameter the RBM parameter vector at which to evaluate
	/// \param derivative output: the approximated gradient (resized as needed)
	/// \return always NaN — only the derivative is approximated
	double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
		mpe_rbm->setParameterVector(parameter);

		typename RBM::GradientType modelAverage(mpe_rbm);
		// empirical expectation of the energy gradient over (a random subset of) the data batches
		RealVector empiricalAverage = detail::evaluateData(m_data,*mpe_rbm,m_numBatches);

		//approximate the expectation of the energy gradient with respect to the model distribution
		//using samples from the Markov chain
		// NOTE: each chain's state is swapped into the single (mutable) chain operator, advanced
		// m_k steps, accumulated into the average, and swapped back — the swap pairing is essential
		// so every chain persists its own state between calls.
		for(std::size_t i = 0; i != m_chains.size();++i){
			swap(m_chains[i],m_chainOperator.samples());//set the current GibbsChain
			m_chainOperator.step(m_k);//do the next step along the gibbs chain
			modelAverage.addVH(m_chainOperator.samples().hidden, m_chainOperator.samples().visible);//update gradient
			swap(m_chains[i],m_chainOperator.samples());//save the GibbsChain.
		}

		derivative.resize(mpe_rbm->numberOfParameters());
		// gradient of the approximated negative log-likelihood: model term minus data term
		noalias(derivative) = modelAverage.result() - empiricalAverage;

		if(m_regularizer){
			FirstOrderDerivative regularizerDerivative;
			m_regularizer->evalDerivative(parameter,regularizerDerivative);
			noalias(derivative) += m_regularizationStrength*regularizerDerivative;
		}

		// the objective value is not evaluated — callers must rely on the derivative only
		return std::numeric_limits<double>::quiet_NaN();
	}
private:
	RBM* mpe_rbm; // non-owning pointer to the RBM being trained; never NULL after construction
	mutable MarkovChainType m_chainOperator; // single operator; chain states are swapped in/out of it
	mutable std::vector<typename MarkovChainType::SampleBatch> m_chains; // persisted state of every parallel chain
	// NOTE(review): original line 166 is missing here — presumably the declaration of m_data
	// (the training dataset used by setData/evalDerivative); confirm against upstream.

	unsigned int m_k; // number of Markov-chain steps between subsequent samples
	std::size_t m_samples; // total samples per iteration; 0 = use dataset size
	std::size_t m_batchSize; // samples per chain batch (1 if the chain type cannot batch)
	std::size_t m_numBatches; // data batches used per iteration; 0 = all

	SingleObjectiveFunction* m_regularizer; // optional, non-owning; may be NULL
	double m_regularizationStrength; // weight of the regularizer's gradient
175 };
176 }
177 
178 #endif
179