// SingleChainApproximator.h — Shark RBM gradient approximator (single Markov chain).
/*!
 *
 *
 * \brief Approximates the gradient of an RBM by sampling from a single Markov chain.
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_SINGLECHAINAPPROXIMATOR_H
#define SHARK_UNSUPERVISED_RBM_SINGLECHAINAPPROXIMATOR_H

// NOTE(review): this include was dropped from this copy of the file; the class
// below derives from SingleObjectiveFunction, which this header declares.
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include "Impl/DataEvaluator.h"
36 namespace shark{
37 
///\brief Approximates the gradient by taking samples from a single Markov chain.
///
///Taking samples only from a single chain leads to a high mixing rate but the correlation of the samples is higher than using
///several chains. This approximator should be used with a sampling scheme which also achieves a faster decorrelation of samples like
///tempering.
43 template<class MarkovChainType>
45 public:
46  typedef typename MarkovChainType::RBM RBM;
47 
49  : mpe_rbm(rbm),m_chain(rbm),m_k(1)
50  ,m_samples(0),m_batchSize(500)
51  ,m_numBatches(0),m_regularizer(0){
52  SHARK_ASSERT(rbm != NULL);
53 
57 
58  m_chain.setBatchSize(1);
59  };
60 
61  /// \brief From INameable: return the class name.
62  std::string name() const
63  { return "SingleChainApproximator"; }
64 
65  void setK(unsigned int k){
66  m_k = k;
67  }
68  void setNumberOfSamples(std::size_t samples){
69  m_samples = samples;
70  }
71 
72  /// \brief Returns the number of batches of the dataset that are used in every iteration.
73  ///
74  /// If it is less than all batches, the batches are chosen at random. if it is 0, all batches are used
75  std::size_t numBatches()const{
76  return m_numBatches;
77  }
78 
79  /// \brief Returns a reference to the number of batches of the dataset that are used in every iteration.
80  ///
81  /// If it is less than all batches, the batches are chosen at random.if it is 0, all batches are used.
82  std::size_t& numBatches(){
83  return m_numBatches;
84  }
85 
86  MarkovChainType& chain(){
87  return m_chain;
88  }
89  MarkovChainType const& chain() const{
90  return m_chain;
91  }
92 
94  m_data = data;
95  m_chain.initializeChain(m_data);
96  }
97 
99  return mpe_rbm->parameterVector();
100  }
101 
102  std::size_t numberOfVariables()const{
103  return mpe_rbm->numberOfParameters();
104  }
105 
106  void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
107  m_regularizer = regularizer;
108  m_regularizationStrength = factor;
109  }
110 
111  double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
112  mpe_rbm->setParameterVector(parameter);
113 
114  typename RBM::GradientType modelAverage(mpe_rbm);
115  RealVector empiricalAverage = detail::evaluateData(m_data,*mpe_rbm,m_numBatches);
116 
117  //approximate the expectation of the energy gradient with respect to the model distribution
118  //using samples from the Markov chain
119 
120  //calculate number of samples to draw and size of batches used in the gradient update
121  std::size_t samplesToDraw = m_samples > 0 ? m_samples: m_data.numberOfElements();
122 
123  std::size_t batches = samplesToDraw / m_batchSize;
124  if(samplesToDraw - batches*m_batchSize != 0){
125  ++batches;
126  }
127 
128  //calculate the gradient. we do this by normal k-step sampling for exactly as many
129  //samples as calculated in samplesToDraw but saving the result in an intermediate
130  //batch variable gradientbatch. When this batch is full, we do an update step of the gradient.
131  //this is an a bit more efficient grouping and preserves us from using batches of size1 as the argument
132  //of addVH which might be inefficient.
133  for(std::size_t batch = 0; batch != batches; ++batch){
134  //calculate the size of the next batch which is batchSize as long as there are enough samples left to draw
135  std::size_t currentBatchSize = std::min(samplesToDraw-batch*m_batchSize, m_batchSize);
136  typename MarkovChainType::SampleBatch gradientBatch(currentBatchSize, mpe_rbm->numberOfVN(),mpe_rbm->numberOfHN());
137  //fill the batch with fresh samples
138  for(std::size_t i = 0; i != currentBatchSize; ++i){
139  m_chain.step(m_k);
140  getBatchElement(gradientBatch,i) = m_chain.sample();
141  }
142  //do the gradient update
143  modelAverage.addVH(gradientBatch.hidden, gradientBatch.visible);
144  }
145 
146  derivative.resize(mpe_rbm->numberOfParameters());
147  noalias(derivative) = modelAverage.result() - empiricalAverage;
148 
149  if(m_regularizer){
150  FirstOrderDerivative regularizerDerivative;
151  m_regularizer->evalDerivative(parameter,regularizerDerivative);
152  noalias(derivative) += m_regularizationStrength*regularizerDerivative;
153  }
154 
155  return std::numeric_limits<double>::quiet_NaN();
156  }
157 
158 private:
159  RBM* mpe_rbm;
160  mutable MarkovChainType m_chain;
162 
163  unsigned int m_k;
164  unsigned int m_samples;
165  std::size_t m_batchSize;
166  std::size_t m_numBatches;
167 
168  SingleObjectiveFunction* m_regularizer;
169  double m_regularizationStrength;
170 };
171 
172 }
173 
174 #endif