ExactGradient.h
/*!
 *
 *
 * \brief       -
 *
 * \author      -
 * \date        -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H
#define SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H

#include <shark/Unsupervised/RBM/analytics.h>
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Unsupervised/RBM/Sampling/GibbsOperator.h>

namespace shark{

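/// \brief Computes the exact gradient of the negative log-likelihood of an RBM.
///
/// In contrast to sampling-based approximations like contrastive divergence, the model
/// expectation of the energy gradient is computed exactly by enumerating all states of
/// the smaller of the two layers. For binary units this costs O(2^n) in the layer size n,
/// so the class is only tractable for small RBMs, but it serves as a ground truth when
/// testing the approximations.
///
/// A minimal usage sketch, assuming a small BinaryRBM and an UnlabeledData<RealVector>
/// training set `data` (the setup names follow the Shark RBM tutorials and are not part
/// of this header; exact spellings may differ between Shark versions):
/// \code
/// BinaryRBM rbm(Rng::globalRng);
/// rbm.setStructure(8,4);              // keep both layers small: enumeration is exponential
/// initRandomUniform(rbm,-0.1,0.1);
///
/// ExactGradient<BinaryRBM> objective(&rbm);
/// objective.setData(data);
///
/// SteepestDescent optimizer;
/// optimizer.init(objective);
/// for(std::size_t i = 0; i != 100; ++i){
///     optimizer.step(objective);
/// }
/// \endcode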
template<class RBMType>
class ExactGradient: public SingleObjectiveFunction{
private:
	typedef GibbsOperator<RBMType> Gibbs;
public:
	typedef RBMType RBM;

	ExactGradient(RBM* rbm): mpe_rbm(rbm),m_regularizer(0){
		SHARK_ASSERT(rbm != NULL);

		m_features |= HAS_VALUE;
		m_features |= HAS_FIRST_DERIVATIVE;
	};

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ExactGradient"; }

	void setData(UnlabeledData<RealVector> const& data){
		m_data = data;
	}

	SearchPointType proposeStartingPoint() const{
		return mpe_rbm->parameterVector();
	}

	std::size_t numberOfVariables()const{
		return mpe_rbm->numberOfParameters();
	}

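	/// \brief Sets a regularizer which is weighted by factor and added to the negative
	/// log-likelihood in eval() and evalDerivative(). The regularizer is not owned.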
	void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
		m_regularizer = regularizer;
		m_regularizationStrength = factor;
	}

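	/// \brief Computes the exact negative log-likelihood, averaged over the data.
	///
	/// Writing \f$ \tilde p(v) \f$ for the unnormalized probability and \f$ Z \f$ for the
	/// partition function, this evaluates
	/// \f$ -\frac{1}{N}\sum_{i=1}^N \log \tilde p(v_i) + \log Z \f$,
	/// where \f$ \log Z \f$ is obtained by exact enumeration inside negativeLogLikelihood.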
	double eval( SearchPointType const & parameter) const {
		mpe_rbm->setParameterVector(parameter);

		double negLogLikelihood = negativeLogLikelihood(*mpe_rbm,m_data)/m_data.numberOfElements();
		if(m_regularizer){
			negLogLikelihood += m_regularizationStrength * m_regularizer->eval(parameter);
		}
		return negLogLikelihood;
	}

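	/// \brief Computes the exact value and derivative of the negative log-likelihood.
	///
	/// Since \f$ -\log p(v) = -\log \tilde p(v) + \log Z \f$ and
	/// \f$ \nabla \log Z = \langle \nabla \log \tilde p \rangle_{model} \f$, the derivative
	/// is the difference between the model expectation and the data expectation of the
	/// energy gradient, which is exactly what is accumulated below.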
	double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
		mpe_rbm->setParameterVector(parameter);

		//accumulators for the data and model expectations of the energy gradient
		typename RBM::GradientType empiricalExpectation(mpe_rbm);
		typename RBM::GradientType modelExpectation(mpe_rbm);

		Gibbs gibbsSampler(mpe_rbm);

		//calculate the expectation of the energy gradient with respect to the data
		double negLogLikelihood = 0;
		for(RealMatrix const& batch: m_data.batches()) {
			std::size_t currentBatchSize = batch.size1();
			typename Gibbs::HiddenSampleBatch hiddenSamples(currentBatchSize,mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleSamples(currentBatchSize,mpe_rbm->numberOfVN());

			gibbsSampler.createSample(hiddenSamples,visibleSamples,batch);
			empiricalExpectation.addVH(hiddenSamples, visibleSamples);
			negLogLikelihood -= sum(mpe_rbm->energy().logUnnormalizedProbabilityVisible(
				batch,hiddenSamples.input,blas::repeat(1,currentBatchSize)
			));
		}

		//calculate the expectation of the energy gradient with respect to the model
		//distribution by integrating over the smaller of the two layers
		if(mpe_rbm->numberOfVN() < mpe_rbm->numberOfHN()){
			integrateOverVisible(modelExpectation);
		}
		else{
			integrateOverHidden(modelExpectation);
		}

		derivative.resize(mpe_rbm->numberOfParameters());
		noalias(derivative) = modelExpectation.result() - empiricalExpectation.result();

		m_logPartition = modelExpectation.logWeightSum();
		negLogLikelihood /= m_data.numberOfElements();
		negLogLikelihood += m_logPartition;

		if(m_regularizer){
			FirstOrderDerivative regularizerDerivative;
			negLogLikelihood += m_regularizationStrength * m_regularizer->evalDerivative(parameter,regularizerDerivative);
			noalias(derivative) += m_regularizationStrength * regularizerDerivative;
		}

		return negLogLikelihood;
	}

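	/// \brief Returns the log-partition function computed as a by-product of the last
	/// call to evalDerivative().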
	long double getLogPartition(){
		return m_logPartition;
	}

private:
	RBM* mpe_rbm;

	SingleObjectiveFunction* m_regularizer;
	double m_regularizationStrength;

	//batchwise loops over all states of the visible units to calculate the gradient as well as the partition function
	template<class GradientApproximator>//mostly dummy right now
	void integrateOverVisible(GradientApproximator & modelExpectation) const{

		Gibbs sampler(mpe_rbm);

		typedef typename RBM::VisibleType::StateSpace VisibleStateSpace;
		std::size_t values = VisibleStateSpace::numberOfStates(mpe_rbm->numberOfVN());
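		//the number of states grows exponentially with the layer size,
		//so they are enumerated in batches of at most 256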
		std::size_t batchSize = std::min(values, std::size_t(256));

		for (std::size_t x = 0; x < values; x+=batchSize) {
			//create batch of states
			std::size_t currentBatchSize=std::min(batchSize,values-x);
			typename Batch<RealVector>::type stateBatch(currentBatchSize,mpe_rbm->numberOfVN());
			for(std::size_t elem = 0; elem != currentBatchSize;++elem){
				//generation of the x+elem-th state vector
				VisibleStateSpace::state(row(stateBatch,elem),x+elem);
			}

			//create sample from state batch
			typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize,mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize,mpe_rbm->numberOfVN());
			sampler.createSample(hiddenBatch,visibleBatch,stateBatch);

			//calculate probabilities and update
			RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityVisible(
				stateBatch,hiddenBatch.input,blas::repeat(1,currentBatchSize)
			);
			modelExpectation.addVH(hiddenBatch, visibleBatch, logP);
		}
	}

	//batchwise loops over all states of the hidden units to calculate the gradient as well as the partition function
	template<class GradientApproximator>//mostly dummy right now
	void integrateOverHidden(GradientApproximator & modelExpectation) const{

		Gibbs sampler(mpe_rbm);

		typedef typename RBM::HiddenType::StateSpace HiddenStateSpace;
		std::size_t values = HiddenStateSpace::numberOfStates(mpe_rbm->numberOfHN());
		std::size_t batchSize = std::min(values, std::size_t(256));

		for (std::size_t x = 0; x < values; x+=batchSize) {
			//create batch of states
			std::size_t currentBatchSize=std::min(batchSize,values-x);
			typename Batch<RealVector>::type stateBatch(currentBatchSize,mpe_rbm->numberOfHN());
			for(std::size_t elem = 0; elem != currentBatchSize;++elem){
				//generation of the x+elem-th state vector
				HiddenStateSpace::state(row(stateBatch,elem),x+elem);
			}

			//create sample from state batch
			typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize,mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize,mpe_rbm->numberOfVN());
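			//the hidden states are fixed to the enumerated batch; only the conditional
			//statistics of the visible units given them have to be computed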
			hiddenBatch.state=stateBatch;
			sampler.precomputeVisible(hiddenBatch,visibleBatch, blas::repeat(1,currentBatchSize));

			//calculate probabilities and update
			RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityHidden(
				stateBatch,visibleBatch.input,blas::repeat(1,currentBatchSize)
			);
			modelExpectation.addHV(hiddenBatch, visibleBatch, logP);
		}
	}

	UnlabeledData<RealVector> m_data;

	mutable double m_logPartition; //the log-partition function of the model distribution
};

}

#endif