#ifndef SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H
#define SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Unsupervised/RBM/Sampling/GibbsOperator.h>
#include <shark/Unsupervised/RBM/analytics.h>
#include <shark/Data/Dataset.h>

namespace shark{

///\brief Computes the exact gradient of the negative log-likelihood of an RBM.
///
///Tractable only for small models: the partition function is computed by
///enumerating all states of the smaller of the two layers.
template<class RBMType>
class ExactGradient: public SingleObjectiveFunction{
private:
	typedef GibbsOperator<RBMType> Gibbs;
public:
	typedef RBMType RBM;

	ExactGradient(RBM* rbm): mpe_rbm(rbm), m_regularizer(0){
		SHARK_ASSERT(rbm != NULL);
		m_features |= HAS_VALUE;
		m_features |= HAS_FIRST_DERIVATIVE;
		m_features |= CAN_PROPOSE_STARTING_POINT;
	}
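
	/// \brief From INameable: return the class name.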
	std::string name() const
	{ return "ExactGradient"; }

	void setData(UnlabeledData<RealVector> const& data){
		m_data = data;
	}
	SearchPointType proposeStartingPoint() const{
		return mpe_rbm->parameterVector();
	}

	std::size_t numberOfVariables() const{
		return mpe_rbm->numberOfParameters();
	}
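
	///\brief Adds a regularizer to the objective, weighted by the given factor.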
	void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
		m_regularizer = regularizer;
		m_regularizationStrength = factor;
	}

	double eval(SearchPointType const& parameter) const{
		mpe_rbm->setParameterVector(parameter);
		//negativeLogLikelihood is the exact evaluation provided by shark/Unsupervised/RBM/analytics.h
		double negLogLikelihood = negativeLogLikelihood(*mpe_rbm, m_data)/m_data.numberOfElements();
		if(m_regularizer){
			negLogLikelihood += m_regularizationStrength * m_regularizer->eval(parameter);
		}
		return negLogLikelihood;
	}
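
	///\brief Computes the exact negative log-likelihood and its gradient.
	///
	///The gradient is the difference between the model expectation and the
	///empirical expectation of the energy gradient.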
	double evalDerivative(SearchPointType const& parameter, FirstOrderDerivative& derivative) const{
		mpe_rbm->setParameterVector(parameter);

		//accumulators for the empirical (data) and model expectations of the energy gradient
		typename RBM::GradientType empiricalExpectation(mpe_rbm);
		typename RBM::GradientType modelExpectation(mpe_rbm);

		Gibbs gibbsSampler(mpe_rbm);
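
		//compute the expectation of the energy gradient with respect to the data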
		double negLogLikelihood = 0;
		for(RealMatrix const& batch: m_data.batches()){
			std::size_t currentBatchSize = batch.size1();
			typename Gibbs::HiddenSampleBatch hiddenSamples(currentBatchSize, mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleSamples(currentBatchSize, mpe_rbm->numberOfVN());
			gibbsSampler.createSample(hiddenSamples, visibleSamples, batch);
			empiricalExpectation.addVH(hiddenSamples, visibleSamples);
			negLogLikelihood -= sum(mpe_rbm->energy().logUnnormalizedProbabilityVisible(
				batch, hiddenSamples.input, blas::repeat(1,currentBatchSize)
			));
		}
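
		//compute the model expectation by enumerating the smaller of the two layers exactly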
		if(mpe_rbm->numberOfVN() < mpe_rbm->numberOfHN()){
			integrateOverVisible(modelExpectation);
		}
		else{
			integrateOverHidden(modelExpectation);
		}

		derivative.resize(mpe_rbm->numberOfParameters());
		noalias(derivative) = modelExpectation.result() - empiricalExpectation.result();
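
		//the exact enumeration yields the log partition function as its log weight sum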
		m_logPartition = modelExpectation.logWeightSum();
		negLogLikelihood /= m_data.numberOfElements(); //normalize per element, matching eval()
		negLogLikelihood += m_logPartition;

		if(m_regularizer){
			FirstOrderDerivative regularizerDerivative;
			negLogLikelihood += m_regularizationStrength * m_regularizer->evalDerivative(parameter, regularizerDerivative);
			noalias(derivative) += m_regularizationStrength * regularizerDerivative;
		}

		return negLogLikelihood;
	}
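
	///\brief Returns the log partition function computed as a by-product of the last call to evalDerivative.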
	double getLogPartition() const{
		return m_logPartition;
	}

private:
	RBM* mpe_rbm;

	SingleObjectiveFunction* m_regularizer;
	double m_regularizationStrength;
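
	///\brief Enumerates all states of the visible layer batch-wise to compute
	///the model expectation of the energy gradient and the partition function.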
	template<class GradientApproximator>
	void integrateOverVisible(GradientApproximator& modelExpectation) const{
		Gibbs sampler(mpe_rbm);

		typedef typename RBM::VisibleType::StateSpace VisibleStateSpace;
		std::size_t values = VisibleStateSpace::numberOfStates(mpe_rbm->numberOfVN());
		std::size_t batchSize = std::min(values, std::size_t(256));
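
		//iterate over all states of the visible layer in batches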
		for(std::size_t x = 0; x < values; x += batchSize){
			//create a batch holding one state vector for every state x..x+currentBatchSize-1
			std::size_t currentBatchSize = std::min(batchSize, values - x);
			typename Batch<RealVector>::type stateBatch(currentBatchSize, mpe_rbm->numberOfVN());
			for(std::size_t elem = 0; elem != currentBatchSize; ++elem){
				VisibleStateSpace::state(row(stateBatch,elem), x+elem);
			}

			typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize, mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize, mpe_rbm->numberOfVN());
			sampler.createSample(hiddenBatch, visibleBatch, stateBatch);
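
			//weight each state by its unnormalized log-probability and accumulate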
			RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityVisible(
				stateBatch, hiddenBatch.input, blas::repeat(1,currentBatchSize)
			);
			modelExpectation.addVH(hiddenBatch, visibleBatch, logP);
		}
	}
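
	///\brief Enumerates all states of the hidden layer batch-wise to compute
	///the model expectation of the energy gradient and the partition function.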
	template<class GradientApproximator>
	void integrateOverHidden(GradientApproximator& modelExpectation) const{
		Gibbs sampler(mpe_rbm);

		typedef typename RBM::HiddenType::StateSpace HiddenStateSpace;
		std::size_t values = HiddenStateSpace::numberOfStates(mpe_rbm->numberOfHN());
		std::size_t batchSize = std::min(values, std::size_t(256));
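
		//iterate over all states of the hidden layer in batches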
		for(std::size_t x = 0; x < values; x += batchSize){
			//create a batch holding one state vector for every state x..x+currentBatchSize-1
			std::size_t currentBatchSize = std::min(batchSize, values - x);
			typename Batch<RealVector>::type stateBatch(currentBatchSize, mpe_rbm->numberOfHN());
			for(std::size_t elem = 0; elem != currentBatchSize; ++elem){
				HiddenStateSpace::state(row(stateBatch,elem), x+elem);
			}

			typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize, mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize, mpe_rbm->numberOfVN());
			hiddenBatch.state = stateBatch;
			sampler.precomputeVisible(hiddenBatch, visibleBatch, blas::repeat(1,currentBatchSize));
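
			//weight each state by its unnormalized log-probability and accumulate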
			RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityHidden(
				stateBatch, visibleBatch.input, blas::repeat(1,currentBatchSize)
			);
			modelExpectation.addHV(hiddenBatch, visibleBatch, logP);
		}
	}

	UnlabeledData<RealVector> m_data;

	mutable double m_logPartition; //cached log partition function of the model distribution
};

}
#endif