// NegativeLogLikelihood.h
/*!
 *
 *
 * \brief Negative Log Likelihood error function
 *
 *
 *
 * \author O.Krause
 * \date 2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H
#define SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H

#include <shark/Core/Random.h>
#include <shark/Models/AbstractModel.h>
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>

#include <boost/range/algorithm/random_shuffle.hpp>
#include <boost/range/algorithm_ext/iota.hpp>
namespace shark{

/// \brief Computes the negative log likelihood of a dataset under a model
///
/// The negative log likelihood is defined as
/// \f[ L(\theta) = -\frac 1 N \sum_{i=1}^N \log(p_{\theta}(x_i)) \f]
/// where \f$ \theta \f$ is the vector of parameters of the model \f$ p \f$ and \f$ x \f$ are the
/// datapoints of the training set. Minimizing this
/// maximizes the probability of the dataset under p. This error measure is
/// closely related to the Kullback-Leibler-Divergence.
///
/// For this error function, the model is only allowed to have a single output
/// - the probability of the sample. The distribution must be normalized as otherwise
/// the likelihood does not mean anything.
56 {
57 public:
59 
61  DatasetType const& data,
63  ):mep_model(model),m_data(data){
64  if(mep_model->hasFirstParameterDerivative())
67  }
68 
69  /// \brief From INameable: return the class name.
70  std::string name() const
71  { return "NegativeLogLikelihood"; }
72 
74  return mep_model->parameterVector();
75  }
76 
77  std::size_t numberOfVariables()const{
78  return mep_model->numberOfParameters();
79  }
80 
81  ResultType eval(RealVector const& input) const{
82  SIZE_CHECK(input.size() == numberOfVariables());
84  mep_model->setParameterVector(input);
85 
86  double error = 0;
87  double minProb = 1e-100;//numerical stability is only guaranteed for lower bounded probabilities
88  SHARK_PARALLEL_FOR(int i = 0; i < (int)m_data.numberOfBatches(); ++i){
89  RealMatrix predictions = (*mep_model)(m_data.batch(i));
90  SIZE_CHECK(predictions.size2() == 1);
91  double logLikelihoodOfSamples = sum(log(max(predictions,minProb)));
93  error += logLikelihoodOfSamples;
94  }
95  }
96  error/=m_data.numberOfElements();//compute mean
97  return -error;//negative log likelihood
98  }
100  SearchPointType const& input,
101  FirstOrderDerivative & derivative
102  ) const{
103  SIZE_CHECK(input.size() == numberOfVariables());
105  mep_model->setParameterVector(input);
106  derivative.resize(input.size());
107  derivative.clear();
108 
109  //compute partitioning on threads
110  std::size_t numBatches = m_data.numberOfBatches();
111  std::size_t numElements = m_data.numberOfElements();
112  std::size_t numThreads = std::min(SHARK_NUM_THREADS,numBatches);
113  //calculate optimal partitioning
114  std::size_t batchesPerThread = numBatches/numThreads;
115  std::size_t leftOver = numBatches - batchesPerThread*numThreads;
116  double error = 0;
117  double minProb = 1e-100;//numerical stability is only guaranteed for lower bounded probabilities
118  SHARK_PARALLEL_FOR(int ti = 0; ti < (int)numThreads; ++ti){//MSVC does not support unsigned integrals in paralll loops
119  std::size_t t = ti;
120  //~ //get start and end index of batch-range
121  std::size_t start = t*batchesPerThread+std::min(t,leftOver);
122  std::size_t end = (t+1)*batchesPerThread+std::min(t+1,leftOver);
123 
124  //calculate error and derivative of the current thread
125  FirstOrderDerivative threadDerivative(input.size(),0.0);
126  double threadError = 0;
127  boost::shared_ptr<State> state = mep_model->createState();
128  RealVector batchDerivative;
129  RealMatrix predictions;
130  for(std::size_t i = start; i != end; ++i){
131  mep_model->eval(m_data.batch(i),predictions,*state);
132  SIZE_CHECK(predictions.size2() == 1);
133  threadError += sum(log(max(predictions,minProb)));
134  RealMatrix coeffs(predictions.size1(),predictions.size2(),0.0);
135  //the below handls numeric instabilities...
136  for(std::size_t j = 0; j != predictions.size1(); ++j){
137  for(std::size_t k = 0; k != predictions.size2(); ++k){
138  if(predictions(j,k) >= minProb){
139  coeffs(j,k) = 1.0/predictions(j,k);
140  }
141  }
142  }
143  mep_model->weightedParameterDerivative(
144  m_data.batch(i),predictions, coeffs,*state,batchDerivative
145  );
146  threadDerivative += batchDerivative;
147  }
148 
149  //sum over all threads
151  error += threadError;
152  noalias(derivative) += threadDerivative;
153  }
154  }
155 
156  error /= numElements;
157  derivative /= numElements;
158  derivative *= -1;
159  return -error;//negative log likelihood
160  }
161 
162 private:
165 };

}
#endif