// SquaredLoss.h
1 /*!
2  *
3  *
4  * \brief Implements the Squared Error Loss function for regression.
5  *
6  *
7  *
8  *
9  * \author Oswin Krause, Christian Igel
10  * \date 2011
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 #ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_SQUAREDLOSS_H
34 #define SHARK_OBJECTIVEFUNCTIONS_LOSS_SQUAREDLOSS_H
35 
#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>
39 namespace shark{
40 /// \brief squared loss for regression and classification
41 ///
42 /// The SquaredLoss computes the squared distance
43 /// between target and prediction. It is defined for both
44 /// vectorial as well as integral labels. In the case of integral labels,
45 /// the label c is interpreted as unit-vector having the c-th component activated.
46 ///
47 template<class OutputType = RealVector, class LabelType = OutputType >
48 class SquaredLoss : public AbstractLoss<LabelType,OutputType>
49 {
50 public:
54 
55  /// Constructor.
57  {
59  }
60 
61 
62  /// \brief From INameable: return the class name.
63  std::string name() const
64  { return "SquaredLoss"; }
65 
66  using base_type::eval;
67 
68  /// Evaluate the squared loss \f$ (label - prediction)^2 \f$.
69  double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const {
70  SIZE_CHECK(labels.size1()==predictions.size1());
71  SIZE_CHECK(labels.size2()==predictions.size2());
72 
73  double error = 0;
74  for(std::size_t i = 0; i != labels.size1(); ++i){
75  error+=distanceSqr(row(predictions,i),row(labels,i));
76  }
77  return 0.5 * error;
78  }
79 
80  /// Evaluate the squared loss \f$ (label - prediction)^2 \f$
81  /// and its deriative \f$ \frac{\partial}{\partial prediction} 1/2 (label - prediction)^2 = prediction - label \f$.
82  double evalDerivative(BatchLabelType const& label, BatchOutputType const& prediction, BatchOutputType& gradient) const {
83  gradient.resize(prediction.size1(),prediction.size2());
84  noalias(gradient) = (prediction - label);
85  return SquaredLoss::eval(label,prediction);
86  }
87 };
88 
89 //specialisation for classification case.
90 template<class OutputType>
91 class SquaredLoss<OutputType,unsigned int> : public AbstractLoss<unsigned int,OutputType>
92 {
93 public:
97 
98  /// Constructor.
100  {
102  }
103 
104 
105  /// \brief From INameable: return the class name.
106  std::string name() const
107  { return "SquaredLoss"; }
108 
109  using base_type::eval;
110 
111  /// Evaluate the squared loss \f$ (label - prediction)^2 \f$.
112  double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const {
113  SIZE_CHECK(labels.size()==predictions.size1());
114 
115  double error = 0;
116  for(std::size_t i = 0; i != labels.size(); ++i){
117  unsigned int c = labels(i);
118  SIZE_CHECK(c < predictions.size2());
119  error+=norm_sqr(row(predictions,i))+1.0-2.0*predictions(i,c);
120  }
121  return 0.5 * error;
122  }
123 
124  /// Evaluate the squared loss \f$ (label - prediction)^2 \f$
125  /// and its deriative \f$ \frac{\partial}{\partial prediction} 1/2 (label - prediction)^2 = prediction - label \f$.
126  double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient) const {
127  gradient.resize(predictions.size1(),predictions.size2());
128  noalias(gradient) = predictions;
129  for(std::size_t i = 0; i != labels.size(); ++i){
130  unsigned int c = labels(i);
131  SIZE_CHECK(c < predictions.size2());
132  gradient(i,c)-=1.0;
133  }
134  return SquaredLoss::eval(labels,predictions);
135  }
136 };
137 
138 //spcialisation for sequence data
139 template<>
140 class SquaredLoss<Sequence,Sequence> : public AbstractLoss<Sequence,Sequence>
141 {
142 public:
143  /// \brief Constructor.
144  ///
145  /// \param ignore Specifies how many elements of the sequence are to be ignored during evaluation
146  /// must be strictly smaller than the smalles sequnce to evaluate.
147  SquaredLoss(std::size_t ignore=0)
148  :m_ignore(ignore){
150  }
151 
152 
153  /// \brief From INameable: return the class name.
154  std::string name() const
155  { return "SquaredLoss"; }
156 
157  using base_type::eval;
158 
159  /// \brief Evaluate the squared loss \f$ (label - prediction)^2 \f$.
160  ///
161  /// For Sequences this is:
162  /// \f[ sum_{i=i_0} (label_i-prediction_i)^2\f]
163  /// where \f$ i_0 \f$ is the first element to be evaluated. By default it is 0
164  double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const {
165  SIZE_CHECK(labels.size()==predictions.size());
166 
167  double error = 0;
168  for(std::size_t i = 0; i != labels.size(); ++i){
169  SIZE_CHECK(labels[i].size()==predictions[i].size());
170  SHARK_RUNTIME_CHECK(labels[i].size() > m_ignore,"Number of sequence elements to ignore is too large");
171 
172  for(std::size_t j = m_ignore; j != labels[i].size(); ++j){
173  error += distanceSqr(predictions[i][j],labels[i][j]);
174  }
175  }
176  return 0.5 * error;
177  }
178 
179  /// Evaluate the squared loss \f$ (label - prediction)^2 \f$
180  /// and its deriative \f$ \frac{\partial}{\partial prediction} 1/2 (label - prediction)^2 = prediction - label \f$.
181  double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient) const {
182  SIZE_CHECK(labels.size()==predictions.size());
183  gradient.resize(labels.size());
184 
185  double error = 0;
186  for(std::size_t i = 0; i != labels.size(); ++i){
187  SIZE_CHECK(labels[i].size()==predictions[i].size());
188  SHARK_RUNTIME_CHECK(labels[i].size() > m_ignore,"Number of sequence elements to ignore is too large");
189  for(std::size_t j = 0; j != m_ignore; ++j){
190  gradient[i].push_back(RealVector(predictions[i][j].size(),0.0));
191  }
192  for(std::size_t j = m_ignore; j != labels[i].size(); ++j){
193  error += 0.5 * distanceSqr(predictions[i][j],labels[i][j]);
194  gradient[i].push_back(predictions[i][j] - labels[i][j]);
195 
196  }
197  }
198  return error;
199  }
200 private:
201  std::size_t m_ignore;
202 };
203 
204 }
205 #endif