HuberLoss.h
Go to the documentation of this file.
1 /*!
2  *
3  * \brief Implements the Huber loss function for robust regression
4  *
5  *
6  * \author Oswin Krause
7  * \date 2014
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 
31 #ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_HUBERLOSS_H
32 #define SHARK_OBJECTIVEFUNCTIONS_LOSS_HUBERLOSS_H
33 
35 
36 namespace shark {
37 
38 /// \brief Huber-loss for for robust regression
39 ///
40 /// The Huber loss is a function that is quadratic if\f$ ||f(x)-y||_2 \leq \delta \f$.
41 /// Outside this region, whn the error is larger, it is defined as a linear continuation. The function is once
42 /// but not twice differentiable. This loss is important for regression as it weights outliers lower than
43 /// ordinary least squares regression while still preserving a convex shape of the loss function.
44 ///
45 /// Please not that, due to its nature, the error function is not scale invariant. thus rescaling the dataset
46 /// changes the behaviour. This function has the hyper parameter delta which marks thee region where
47 /// the function changes from quadratic to linear.
48 class HuberLoss : public AbstractLoss<RealVector, RealVector>
49 {
50 public:
51  /// constructor
52  HuberLoss(double delta = 1.0):m_delta(delta){
53  m_features |= base_type::HAS_FIRST_DERIVATIVE;
54  }
55 
56  /// \brief Returns class name "HuberLoss"
57  std::string name() const
58  { return "HuberLoss"; }
59 
60 
61  ///\brief calculates the sum of all
62  double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const{
63  SIZE_CHECK(labels.size1() == predictions.size1());
64  SIZE_CHECK(labels.size2() == predictions.size2());
65  std::size_t numInputs = labels.size1();
66 
67  double error = 0;
68  for(std::size_t i = 0; i != numInputs;++i){
69  double norm2 = norm_sqr(row(predictions,i)-row(labels,i));
70 
71  //check whether we are in the quadratic area
72  if(norm2 <= sqr(m_delta)){
73  error += 0.5*norm2;
74  }
75  else{
76  error += m_delta*std::sqrt(norm2)-0.5*sqr(m_delta);
77  }
78  }
79  return error;
80  }
81 
82  double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient)const{
83  SIZE_CHECK(labels.size1() == predictions.size1());
84  SIZE_CHECK(labels.size2() == predictions.size2());
85  std::size_t numInputs = predictions.size1();
86  std::size_t outputDim = predictions.size2();
87 
88  gradient.resize(numInputs,outputDim);
89  double error = 0;
90  for(std::size_t i = 0; i != numInputs;++i){
91  double norm2 = norm_sqr(row(predictions,i)-row(labels,i));
92 
93  //check whether we are in the quadratic area
94  if(norm2 <= sqr(m_delta)){
95  error += 0.5*norm2;
96  noalias(row(gradient,i)) = row(predictions,i)-row(labels,i);
97  }
98  else{
99  double norm = std::sqrt(norm2);
100  error += m_delta*norm-0.5*sqr(m_delta);
101  noalias(row(gradient,i)) = m_delta/norm*(row(predictions,i)-row(labels,i));
102  }
103  }
104  return error;
105  }
106 
107 private:
108  double m_delta;
109 };
110 
111 }
112 #endif