HingeLoss.h
/*!
 *
 * \brief Implements the Hinge Loss function for maximum margin classification.
 *
 *
 * \author Oswin Krause
 * \date 2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_HINGELOSS_H
#define SHARK_OBJECTIVEFUNCTIONS_LOSS_HINGELOSS_H

#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>

namespace shark {

///
/// \brief Hinge-loss for large margin classification
///
/// The hinge loss for two-class problems is defined as \f$ L_i = \max \{ 0, 1 - y_i f(x_i) \} \f$, where \f$ y_i \in \{-1,1\} \f$ is the label
/// and \f$ f(x_i) \f$ is the prediction of the model for the i-th input. The loss introduces the concept of
/// a margin: a point should not only be correctly classified, but also not lie too close to the
/// decision boundary. Therefore even correctly classified points can be penalized.
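/// For example, a prediction of \f$ f(x_i) = 0.5 \f$ for a positive example \f$ y_i = 1 \f$ is classified
/// correctly but lies inside the margin, and thus incurs a loss of \f$ \max \{ 0, 1 - 0.5 \} = 0.5 \f$.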
///
/// For multi-class problems, a sum of relative margins is used:
/// \f$ L_i = \sum_{c \neq y_i} \max \{ 0, 1 - \frac{1}{2} (f_{y_i}(x_i) - f_c(x_i)) \} \f$. This loss requires a margin
/// between the outputs for the different classes, and the function needs as many outputs as there are classes. The pre-factor
/// 1/2 ensures that in the two-class, two-output case with a linear function the value of the loss is the same as in the
/// single-output version.
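/// For example, with true class \f$ y_i = 0 \f$ and outputs \f$ f(x_i) = (2, 1.5, -1) \f$, the loss is
/// \f$ \max \{ 0, 1 - \frac{1}{2}(2 - 1.5) \} + \max \{ 0, 1 - \frac{1}{2}(2 - (-1)) \} = 0.75 + 0 = 0.75 \f$.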
///
/// The loss is implemented for class labels 0,1,...,n, even in the binary case.
///
/// The hinge loss is differentiable everywhere except at one point.
/// For points violating the margin, the derivative is -1;
/// for points not violating it, it is 0. The boundary counts as non-violating.
class HingeLoss : public AbstractLoss<unsigned int, RealVector>
{
public:
	/// Constructor
	HingeLoss(){
		m_features |= base_type::HAS_FIRST_DERIVATIVE;
	}

	/// \brief Returns class name "HingeLoss"
	std::string name() const
	{ return "HingeLoss"; }

	///\brief Calculates the hinge loss summed over the batch of predictions.
	double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const{
		std::size_t numInputs = labels.size();
		SIZE_CHECK(numInputs == predictions.size1());

		double error = 0;
		//binary case for models with single output
		if(predictions.size2() == 1){
			for(std::size_t i = 0; i != numInputs; ++i){
				SIZE_CHECK(labels(i) < 2);
				double y = 2.0*labels(i) - 1.0;
				error += std::max(0.0, 1.0 - y*predictions(i,0));
			}
		}
		else
		{//multi-class or multiple output case
			for(std::size_t i = 0; i != numInputs; ++i){
				SIZE_CHECK(labels(i) < predictions.size2());
				for(std::size_t o = 0; o != predictions.size2(); ++o){
					if(o == labels(i)) continue;
					error += std::max(0.0, 2.0 - predictions(i,labels(i)) + predictions(i,o));
				}
			}
			error /= 2;
		}

		return error;
	}

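	///\brief Calculates the hinge loss summed over the batch and its derivative w.r.t. the predictions.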
	double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient) const{
		std::size_t numInputs = labels.size();
		std::size_t outputDim = predictions.size2();
		SIZE_CHECK(numInputs == predictions.size1());

		gradient.resize(numInputs, outputDim);
		gradient.clear();
		double error = 0;
		//binary case for models with single output
		if(outputDim == 1){
			for(std::size_t i = 0; i != numInputs; ++i){
				double y = 2.0*labels(i) - 1.0;
				double sampleLoss = std::max(0.0, 1.0 - y*predictions(i,0));
				if(sampleLoss > 0)
					gradient(i,0) = -y;
				error += sampleLoss;
			}
		}
		else
		{//multi-class or multiple output case
			for(std::size_t i = 0; i != numInputs; ++i){
				SIZE_CHECK(labels(i) < predictions.size2());
				for(std::size_t o = 0; o != predictions.size2(); ++o){
					if(o == labels(i)) continue;
					double sampleLoss = std::max(0.0, 2.0 - predictions(i,labels(i)) + predictions(i,o));
					if(sampleLoss > 0){
						gradient(i,o) = 0.5;
						gradient(i,labels(i)) -= 0.5;
					}
					error += sampleLoss;
				}
			}
			error /= 2;
		}

		return error;
	}

};

}
#endif
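
A minimal usage sketch (not part of the header), assuming UIntVector and RealMatrix are the batch types Shark uses for unsigned int labels and RealVector outputs:

#include <shark/ObjectiveFunctions/Loss/HingeLoss.h>
#include <iostream>

int main(){
	shark::HingeLoss loss;

	// Batch of three binary labels (encoded as 0/1) with single-output predictions.
	// UIntVector/RealMatrix are assumed to match BatchLabelType/BatchOutputType.
	shark::UIntVector labels(3);
	labels(0) = 1; labels(1) = 0; labels(2) = 1;
	shark::RealMatrix predictions(3, 1);
	predictions(0,0) = 2.0;   // margin satisfied: contributes 0
	predictions(1,0) = 0.5;   // wrong side for label 0 (y = -1): contributes 1.5
	predictions(2,0) = 0.25;  // inside the margin: contributes 0.75

	std::cout << loss.eval(labels, predictions) << std::endl; // 2.25

	// The gradient w.r.t. each prediction is -y for margin violators and 0 otherwise.
	shark::RealMatrix gradient;
	loss.evalDerivative(labels, predictions, gradient); // gradient(1,0) == 1, gradient(0,0) == 0
}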