// EpsilonSvmTrainer.h
// (source listing recovered from the Doxygen documentation page of this file)
//===========================================================================
/*!
 *
 *
 * \brief Trainer for the Epsilon-Support Vector Machine for Regression
 *
 *
 *
 *
 * \author T. Glasmachers
 * \date 2007-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
35 
36 
#ifndef SHARK_ALGORITHMS_EPSILONSVMTRAINER_H
#define SHARK_ALGORITHMS_EPSILONSVMTRAINER_H


// NOTE(review): the extracted listing dropped the include block (original
// lines 41-46); restored from the upstream Shark sources.
#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/BoxConstrainedProblems.h>
#include <shark/Algorithms/QP/SvmProblems.h>
#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/LinAlg/CachedMatrix.h>
#include <shark/LinAlg/PrecomputedMatrix.h>

48 namespace shark {
49 
50 
51 ///
52 /// \brief Training of Epsilon-SVMs for regression.
53 ///
54 /// The Epsilon-SVM is a support vector machine variant
55 /// for regression problems. Given are data tuples
56 /// \f$ (x_i, y_i) \f$ with x-component denoting input and
57 /// y-component denoting a real-valued label (see the tutorial on
58 /// label conventions; the implementation uses RealVector),
59 /// a kernel function k(x, x'), a regularization constant C > 0,
60 /// and a loss insensitivity parameter \f$ \varepsilon \f$.
61 /// Let H denote the kernel induced reproducing kernel Hilbert
62 /// space of k, and let \f$ \phi \f$ denote the corresponding
63 /// feature map. Then the SVM regression function is of the form
64 /// \f[
65 /// (x) = \langle w, \phi(x) \rangle + b
66 /// \f]
67 /// with coefficients w and b given by the (primal)
68 /// optimization problem
69 /// \f[
70 /// \min \frac{1}{2} \|w\|^2 + C \sum_i L(y_i, f(x_i)),
71 /// \f]
72 /// where
73 /// \f[
74 /// L(y, f(x)) = \max\{0, |y - f(x)| - \varepsilon \}
75 /// \f]
76 /// is the \f$ \varepsilon \f$ insensitive absolute loss.
77 ///
78 template <class InputType, class CacheType = float>
79 class EpsilonSvmTrainer : public AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> >
80 {
81 public:
82 
83  typedef CacheType QpFloatType;
84 
89 
93 
94  /// Constructor
95  /// \param kernel kernel function to use for training and prediction
96  /// \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
97  /// \param epsilon Loss insensitivity parameter.
98  //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
99  EpsilonSvmTrainer(KernelType* kernel, double C, double epsilon, bool unconstrained = false)
100  : base_type(kernel, C, true, unconstrained)
101  , m_epsilon(epsilon)
102  { }
103 
104  /// \brief From INameable: return the class name.
105  std::string name() const
106  { return "EpsilonSvmTrainer"; }
107 
108  double epsilon() const
109  { return m_epsilon; }
110  void setEpsilon(double epsilon)
111  { m_epsilon = epsilon; }
112 
113  /// get the hyper-parameter vector
114  RealVector parameterVector() const{
115  double pEps = base_type::m_unconstrained ? std::log(m_epsilon) : m_epsilon;
116  return base_type::parameterVector() | pEps;
117  }
118 
119  /// set the vector of hyper-parameters
120  void setParameterVector(RealVector const& newParameters){
121  size_t sp = base_type::numberOfParameters();
122  SHARK_ASSERT(newParameters.size() == sp + 1);
123  base_type::setParameterVector(subrange(newParameters, 0, sp));
124  setEpsilon(base_type::m_unconstrained ? std::exp(newParameters(sp)) : newParameters(sp));
125  }
126 
127  /// return the number of hyper-parameters
128  size_t numberOfParameters() const
129  { return (base_type::numberOfParameters() + 1); }
130 
132  svm.setStructure(base_type::m_kernel,dataset.inputs(),true,1);
133 
134  SHARK_RUNTIME_CHECK(labelDimension(dataset) == 1, "Can only train 1D labels");
135 
137  trainSVM<PrecomputedBlockMatrixType>(svm,dataset);
138  else
139  trainSVM<CachedBlockMatrixType>(svm,dataset);
140 
141  if (base_type::sparsify()) svm.sparsify();
142  }
143 
144 private:
145  template<class MatrixType>
146  void trainSVM(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
147  typedef GeneralQuadraticProblem<MatrixType> SVMProblemType;
148  typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
149 
150  //Set up the problem
151  KernelMatrixType km(*base_type::m_kernel, dataset.inputs());
152  std::size_t ic = km.size();
153  BlockMatrixType blockkm(&km);
154  MatrixType matrix(&blockkm);
155  SVMProblemType svmProblem(matrix);
156  for(std::size_t i = 0; i != ic; ++i){
157  svmProblem.linear(i) = dataset.element(i).label(0) - m_epsilon;
158  svmProblem.linear(i+ic) = dataset.element(i).label(0) + m_epsilon;
159  svmProblem.boxMin(i) = 0;
160  svmProblem.boxMax(i) = this->C();
161  svmProblem.boxMin(i+ic) = -this->C();
162  svmProblem.boxMax(i+ic) = 0;
163  }
164  ProblemType problem(svmProblem,base_type::m_shrinking);
165 
166  //solve it
167  QpSolver< ProblemType> solver(problem);
169  RealVector alpha = problem.getUnpermutedAlpha();
170  column(svm.alpha(),0)= subrange(alpha,0,ic)+subrange(alpha,ic,2*ic);
171 
172  // compute the offset from the KKT conditions
173  double lowerBound = -1e100;
174  double upperBound = 1e100;
175  double sum = 0.0;
176 
177  std::size_t freeVars = 0;
178  for (std::size_t i=0; i< ic; i++)
179  {
180  if (problem.alpha(i) > 0.0)
181  {
182  double value = problem.gradient(i);
183  if (problem.alpha(i) < this->C())
184  {
185  sum += value;
186  freeVars++;
187  }
188  else
189  {
190  lowerBound = std::max(value,lowerBound);
191  }
192  }
193  if (problem.alpha(i + ic) < 0.0)
194  {
195  double value = problem.gradient(i + ic);
196  if (problem.alpha(i + ic) > -this->C())
197  {
198  sum += value;
199  freeVars++;
200  }
201  else
202  {
203  upperBound = std::min(value,upperBound);
204  }
205  }
206  }
207  if (freeVars > 0)
208  svm.offset(0) = sum / freeVars; // stabilized (averaged) exact value
209  else
210  svm.offset(0) = 0.5 * (lowerBound + upperBound); // best estimate
211 
212  base_type::m_accessCount = km.getAccessCount();
213  }
214  double m_epsilon;
215 };
216 
217 
218 }
219 #endif