AbstractSvmTrainer.h
//===========================================================================
/*!
 *
 * \brief Abstract Support Vector Machine Trainer, general and linear case
 *
 * \par
 * This file provides: 1) the QpConfig class, which can configure and
 * provide information about an SVM training procedure; 2) a super-class
 * for general SVM trainers, namely the AbstractSvmTrainer; and 3) a
 * streamlined variant thereof for purely linear SVMs, namely the
 * AbstractLinearSvmTrainer. In general, the SvmTrainers hold as parameters
 * all hyperparameters of the underlying SVM, which includes the kernel
 * parameters for non-linear SVMs.
 *
 * \author T. Glasmachers
 * \date -
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_TRAINERS_ABSTRACTSVMTRAINER_H
#define SHARK_ALGORITHMS_TRAINERS_ABSTRACTSVMTRAINER_H


#include <shark/LinAlg/Base.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Models/Kernels/KernelExpansion.h>
#include <shark/Algorithms/QP/QuadraticProgram.h>
#include <shark/Algorithms/Trainers/AbstractTrainer.h>


namespace shark {

///
/// \brief Super class of all support vector machine trainers.
///
/// \par
/// The QpConfig class holds two structures describing
/// the stopping condition and the solution obtained by the underlying
/// quadratic programming solvers. It provides a uniform interface for
/// setting, e.g., the target solution accuracy and obtaining the
/// accuracy of the actual solution.
///
class QpConfig
{
public:
	/// Constructor
	QpConfig(bool precomputedFlag = false, bool sparsifyFlag = true)
	: m_precomputedKernelMatrix(precomputedFlag)
	, m_sparsify(sparsifyFlag)
	, m_shrinking(true)
	, m_s2do(true)
	, m_verbosity(0)
	, m_accessCount(0)
	{ }

	/// Read/write access to the stopping condition
	QpStoppingCondition& stoppingCondition()
	{ return m_stoppingcondition; }

	/// Read access to the stopping condition
	QpStoppingCondition const& stoppingCondition() const
	{ return m_stoppingcondition; }

	/// Access to the solution properties
	QpSolutionProperties& solutionProperties()
	{ return m_solutionproperties; }

	/// Flag for using a precomputed kernel matrix
	bool& precomputeKernel()
	{ return m_precomputedKernelMatrix; }

	/// Flag for using a precomputed kernel matrix
	bool const& precomputeKernel() const
	{ return m_precomputedKernelMatrix; }

	/// Flag for sparsifying the model after training
	bool& sparsify()
	{ return m_sparsify; }

	/// Flag for sparsifying the model after training
	bool const& sparsify() const
	{ return m_sparsify; }

	/// Flag for shrinking in the decomposition solver
	bool& shrinking()
	{ return m_shrinking; }

	/// Flag for shrinking in the decomposition solver
	bool const& shrinking() const
	{ return m_shrinking; }

	/// Flag for S2DO (instead of SMO)
	bool& s2do()
	{ return m_s2do; }

	/// Flag for S2DO (instead of SMO)
	bool const& s2do() const
	{ return m_s2do; }

	/// Verbosity level of the solver
	unsigned int& verbosity()
	{ return m_verbosity; }

	/// Verbosity level of the solver
	unsigned int const& verbosity() const
	{ return m_verbosity; }

	/// Number of kernel accesses
	unsigned long long const& accessCount() const
	{ return m_accessCount; }

	/// Set the threshold for the minimum dual accuracy stopping condition
	void setMinAccuracy(double a) { m_stoppingcondition.minAccuracy = a; }
	/// Set the iteration limit for the maximum number of iterations stopping condition
	void setMaxIterations(unsigned long long i) { m_stoppingcondition.maxIterations = i; }
	/// Set the value for the target value stopping condition
	void setTargetValue(double v) { m_stoppingcondition.targetValue = v; }
	/// Set the maximum training time in seconds for the maximum seconds stopping condition
	void setMaxSeconds(double s) { m_stoppingcondition.maxSeconds = s; }

protected:
	/// conditions for when to stop the QP solver
	QpStoppingCondition m_stoppingcondition;
	/// properties of the approximate solution found by the solver
	QpSolutionProperties m_solutionproperties;
	/// should the solver use a precomputed kernel matrix?
	bool m_precomputedKernelMatrix;
	/// should the trainer sparsify the model after training?
	bool m_sparsify;
	/// should shrinking be used?
	bool m_shrinking;
	/// should S2DO be used instead of SMO?
	bool m_s2do;
	/// verbosity level (currently unused)
	unsigned int m_verbosity;
	/// kernel access count
	unsigned long long m_accessCount;
};
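
// Example: a hypothetical usage sketch (not part of the original header).
// Any trainer deriving from QpConfig exposes these knobs; here we assume
// Shark's concrete CSvmTrainer and GaussianRbfKernel, purely for
// illustration:
//
//   #include <shark/Algorithms/Trainers/CSvmTrainer.h>
//   #include <shark/Models/Kernels/GaussianRbfKernel.h>
//
//   shark::GaussianRbfKernel<> kernel(0.5);
//   shark::CSvmTrainer<shark::RealVector> trainer(&kernel, 1.0, true);
//   trainer.stoppingCondition().minAccuracy = 1e-3; // relax KKT tolerance
//   trainer.setMaxSeconds(60.0);                    // hard time budget
//   trainer.sparsify() = true;                      // drop zero-weight SVs
//   trainer.shrinking() = false;                    // disable shrinking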


///
/// \brief Super class of all kernelized (non-linear) SVM trainers.
///
/// \par
/// This class holds general information shared by most if not
/// all SVM trainers. First of all, this includes the kernel and
/// the regularization parameter. The class also manages
/// meta-information of the training process, like the maximal
/// size of the kernel cache, the stopping criterion, as well
/// as information on the actual solution.
///
template <
	class InputType, class LabelType,
	class Model = KernelClassifier<InputType>,
	class Trainer = AbstractTrainer<Model, LabelType>
>
class AbstractSvmTrainer
: public Trainer, public QpConfig, public IParameterizable<>
{
public:
	typedef AbstractKernelFunction<InputType> KernelType;

	//! Constructor
	//! \param kernel kernel function to use for training and prediction
	//! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
	//! \param offset whether to train the SVM with an offset (bias) term - this is not supported by all SVM solvers
	//! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
	AbstractSvmTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
	: m_kernel(kernel)
	, m_regularizers(1, C)
	, m_trainOffset(offset)
	, m_unconstrained(unconstrained)
	, m_cacheSize(0x4000000)
	{
		SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" );
		SHARK_RUNTIME_CHECK( kernel != nullptr, "Kernel must not be NULL" );
	}

	//! Constructor featuring two regularization parameters
	//! \param kernel kernel function to use for training and prediction
	//! \param negativeC regularization parameter of the negative class (label 0)
	//! \param positiveC regularization parameter of the positive class (label 1)
	//! \param offset whether to train the SVM with an offset (bias) term - this is not supported by all SVM solvers
	//! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
	AbstractSvmTrainer(KernelType* kernel, double negativeC, double positiveC, bool offset, bool unconstrained = false)
	: m_kernel(kernel)
	, m_regularizers(2)
	, m_trainOffset(offset)
	, m_unconstrained(unconstrained)
	, m_cacheSize(0x4000000)
	{
		SHARK_RUNTIME_CHECK( positiveC > 0, "C must be larger than 0" );
		SHARK_RUNTIME_CHECK( negativeC > 0, "C must be larger than 0" );
		SHARK_RUNTIME_CHECK( kernel != nullptr, "Kernel must not be NULL" );
		m_regularizers[0] = negativeC;
		m_regularizers[1] = positiveC;
	}

	/// \brief Return the value of the regularization parameter C.
	double C() const
	{
		SIZE_CHECK(m_regularizers.size() == 1);
		return m_regularizers[0];
	}
	/// \brief Set the value of the regularization parameter C.
	void setC(double C) {
		SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" );
		m_regularizers[0] = C;
	}

	/// \brief Return the vector of regularization parameters.
	RealVector const& regularizationParameters() const
	{
		return m_regularizers;
	}

	/// \brief Set the vector of regularization parameters.
	void setRegularizationParameters(RealVector const& regularizers) {
		SHARK_RUNTIME_CHECK( min(regularizers) > 0, "regularization parameters must be larger than 0" );
		m_regularizers = regularizers;
	}

	/// \brief Access to the kernel object.
	KernelType* kernel()
	{ return m_kernel; }
	/// \brief Read access to the kernel object.
	KernelType const* kernel() const
	{ return m_kernel; }
	/// \brief Set the kernel object; the pointer must not be NULL.
	void setKernel(KernelType* kernel){
		SHARK_RUNTIME_CHECK( kernel != nullptr, "Kernel must not be NULL" );
		m_kernel = kernel;
	}

	/// \brief Is log(C) stored internally as a parameter instead of C?
	bool isUnconstrained() const
	{ return m_unconstrained; }

	/// \brief Is the SVM trained with or without bias?
	bool trainOffset() const
	{ return m_trainOffset; }

	/// \brief Number of entries in the kernel cache.
	std::size_t cacheSize() const
	{ return m_cacheSize; }
	/// \brief Set the number of entries in the kernel cache.
	void setCacheSize( std::size_t size )
	{ m_cacheSize = size; }

	/// get the hyper-parameter vector
	RealVector parameterVector() const{
		if(m_unconstrained)
			return m_kernel->parameterVector() | log(m_regularizers);
		else
			return m_kernel->parameterVector() | m_regularizers;
	}

	/// set the vector of hyper-parameters
	void setParameterVector(RealVector const& newParameters){
		size_t kp = m_kernel->numberOfParameters();
		SHARK_ASSERT(newParameters.size() == kp + m_regularizers.size());
		m_kernel->setParameterVector(subrange(newParameters, 0, kp));
		noalias(m_regularizers) = subrange(newParameters, kp, newParameters.size());
		if(m_unconstrained)
			m_regularizers = exp(m_regularizers);
	}

	/// return the number of hyper-parameters
	size_t numberOfParameters() const{
		return m_kernel->numberOfParameters() + m_regularizers.size();
	}

protected:
	KernelType* m_kernel; ///< Kernel object.
	/// \brief Vector of regularization parameters.
	///
	/// If the size of the vector is 1 there is only one regularization parameter for all classes,
	/// otherwise there must be one for every class in the dataset.
	/// The exact meaning depends on the sub-class, but the value is always positive,
	/// and higher implies a less regular solution.
	RealVector m_regularizers;
	bool m_trainOffset; ///< Is the SVM trained with or without bias?
	bool m_unconstrained; ///< Is log(C) stored internally as a parameter instead of C? If yes, then we get rid of the constraint C > 0 on the level of the parameter interface.
	std::size_t m_cacheSize; ///< Number of values in the kernel cache. The size of the cache in bytes is the size of one entry (4 for float, 8 for double) times this number.
};
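
// Example: a hypothetical sketch (not part of the original header) of how the
// hyper-parameter interface behaves. We again assume Shark's CSvmTrainer and
// GaussianRbfKernel purely for illustration. With unconstrained == true, the
// last entry of the parameter vector is log(C), so any real value maps to a
// valid C > 0 - convenient for gradient-based model selection:
//
//   shark::GaussianRbfKernel<> kernel(1.0);
//   shark::CSvmTrainer<shark::RealVector> trainer(&kernel, 10.0, true, true);
//   shark::RealVector p = trainer.parameterVector(); // kernel params | log(C)
//   p(p.size() - 1) = 0.0;                           // request C = exp(0) = 1
//   trainer.setParameterVector(p);
//   // trainer.C() now returns 1.0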


///
/// \brief Super class of all linear SVM trainers.
///
/// \par
/// This class is analogous to the AbstractSvmTrainer class,
/// but for the training of linear SVMs. It represents the
/// regularization parameter of the SVM. The class also manages
/// meta-information of the training process, like the stopping
/// criterion and information on the actual solution.
///
template <class InputType>
class AbstractLinearSvmTrainer
: public AbstractTrainer<LinearClassifier<InputType>, unsigned int>
, public QpConfig
, public IParameterizable<>
{
public:
	typedef LinearClassifier<InputType> ModelType;

	//! Constructor
	//! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
	//! \param offset whether to train the SVM with an offset (bias) term - this is not supported by all SVM solvers
	//! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
	AbstractLinearSvmTrainer(double C, bool offset, bool unconstrained)
	: m_C(C)
	, m_trainOffset(offset)
	, m_unconstrained(unconstrained)
	{ SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" ); }

	/// \brief Return the value of the regularization parameter C.
	double C() const
	{ return m_C; }

	/// \brief Set the value of the regularization parameter C.
	void setC(double C) {
		SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" );
		m_C = C;
	}

	/// \brief Is the regularization parameter provided in logarithmic (unconstrained) form as a parameter?
	bool isUnconstrained() const
	{ return m_unconstrained; }

	/// \brief Is the SVM trained with or without bias?
	bool trainOffset() const
	{ return m_trainOffset; }

	/// \brief Get the hyper-parameter vector.
	RealVector parameterVector() const
	{
		RealVector ret(1);
		ret(0) = (m_unconstrained ? std::log(m_C) : m_C);
		return ret;
	}

	/// \brief Set the vector of hyper-parameters.
	void setParameterVector(RealVector const& newParameters)
	{
		SHARK_ASSERT(newParameters.size() == 1);
		setC(m_unconstrained ? std::exp(newParameters(0)) : newParameters(0));
	}

	/// \brief Return the number of hyper-parameters.
	size_t numberOfParameters() const
	{ return 1; }

	using QpConfig::m_stoppingcondition;
	using QpConfig::m_solutionproperties;
	using QpConfig::m_verbosity;

protected:
	double m_C;           ///< Regularization parameter. The exact meaning depends on the sub-class, but the value is always positive, and higher implies a less regular solution.
	bool m_trainOffset;   ///< Is the SVM trained with or without bias?
	bool m_unconstrained; ///< Is log(C) stored internally as a parameter instead of C? If yes, then we get rid of the constraint C > 0 on the level of the parameter interface.
};
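
// Example: a hypothetical usage sketch (not part of the original header),
// assuming Shark's concrete LinearCSvmTrainer subclass with a (C, offset)
// constructor and a labeled ClassificationDataset named `data`, purely for
// illustration:
//
//   #include <shark/Algorithms/Trainers/CSvmTrainer.h>
//
//   shark::LinearCSvmTrainer<shark::RealVector> trainer(1.0, false);
//   trainer.setMaxIterations(100000);                    // cap solver iterations
//   shark::LinearClassifier<shark::RealVector> model;
//   trainer.train(model, data);                          // fit the linear SVM
//   double eps = trainer.solutionProperties().accuracy;  // achieved accuracy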


}
#endif