// OneClassSvmTrainer.h
//===========================================================================
/*!
 *
 *
 * \brief Trainer for One-Class Support Vector Machines
 *
 *
 *
 *
 * \author T. Glasmachers
 * \date 2007-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_ONECLASSSVMTRAINER_H
38 #define SHARK_ALGORITHMS_ONECLASSSVMTRAINER_H
39 
40 
46 
47 
48 namespace shark {
49 
50 
51 ///
52 /// \brief Training of one-class SVMs.
53 ///
54 /// The one-class support vector machine is an unsupervised
55 /// method for learning the high probability region of a
56 /// distribution. Given are data points \f$ x_i, i \in \{1, \dots, m\} \f$,
57 /// a kernel function k(x, x') and a regularization
58 /// constant C > 0. Let H denote the kernel induced
59 /// reproducing kernel Hilbert space of k, and let \f$ \phi \f$
60 /// denote the corresponding feature map.
61 /// Then an estimate of a high probability region of the
62 /// distribution generating the sample points is described
63 /// by the set where the following function takes positive
64 /// values:
65 /// \f[
66 /// f(x) = \langle w, \phi(x) \rangle + b
67 /// \f]
68 /// with coefficients w and b given by the (primal)
69 /// optimization problem
70 /// \f[
71 /// \min \frac{1}{2} \|w\|^2 + \frac{1}{\nu m} \sum_{i=1}^m \xi_i - \rho
72 /// \f]
73 /// \f[
74 /// \text{s.t. } \langle w, \phi(x_i) \rangle + b \geq \rho - \xi_i; \xi_i \geq 0
75 /// \f]
76 /// \f$ 0 \leq \nu, \rho \leq 1 \f$ are parameters of the
77 /// method for controlling the smoothness of the solution
78 /// and the amount of probability mass covered.
79 ///
80 /// For more details refer to the paper:<br/>
81 /// <p>Estimating the support of a high-dimensional distribution. B. Sch&ouml;lkopf, J. C. Platt, J. Shawe-Taylor, A. Smola, and R. C. Williamson, 1999.</p>
82 ///
83 template <class InputType, class CacheType = float>
84 class OneClassSvmTrainer : public AbstractUnsupervisedTrainer<KernelExpansion<InputType> >, public QpConfig, public IParameterizable<>
85 {
86 public:
87 
88  typedef CacheType QpFloatType;
91  typedef QpConfig base_type;
92 
96 
97  OneClassSvmTrainer(KernelType* kernel, double nu)
98  : m_kernel(kernel)
99  , m_nu(nu)
100  , m_cacheSize(0x4000000)
101  { }
102 
103  /// \brief From INameable: return the class name.
104  std::string name() const
105  { return "OneClassSvmTrainer"; }
106 
107  double nu() const
108  { return m_nu; }
109  void setNu(double nu)
110  { m_nu = nu; }
111  KernelType* kernel()
112  { return m_kernel; }
113  const KernelType* kernel() const
114  { return m_kernel; }
115  void setKernel(KernelType* kernel)
116  { m_kernel = kernel; }
117 
118  double CacheSize() const
119  { return m_cacheSize; }
120  void setCacheSize( std::size_t size )
121  { m_cacheSize = size; }
122 
123  /// get the hyper-parameter vector
124  RealVector parameterVector() const{
125  size_t kp = m_kernel->numberOfParameters();
126  RealVector ret(kp + 1);
127  noalias(subrange(ret, 0, kp)) = m_kernel->parameterVector();
128  ret(kp) = m_nu;
129  return ret;
130  }
131 
132  /// set the vector of hyper-parameters
133  void setParameterVector(RealVector const& newParameters){
134  size_t kp = m_kernel->numberOfParameters();
135  SHARK_ASSERT(newParameters.size() == kp + 1);
136  m_kernel->setParameterVector(subrange(newParameters, 0, kp));
137  setNu(newParameters(kp));
138  }
139 
140  /// return the number of hyper-parameters
141  size_t numberOfParameters() const
142  { return (m_kernel->numberOfParameters() + 1); }
143 
145  {
146  SHARK_RUNTIME_CHECK(m_nu > 0.0 && m_nu< 1.0, "invalid setting of the parameter nu (must be 0 < nu < 1)");
147  svm.setStructure(m_kernel,inputset,true);
148 
149  // solve the quadratic program
151  trainSVM<PrecomputedMatrixType>(svm,inputset);
152  else
153  trainSVM<CachedMatrixType>(svm,inputset);
154 
155  if (base_type::sparsify())
156  svm.sparsify();
157  }
158 
159 protected:
160  KernelType* m_kernel;
161  double m_nu;
162  std::size_t m_cacheSize;
163 
164  template<class MatrixType>
166  typedef BoxedSVMProblem<MatrixType> SVMProblemType;
167  typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
168 
169  // Setup the problem
170 
171  KernelMatrixType km(*m_kernel, inputset);
172  MatrixType matrix(&km);
173  std::size_t ic = matrix.size();
174  double upper = 1.0/(m_nu*ic);
175  SVMProblemType svmProblem(matrix,blas::repeat(0.0,ic),0.0,upper);
176  ProblemType problem(svmProblem,base_type::m_shrinking);
177 
178  //solve it
179  QpSolver< ProblemType > solver(problem);
181  column(svm.alpha(),0)= problem.getUnpermutedAlpha();
182 
183  // compute the offset from the KKT conditions
184  double lowerBound = -1e100;
185  double upperBound = 1e100;
186  double sum = 0.0;
187  std::size_t freeVars = 0;
188  for (std::size_t i=0; i != problem.dimensions(); i++)
189  {
190  double value = problem.gradient(i);
191  if (problem.alpha(i) == 0.0)
192  lowerBound = std::max(value,lowerBound);
193  else if (problem.alpha(i) == upper)
194  upperBound = std::min(value,upperBound);
195  else
196  {
197  sum += value;
198  freeVars++;
199  }
200  }
201  if (freeVars > 0)
202  svm.offset(0) = sum / freeVars; // stabilized (averaged) exact value
203  else
204  svm.offset(0) = 0.5 * (lowerBound + upperBound); // best estimate
205 
207  }
208 };
209 
210 
211 }
212 #endif