OneClassSvm.cpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief One-Class Support Vector Machine example program.
6  *
7  * \par
8  * This program generates a toy data set composed of Gaussian
9  * distributions. It then uses a one-class SVM to model the
10  * densest regions. It visualizes the result.
11  *
12  *
13  *
14  * \author T. Glasmachers
15  * \date 2013
16  *
17  *
18  * \par Copyright 1995-2017 Shark Development Team
19  *
20  * <BR><HR>
21  * This file is part of Shark.
22  * <http://shark-ml.org/>
23  *
24  * Shark is free software: you can redistribute it and/or modify
25  * it under the terms of the GNU Lesser General Public License as published
26  * by the Free Software Foundation, either version 3 of the License, or
27  * (at your option) any later version.
28  *
29  * Shark is distributed in the hope that it will be useful,
30  * but WITHOUT ANY WARRANTY; without even the implied warranty of
31  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32  * GNU Lesser General Public License for more details.
33  *
34  * You should have received a copy of the GNU Lesser General Public License
35  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
36  *
37  */
38 //===========================================================================
39 
43 
44 using namespace shark;
45 using namespace std;
46 
47 
48 class Gaussians : public DataDistribution<RealVector>
49 {
50 public:
51  void draw(RealVector& point) const
52  {
53  point.resize(2);
54  size_t cluster = random::discrete(random::globalRng, 0, 4);
55  double alpha = 0.4 * M_PI * cluster;
56  point(0) = 3.0 * cos(alpha) + 0.75 * random::gauss(random::globalRng);
57  point(1) = 3.0 * sin(alpha) + 0.75 * random::gauss(random::globalRng);
58  }
59 };
60 
61 
62 int main(int argc, char** argv)
63 {
64  // experiment settings
65  unsigned int ell = 100; // number of training data point
66  double nu = 0.5; // probability mass to be covered, must fulfill 0 < mu < 1
67  double gamma = 0.5; // kernel bandwidth parameter
68 
69  GaussianRbfKernel<> kernel(gamma); // Gaussian kernel
70  KernelExpansion<RealVector> ke; // (affine) linear function in kernel-induced feature space
71 
72  // generate artificial benchmark data
73  Gaussians problem;
74  UnlabeledData<RealVector> data = problem.generateDataset(ell);
75 
76  // define the learner
77  OneClassSvmTrainer<RealVector> trainer(&kernel, nu);
78 
79  // train the model
80  trainer.train(ke, data);
81 
82  // evaluate the model
83  char output[35][71];
84  RealVector input(2);
85  for (std::size_t y=0; y<35; y++)
86  {
87  input(1) = 5.0 * (y - 17.0) / 17.0;
88  for (std::size_t x=0; x<70; x++)
89  {
90  input(0) = 5.0 * (x - 34.5) / 34.5;
91  double val = ke(input)(0);
92  output[y][x] = (val < 0.0) ? ' ' : ':';
93  }
94  output[y][70] = 0;
95  }
96 
97  // mark the samples
99  for (UnlabeledData<RealVector>::const_element_range::const_iterator it = elements.begin(); it != elements.end(); ++it)
100  {
101  RealVector v = *it;
102  int x = (int)std::floor(34.5 * v(0) / 5.0 + 34.5 + 0.5);
103  int y = (int)std::floor(17.0 * v(1) / 5.0 + 17.0 + 0.5);
104  if (x >= 0 && y >= 0 && x < 70 && y < 35) output[y][x] = '*';
105  }
106 
107  // output to the console
108  cout << endl
109  << "One-Class SVM example program." << endl
110  << "100 samples are drawn from a mixture of five Gaussians. Data samples" << endl
111  << "are marked with an asterisk '*'. The :::-shaded regions are the SVM's" << endl
112  << "estimate of the high-probability region of the distribution." << endl
113  << endl;
114  for (std::size_t y=0; y<35; y++) cout << output[y] << endl;
115  cout << endl;
116 }