KernelBudgetedSGDTutorial.cpp
Go to the documentation of this file.
1 #include <shark/Algorithms/Trainers/Budgeted/KernelBudgetedSGDTrainer.h> // the KernelBudgetedSGD trainer
2 #include <shark/Algorithms/Trainers/Budgeted/MergeBudgetMaintenanceStrategy.h> // the strategy the trainer will use
3 #include <shark/Data/DataDistribution.h> //includes small toy distributions
4 #include <shark/Models/Kernels/GaussianRbfKernel.h> //the used kernel for the SVM
5 #include <shark/ObjectiveFunctions/Loss/HingeLoss.h> // the loss we want to use for the SGD machine
6 #include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h> //used for evaluation of the classifier
7 
8 using namespace shark;
9 using namespace std;
10 
11 
12 // data generating distribution for our toy
13 // multi-category classification problem
14 class myProblem : public LabeledDataDistribution<RealVector, unsigned int>
15 {
16 public:
17  void draw(RealVector& input, unsigned int& label)const
18  {
19  label = random::discrete(random::globalRng, 0, 4);
20  input.resize(1);
21  input(0) = random::gauss(random::globalRng) + 3.0 * label;
22  }
23 };
24 /// @endcond
25 
26 
27 
28 
29 int main(int argc, char** argv)
30 {
31  // experiment settings
32  unsigned int ell = 500; // number of training data point
33  unsigned int tests = 10000; // number of test data points
34  double gamma = 0.5; // kernel bandwidth parameter
35  double C = 1.0; // regularization parameter
36  bool bias = false; // use bias/offset parameter
37  size_t budgetSize = 16; // our model shall contain at most 16 vectors
38  size_t epochs = 5; // we want to run 5 epochs
39 
40 
41  GaussianRbfKernel<> kernel(gamma); // Gaussian kernel
42  KernelClassifier<RealVector> kernelClassifier; // (affine) linear function in kernel-induced feature space
43 
44  // generate dataset
45  Chessboard problem; // artificial benchmark data
46  ClassificationDataset trainingData = problem.generateDataset(ell);
47  ClassificationDataset testData = problem.generateDataset(tests);
48 
49  // define the machine
50  HingeLoss hingeLoss; // define the loss we want to use while training
51  // as the budget maintenance strategy we choose the merge strategy
53  KernelBudgetedSGDTrainer<RealVector> kernelBudgetedSGDtrainer(&kernel, &hingeLoss, C, bias, false, budgetSize, strategy); // create the trainer
54  kernelBudgetedSGDtrainer.setEpochs(epochs); // set the epochs number
55 
56  // train the machine
57  std::cout << "Training the " << kernelBudgetedSGDtrainer.name() << " on the problem with a budget of " << budgetSize << " and " << epochs << " Epochs..." << std::endl; // Shark algorithms know their names
58  kernelBudgetedSGDtrainer.train(kernelClassifier, trainingData);
59  Data<RealVector> supportVectors = kernelClassifier.decisionFunction().basis(); // get a pointer to the support vectors of the model
60  size_t nSupportVectors = supportVectors.numberOfElements(); // get number of support vectors
61  std::cout << "We have " << nSupportVectors << " support vectors in our model.\n"; // report
62 
63  // evaluate
64  ZeroOneLoss<unsigned int> loss; // 0-1 loss
65  Data<unsigned int> output = kernelClassifier(trainingData.inputs()); // evaluate on training set
66  double train_error = loss.eval(trainingData.labels(), output);
67  cout << "training error:\t" << train_error << endl;
68  output = kernelClassifier(testData.inputs()); // evaluate on test set
69  double test_error = loss.eval(testData.labels(), output);
70  cout << "test error:\t" << test_error << endl;
71 }
72