RankingSvmTrainer.h
//===========================================================================
/*!
 *
 *
 * \brief       Support Vector Machine Trainer for the ranking-SVM
 *
 *
 * \author      T. Glasmachers
 * \date        2016
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_RANKINGSVMTRAINER_H
#define SHARK_ALGORITHMS_RANKINGSVMTRAINER_H


#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/Algorithms/QP/BoxConstrainedProblems.h>
#include <shark/LinAlg/CachedMatrix.h>
#include <shark/LinAlg/PrecomputedMatrix.h>
#include <shark/LinAlg/DifferenceKernelMatrix.h>
#include <shark/Models/Kernels/KernelExpansion.h>
namespace shark {


///
/// \brief Training of an SVM for ranking.
///
/// A ranking SVM trains a function (either linear, or linear in a
/// kernel-induced feature space, an RKHS) with the aim that the
/// function values are consistent with given pairwise rankings.
/// That is, the training data consists of pairs (a, b) of points,
/// and the task of SVM training is to find a function f such that
/// f(a) < f(b) holds for each pair. More precisely, the hard-margin
/// ranking SVM aims for f(b) - f(a) >= 1 while minimizing the squared
/// RKHS norm of f. The soft-margin variant relaxes these constraints
/// analogously to a standard C-SVM.
///
/// The trained model is a real-valued function. To predict the ranking
/// of a pair of points the function is applied to both points, and the
/// one with the smaller function value is ranked as "smaller". I.e., if
/// f is the trained model and a and b are data points, then the
/// following code computes the ranking:
///
/// bool a_better_than_b = (f(a) < f(b));
///
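/// A minimal usage sketch (assuming RealVector inputs and a Gaussian
/// RBF kernel; the kernel, its bandwidth, and C = 1.0 are placeholder
/// choices, not recommendations):
///
/// \code
/// GaussianRbfKernel<RealVector> kernel(0.5);
/// RankingSvmTrainer<RealVector> trainer(&kernel, 1.0);
/// KernelExpansion<RealVector> f;
/// trainer.train(f, data);   // data: a Data<RealVector>, ranked by position
/// \endcode
///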
template <class InputType, class CacheType = float>
class RankingSvmTrainer : public AbstractSvmTrainer< InputType, unsigned int, KernelExpansion<InputType> >
{
private:
	typedef AbstractSvmTrainer< InputType, unsigned int, KernelExpansion<InputType> > base_type;

public:
	/// \brief Convenience typedefs:
	/// this and many of the typedefs below build on the class template type CacheType.
	/// Simply changing that one template parameter CacheType thus makes it possible
	/// to switch flexibly between float and double as the type for caching kernel
	/// values. The default is float, which offers sufficient accuracy in the vast
	/// majority of cases at a memory cost of only four bytes per entry. However,
	/// the template parameter makes it easy to use double instead (e.g., when
	/// high-accuracy training is needed).
	typedef CacheType QpFloatType;

	typedef AbstractKernelFunction<InputType> KernelType;

	//! Constructor
	//! \param  kernel          kernel function to use for training and prediction
	//! \param  C               regularization parameter - always the 'true' value of C, even when unconstrained is set
	//! \param  unconstrained   when a C value is given via setParameter, should it be passed through the exp function before being used by the solver?
	RankingSvmTrainer(KernelType* kernel, double C, bool unconstrained = false)
	: base_type(kernel, C, false, unconstrained)
	{ }

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "RankingSvmTrainer"; }

	/// \brief Train the ranking SVM.
	///
	/// This variant of the train function assumes that all pairs of
	/// points should be ranked according to the order in which they
	/// appear in the data set.
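	/// For a data set with three elements, for example, the generated
	/// pairs are (0, 1), (0, 2), and (1, 2), so training aims for
	/// f(x0) < f(x1) < f(x2).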
	void train(KernelExpansion<InputType>& function, Data<InputType> const& dataset)
	{
		// create all pairs
		std::size_t n = dataset.numberOfElements();
		std::vector<std::pair<std::size_t, std::size_t>> pairs;
		for (std::size_t i=0; i<n; i++) {
			for (std::size_t j=0; j<i; j++) {
				pairs.push_back(std::make_pair(j, i));
			}
		}
		train(function, dataset, pairs);
	}

	/// \brief Train the ranking SVM.
	///
	/// This variant of the train function uses integer labels to define
	/// pairwise rankings. It is trained on all pairs of data points
	/// with different labels, aiming for a smaller function value for
	/// the point with the smaller label.
	void train(KernelExpansion<InputType>& function, LabeledData<InputType, unsigned int> const& dataset)
	{
		std::vector<std::pair<std::size_t, std::size_t>> pairs;
		std::size_t i = 0;
		for (auto const& yi : dataset.labels().elements()) {
			std::size_t j = 0;
			for (auto const& yj : dataset.labels().elements()) {
				if (j >= i) break;
				if (yi < yj) pairs.push_back(std::make_pair(i, j));
				else if (yi > yj) pairs.push_back(std::make_pair(j, i));
				j++;
			}
			i++;
		}
		train(function, dataset.inputs(), pairs);
	}

	/// \brief Train the ranking SVM.
	///
	/// This variant of the train function works with explicitly given
	/// pairs of data points. Each pair is identified by the indices of
	/// the training points in the data set.
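	/// Passing the pairs {(0, 1), (0, 2)}, for example, requests
	/// f(x0) < f(x1) and f(x0) < f(x2), while leaving the relative
	/// order of x1 and x2 unconstrained.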
	void train(KernelExpansion<InputType>& function, Data<InputType> const& dataset, std::vector<std::pair<std::size_t, std::size_t>> const& pairs)
	{
		function.setStructure(base_type::m_kernel, dataset, false);
		DifferenceKernelMatrix<InputType, QpFloatType> dm(*function.kernel(), dataset, pairs);

		// either precompute the full matrix of pairwise difference-kernel
		// values or cache rows of it on demand
		if (QpConfig::precomputeKernel())
		{
			PrecomputedMatrix< DifferenceKernelMatrix<InputType, QpFloatType> > matrix(&dm);
			trainInternal(function, dataset, pairs, matrix);
		}
		else
		{
			CachedMatrix< DifferenceKernelMatrix<InputType, QpFloatType> > matrix(&dm);
			trainInternal(function, dataset, pairs, matrix);
		}
	}

private:
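	// trainInternal sets up the SVM-type dual quadratic program with unit
	// linear term and box constraints 0 <= alpha_i <= C, whose quadratic
	// part consists of kernel-induced inner products between the difference
	// features of the pairs, and then maps the resulting pair coefficients
	// back onto the underlying data points.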
	template <typename MatrixType>
	void trainInternal(KernelExpansion<InputType>& function, Data<InputType> const& dataset, std::vector<std::pair<std::size_t, std::size_t>> const& pairs, MatrixType& matrix)
	{
		GeneralQuadraticProblem<MatrixType> qp(matrix);
		qp.linear = RealVector(qp.dimensions(), 1.0);
		qp.boxMin = RealVector(qp.dimensions(), 0.0);
		qp.boxMax = RealVector(qp.dimensions(), this->C());
		typedef BoxConstrainedShrinkingProblem< GeneralQuadraticProblem<MatrixType> > ProblemType;
		ProblemType problem(qp, base_type::m_shrinking);

		QpSolver<ProblemType> solver(problem);
		solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
		RealVector alpha = problem.getUnpermutedAlpha();

		// accumulate the pair coefficients onto the data points:
		// each pair (a, b) contributes -alpha to a and +alpha to b
		RealVector coeff(dataset.numberOfElements(), 0.0);
		SIZE_CHECK(pairs.size() == alpha.size());
		for (std::size_t i=0; i<alpha.size(); i++)
		{
			double a = alpha(i);
			coeff(pairs[i].first) -= a;
			coeff(pairs[i].second) += a;
		}
		blas::column(function.alpha(), 0) = coeff;

		if (base_type::sparsify()) function.sparsify();
	}
};


}
#endif