QpSolver.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief General and specialized quadratic program classes and a generic solver.
6  *
7  *
8  *
9  * \author T. Glasmachers, O.Krause
10  * \date 2007-2016
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 
36 #ifndef SHARK_ALGORITHMS_QP_QPSOLVER_H
37 #define SHARK_ALGORITHMS_QP_QPSOLVER_H
38 
39 #include <shark/Core/Timer.h>
41 #include <shark/Data/Dataset.h>
42 
43 namespace shark{
44 
45 /// \brief Quadratic Problem with only Box-Constraints
46 /// Let K the kernel matrix, than the problem has the form
47 ///
48 /// max_\alpha - 1/2 \alpha^T K \alpha + \alpha^Tv
49 /// under constraints:
50 /// l_i <= \alpha_i <= u_i
51 template<class MatrixT>
53 public:
54  typedef MatrixT MatrixType;
55  typedef typename MatrixType::QpFloatType QpFloatType;
56 
57  //Setup only using kernel matrix, labels and regularization parameter
59  : quadratic(quadratic)
60  , linear(quadratic.size(),0)
61  , alpha(quadratic.size(),0)
62  , diagonal(quadratic.size())
63  , permutation(quadratic.size())
64  , boxMin(quadratic.size(),0)
65  , boxMax(quadratic.size(),0)
66  {
67  for(std::size_t i = 0; i!= dimensions(); ++i){
68  permutation[i] = i;
69  diagonal(i) = quadratic.entry(i, i);
70  }
71  }
72 
73  /// \brief constructor which initializes a C-SVM problem with weighted datapoints and different regularizers for every class
75  MatrixType& quadratic,
76  Data<unsigned int> const& labels,
77  Data<double> const& weights,
78  RealVector const& regularizers
79  ): quadratic(quadratic)
80  , linear(quadratic.size())
81  , alpha(quadratic.size(),0)
82  , diagonal(quadratic.size())
83  , permutation(quadratic.size())
84  , boxMin(quadratic.size())
85  , boxMax(quadratic.size())
86  {
87  SIZE_CHECK(dimensions() == linear.size());
88  SIZE_CHECK(dimensions() == quadratic.size());
89  SIZE_CHECK(dimensions() == labels.numberOfElements());
90  SIZE_CHECK(dimensions() == weights.numberOfElements());
91  SIZE_CHECK(regularizers.size() > 0);
92  SIZE_CHECK(regularizers.size() <= 2);
93 
94  double Cn = regularizers[0];
95  double Cp = regularizers[0];
96  if(regularizers.size() == 2)
97  Cp = regularizers[1];
98 
99  for(std::size_t i = 0; i!= dimensions(); ++i){
100  unsigned int label = labels.element(i);
101  double weight = weights.element(i);
102  permutation[i] = i;
103  diagonal(i) = quadratic.entry(i, i);
104  linear(i) = label? 1.0:-1.0;
105  boxMin(i) = label? 0.0:-Cn*weight;
106  boxMax(i) = label? Cp*weight : 0.0;
107  }
108  }
109 
110  std::size_t dimensions()const{
111  return quadratic.size();
112  }
113 
114  /// exchange two variables via the permutation
115  void flipCoordinates(std::size_t i, std::size_t j)
116  {
117  if (i == j) return;
118 
119  // notify the matrix cache
120  quadratic.flipColumnsAndRows(i, j);
121  std::swap( alpha[i], alpha[j]);
122  std::swap( linear[i], linear[j]);
123  std::swap( diagonal[i], diagonal[j]);
124  std::swap( boxMin[i], boxMin[j]);
125  std::swap( boxMax[i], boxMax[j]);
127  }
128 
129  /// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
130  void scaleBoxConstraints(double factor){
131  alpha *= factor;
132  boxMin *=factor;
133  boxMax *=factor;
134  }
135 
136  /// representation of the quadratic part of the objective function
137  MatrixType& quadratic;
138 
139  /// \brief Linear part of the problem
140  RealVector linear;
141 
142  /// Solution candidate
143  RealVector alpha;
144 
145  /// diagonal matrix entries
146  /// The diagonal array is of fixed size and not subject to shrinking.
147  RealVector diagonal;
148 
149  /// permutation of the variables alpha, gradient, etc.
150  std::vector<std::size_t> permutation;
151 
152  /// \brief component-wise lower bound
153  RealVector boxMin;
154 
155  /// \brief component-wise upper bound
156  RealVector boxMax;
157 };
158 
159 ///\brief Boxed problem for alpha in [lower,upper]^n and equality constraints.
160 ///
161 ///It is assumed for the initial alpha value that there exists a sum to one constraint and lower <= 1/n <= upper
162 template<class MatrixT>
164 public:
165  typedef MatrixT MatrixType;
166  typedef typename MatrixType::QpFloatType QpFloatType;
167 
168  //Setup only using kernel matrix, labels and regularization parameter
169  BoxedSVMProblem(MatrixType& quadratic, RealVector const& linear, double lower, double upper)
170  : quadratic(quadratic)
171  , linear(linear)
172  , alpha(quadratic.size(),1.0/quadratic.size())
173  , diagonal(quadratic.size())
174  , permutation(quadratic.size())
175  , m_lower(lower)
176  , m_upper(upper)
177  {
178  SIZE_CHECK(dimensions() == linear.size());
179  SIZE_CHECK(dimensions() == quadratic.size());
180 
181  for(std::size_t i = 0; i!= dimensions(); ++i){
182  permutation[i] = i;
183  diagonal(i) = quadratic.entry(i, i);
184  }
185  }
186 
187  std::size_t dimensions()const{
188  return quadratic.size();
189  }
190 
191  double boxMin(std::size_t i)const{
192  return m_lower;
193  }
194  double boxMax(std::size_t i)const{
195  return m_upper;
196  }
197 
198  /// representation of the quadratic part of the objective function
199  MatrixType& quadratic;
200 
201  ///\brief Linear part of the problem
202  RealVector linear;
203 
204  /// Solution candidate
205  RealVector alpha;
206 
207  /// diagonal matrix entries
208  /// The diagonal array is of fixed size and not subject to shrinking.
209  RealVector diagonal;
210 
211  /// exchange two variables via the permutation
212  void flipCoordinates(std::size_t i, std::size_t j)
213  {
214  if (i == j) return;
215 
216  // notify the matrix cache
217  quadratic.flipColumnsAndRows(i, j);
218  std::swap( alpha[i], alpha[j]);
219  std::swap( linear[i], linear[j]);
220  std::swap( diagonal[i], diagonal[j]);
222  }
223 
224  /// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
225  void scaleBoxConstraints(double factor){
226  m_lower*=factor;
227  m_upper*=factor;
228  alpha *= factor;
229  }
230 
231  /// permutation of the variables alpha, gradient, etc.
232  std::vector<std::size_t> permutation;
233 private:
234  double m_lower;
235  double m_upper;
236 };
237 
238 
239 /// \brief Problem formulation for binary C-SVM problems
240 ///
241 /// max_\alpha - 1/2 \alpha^T K \alpha + \alpha^Ty
242 /// under constraints:
243 /// l_i <= \alpha_i <= u_i
244 /// \sum_i \alpha_i = 0
245 template<class MatrixT>
247 public:
248  typedef MatrixT MatrixType;
249  typedef typename MatrixType::QpFloatType QpFloatType;
250 
251  /// \brief Setup only using kernel matrix, labels and regularization parameter
252  CSVMProblem(MatrixType& quadratic, Data<unsigned int> const& labels, double C)
253  : quadratic(quadratic)
254  , linear(quadratic.size())
255  , alpha(quadratic.size(),0)
256  , diagonal(quadratic.size())
257  , permutation(quadratic.size())
258  , positive(quadratic.size())
259  , m_Cp(C)
260  , m_Cn(C)
261  {
262  SIZE_CHECK(dimensions() == linear.size());
263  SIZE_CHECK(dimensions() == quadratic.size());
264  SIZE_CHECK(dimensions() == labels.numberOfElements());
265 
266  for(std::size_t i = 0; i!= dimensions(); ++i){
267  permutation[i] = i;
268  diagonal(i) = quadratic.entry(i, i);
269  linear(i) = labels.element(i)? 1.0:-1.0;
270  positive[i] = linear(i) > 0;
271  }
272  }
273  ///\brief Setup using kernel matrix, labels and different regularization parameters for positive and negative classes
274  CSVMProblem(MatrixType& quadratic, Data<unsigned int> const& labels, RealVector const& regularizers)
275  : quadratic(quadratic)
276  , linear(quadratic.size())
277  , alpha(quadratic.size(),0)
278  , diagonal(quadratic.size())
279  , permutation(quadratic.size())
280  , positive(quadratic.size())
281  {
282  SIZE_CHECK(dimensions() == linear.size());
283  SIZE_CHECK(dimensions() == quadratic.size());
284  SIZE_CHECK(dimensions() == labels.numberOfElements());
285 
286  SIZE_CHECK(regularizers.size() > 0);
287  SIZE_CHECK(regularizers.size() <= 2);
288  m_Cp = m_Cn = regularizers[0];
289  if(regularizers.size() == 2)
290  m_Cp = regularizers[1];
291 
292 
293  for(std::size_t i = 0; i!= dimensions(); ++i){
294  permutation[i] = i;
295  diagonal(i) = quadratic.entry(i, i);
296  linear(i) = labels.element(i)? 1.0:-1.0;
297  positive[i] = linear(i) > 0;
298  }
299  }
300 
301  //Setup with changed linear part
302  CSVMProblem(MatrixType& quadratic, RealVector linear, Data<unsigned int> const& labels, double C)
303  : quadratic(quadratic)
304  , linear(linear)
305  , alpha(quadratic.size(),0)
306  , diagonal(quadratic.size())
307  , permutation(quadratic.size())
308  , positive(quadratic.size())
309  , m_Cp(C)
310  , m_Cn(C)
311  {
312  SIZE_CHECK(dimensions() == quadratic.size());
313  SIZE_CHECK(dimensions() == linear.size());
314  SIZE_CHECK(dimensions() == labels.numberOfElements());
315 
316  for(std::size_t i = 0; i!= dimensions(); ++i){
317  permutation[i] = i;
318  diagonal(i) = quadratic.entry(i, i);
319  positive[i] = labels.element(i) ? 1: 0;
320  }
321  }
322 
323  std::size_t dimensions()const{
324  return quadratic.size();
325  }
326 
327  double boxMin(std::size_t i)const{
328  return positive[i] ? 0.0 : -m_Cn;
329  }
330  double boxMax(std::size_t i)const{
331  return positive[i] ? m_Cp : 0.0;
332  }
333 
334  /// representation of the quadratic part of the objective function
335  MatrixType& quadratic;
336 
337  ///\brief Linear part of the problem
338  RealVector linear;
339 
340  /// Solution candidate
341  RealVector alpha;
342 
343  /// diagonal matrix entries
344  /// The diagonal array is of fixed size and not subject to shrinking.
345  RealVector diagonal;
346 
347  /// permutation of the variables alpha, gradient, etc.
348  std::vector<std::size_t> permutation;
349 
350  /// exchange two variables via the permutation
351  void flipCoordinates(std::size_t i, std::size_t j)
352  {
353  if (i == j) return;
354 
355  // notify the matrix cache
356  quadratic.flipColumnsAndRows(i, j);
357  std::swap( linear[i], linear[j]);
358  std::swap( alpha[i], alpha[j]);
359  std::swap( diagonal[i], diagonal[j]);
360  std::swap( permutation[i], permutation[j]);
361  std::swap( positive[i], positive[j]);
362  }
363 
364  /// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
365  void scaleBoxConstraints(double factor, double variableScalingFactor){
366  bool sameFactor = factor == variableScalingFactor;
367  double newCp = m_Cp*factor;
368  double newCn = m_Cn*factor;
369  for(std::size_t i = 0; i != dimensions(); ++i){
370  if(sameFactor && alpha(i)== m_Cp)
371  alpha(i) = newCp;
372  else if(sameFactor && alpha(i) == -m_Cn)
373  alpha(i) = -newCn;
374  else
375  alpha(i) *= variableScalingFactor;
376  }
377  m_Cp = newCp;
378  m_Cn = newCn;
379  }
380 
381 private:
382  ///\brief whether the label of the point is positive
383  std::vector<char> positive;
384 
385  ///\brief Regularization constant of the positive class
386  double m_Cp;
387  ///\brief Regularization constant of the negative class
388  double m_Cn;
389 };
390 
395  AlphaDeactivated = 3//also: AlphaUpperBound and AlphaLowerBound
396 };
397 
398 ///
399 /// \brief Quadratic program solver
400 ///
401 /// todo: new documentation
402 template<class Problem, class SelectionStrategy = typename Problem::PreferedSelectionStrategy >
403 class QpSolver
404 {
405 public:
407  Problem& problem
408  ):m_problem(problem){}
409 
410  /// \brief Solve the quadratic program.
411  ///
412  /// The function iteratively refines the solution by applying the
413  /// SMO (subset descent) algorithm until one of the stopping
414  /// criteria is fulfilled.
415  void solve(
416  QpStoppingCondition& stop,
417  QpSolutionProperties* prop = NULL
418  ){
419  double start_time = Timer::now();
420  unsigned long long iter = 0;
421  unsigned long long shrinkCounter = 0;
422 
423  SelectionStrategy workingSet;
424 
425  // decomposition loop
426  for(;;){
427  //stop if iterations exceed
428  if( iter == stop.maxIterations){
429  if (prop != NULL) prop->type = QpMaxIterationsReached;
430  break;
431  }
432  //stop if the maximum running time is exceeded
433  if (stop.maxSeconds < 1e100 && (iter+1) % 1000 == 0 ){
434  double current_time = Timer::now();
435  if (current_time - start_time > stop.maxSeconds){
436  if (prop != NULL) prop->type = QpTimeout;
437  break;
438  }
439  }
440 
441  // select a working set and check for optimality
442  std::size_t i = 0, j = 0;
443  double acc = workingSet(m_problem,i, j);
444  if (acc < stop.minAccuracy) {
445  m_problem.unshrink();
446  if(m_problem.checkKKT() < stop.minAccuracy){
447  if (prop != NULL) prop->type = QpAccuracyReached;
448  break;
449  }
450  m_problem.shrink(stop.minAccuracy);
451  workingSet(m_problem,i,j);
452  workingSet.reset();
453  }
454 
455  //update smo with the selected working set
456  m_problem.updateSMO(i,j);
457 
458  //do a shrinking every 1000 iterations. if variables got shrink
459  //notify working set selection
460  if(shrinkCounter == 0 && m_problem.shrink(stop.minAccuracy)){
461  shrinkCounter = std::max<std::size_t>(1000,m_problem.dimensions());
462  workingSet.reset();
463  }
464  iter++;
465  shrinkCounter--;
466  }
467 
468  if (prop != NULL)
469  {
470  double finish_time = Timer::now();
471 
472  std::size_t i = 0, j = 0;
473  prop->accuracy = workingSet(m_problem,i, j);
474  prop->value = m_problem.functionValue();
475  prop->iterations = iter;
476  prop->seconds = finish_time - start_time;
477  }
478  }
479 
480 protected:
481  Problem& m_problem;
482 };
483 
484 }
485 #endif