SvmProblems.h
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief -
5  *
6  * \author T. Glasmachers, O.Krause
7  * \date 2013
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef SHARK_ALGORITHMS_QP_SVMPROBLEMS_H
31 #define SHARK_ALGORITHMS_QP_SVMPROBLEMS_H
32 
34 
35 namespace shark{
36 
// Working-set selection criteria are applied as follows:
// Criterion crit;
// value = crit(problem, i, j);

41 
///\brief Computes the most violating pair of the problem
///
/// The MVP is defined as the pair of indices (i,j)
/// such that g(i) - g(j) is maximal and alpha_i < max_i
/// and alpha_j > min_j (i.e. neither variable is blocked at its bound).
	/// \brief Select the most violating pair (MVP).
	///
	/// One pass over the active variables: i becomes the variable with the
	/// largest gradient among those not at their upper bound (can still move
	/// up), j the variable with the smallest gradient among those not at
	/// their lower bound (can still move down).
	///
	/// \return maximal KKT violation, i.e. gradient(i) - gradient(j)
	/// \param problem the SVM problem to select the working set for
	/// \param i first working set component (output)
	/// \param j second working set component (output)
	template<class Problem>
	double operator()(Problem& problem, std::size_t& i, std::size_t& j)
	{
		double largestUp = -1e100;
		double smallestDown = 1e100;

		for (std::size_t a=0; a < problem.active(); a++)
		{
			double ga = problem.gradient(a);
			if (!problem.isUpperBound(a))
			{
				if (ga > largestUp)
				{
					largestUp = ga;
					i = a;
				}
			}
			if (!problem.isLowerBound(a))
			{
				if (ga < smallestDown)
				{
					smallestDown = ga;
					j = a;
				}
			}
		}

		// MVP stopping condition: the solver terminates once this violation
		// falls below its accuracy threshold.
		return largestUp - smallestDown;
	}

	/// \brief The MVP criterion is stateless; nothing to reset.
	void reset(){}
};
87 
88 
///\brief Computes the maximum gain solution
///
/// The first index is chosen based on the maximum gradient (first index of MVP)
/// and the second index is chosen such that the step of the corresponding alphas
/// produces the largest gain.
95 
	/// \brief Select a working set according to the second order algorithm of LIBSVM 2.8.
	///
	/// The first variable i is the maximum-gradient variable among those not
	/// at their upper bound (the first MVP index). The second variable j is
	/// chosen among the variables not at their lower bound such that a joint
	/// step of the pair (i,j) yields the largest gain of the quadratic
	/// objective (second order working set selection).
	///
	/// \return maximal KKT violation
	/// \param problem the svm problem to select the working set for
	/// \param i first working set component (output)
	/// \param j second working set component (output)
	template<class Problem>
	double operator()(Problem& problem, std::size_t& i, std::size_t& j)
	{
		// fallback working set in case no admissible pair is found below
		i = 0;
		j = 1;

		double smallestDown = 1e100;
		double largestUp = -1e100;

		// first pass: maximum gradient over all variables that can move up
		for (std::size_t a=0; a < problem.active(); a++)
		{
			double ga = problem.gradient(a);
			//if (aa < problem.boxMax(a))
			if (!problem.isUpperBound(a))
			{
				if (ga > largestUp)
				{
					largestUp = ga;
					i = a;
				}
			}
		}
		// no variable can move up: nothing to optimize, violation is zero
		if (largestUp == -1e100) return 0.0;

		// find the second index using second order information:
		// maximize the gain of a 2D sub-problem step along the equality
		// constraint line, using row i of the quadratic matrix.
		typename Problem::QpFloatType* q = problem.quadratic().row(i, 0, problem.active());
		double best = 0.0;
		for (std::size_t a = 0; a < problem.active(); a++){
			double ga = problem.gradient(a);
			if (!problem.isLowerBound(a))
			{
				// track the smallest "down" gradient for the violation value
				smallestDown=std::min(smallestDown,ga);

				double gain = detail::maximumGainQuadratic2DOnLine(
					problem.diagonal(i),
					problem.diagonal(a),
					q[a],
					largestUp,ga
				);
				if (gain> best)
				{
					best = gain;
					j = a;
				}
			}
		}

		if (best == 0.0) return 0.0; // numerical accuracy reached :(

		// MVP stopping condition
		return largestUp - smallestDown;
	}

	/// \brief This criterion is stateless; nothing to reset.
	void reset(){}
};
157 
public:
	/// \brief Initial state forces a LIBSVM (second order) selection on the first call.
	HMGSelectionCriterion():useLibSVM(true),smallProblem(false){}

	/// \brief Select a working set according to the hybrid maximum gain (HMG) algorithm
	///
	/// Falls back to the LIBSVM second order criterion on the first call,
	/// for small (fully cached) problems, and whenever the previous working
	/// set is stuck in a corner of the box. Otherwise it pairs one of the
	/// previous working set variables with its maximum gain partner.
	///
	/// \return maximal KKT violation
	/// \param problem the svm problem to select the working set for
	/// \param i first working set component (output)
	/// \param j second working set component (output)
	template<class Problem>
	double operator()(Problem& problem, std::size_t& i, std::size_t& j)
	{
		if (smallProblem || useLibSVM || isInCorner(problem))
		{
			useLibSVM = false;
			// once the full matrix fits into the cache, LIBSVM selection stays cheap
			if(!smallProblem && sqr(problem.active()) < problem.quadratic().getMaxCacheSize())
				smallProblem = true;
			LibSVMSelectionCriterion libSVMSelection;
			double value = libSVMSelection(problem,i, j);
			last_i = i;
			last_j = j;
			return value;
		}
		//old HMG: pair each previous working set variable with its maximum
		//gain partner and keep the better of the two candidate pairs
		MGStep besti = selectMGVariable(problem,last_i);
		if(besti.violation == 0.0)
			return 0;
		MGStep bestj = selectMGVariable(problem,last_j);

		if(bestj.gain > besti.gain){
			i = last_j;
			j = bestj.index;
		}else{
			i = last_i;
			j = besti.index;
		}
		// keep the convention that i carries the larger gradient
		if (problem.gradient(i) < problem.gradient(j))
			std::swap(i, j);
		last_i = i;
		last_j = j;
		return besti.violation;
	}

	/// \brief Restore the initial state; the next call uses LIBSVM selection again.
	void reset(){
		useLibSVM = true;
		smallProblem = false;
	}
206 
private:
	/// \brief Returns true iff both variables of the last working set are
	/// (numerically) stuck at one of their box bounds.
	///
	/// In a corner the previous pair spans no useful search direction, so
	/// the caller falls back to the LIBSVM criterion.
	template<class Problem>
	bool isInCorner(Problem const& problem)const{
		double Li = problem.boxMin(last_i);
		double Ui = problem.boxMax(last_i);
		double Lj = problem.boxMin(last_j);
		double Uj = problem.boxMax(last_j);
		// relative tolerance for "at the bound"
		double eps_i = 1e-8 * (Ui - Li);
		double eps_j = 1e-8 * (Uj - Lj);

		if ((problem.alpha(last_i) <= Li + eps_i || problem.alpha(last_i) >= Ui - eps_i)
		&& ((problem.alpha(last_j) <= Lj + eps_j || problem.alpha(last_j) >= Uj - eps_j)))
		{
			return true;
		}
		return false;
	}
	/// \brief Result of a maximum gain search for one fixed variable.
	struct MGStep{
		std::size_t index;//index of variable
		double violation;//computed gradientValue
		double gain;
	};
	/// \brief Finds the partner variable with maximal gain when paired with variable i.
	///
	/// As a by-product of the scan, the current KKT violation
	/// (largestUp - smallestDown) over the active set is computed.
	template<class Problem>
	MGStep selectMGVariable(Problem& problem,std::size_t i) const{

		//best variable pair found so far
		std::size_t bestIndex = 0;//index of variable
		double bestGain = 0;

		double largestUp = -1e100;
		double smallestDown = 1e100;

		// try combinations with b = old_i
		typename Problem::QpFloatType* q = problem.quadratic().row(i, 0, problem.active());
		double ab = problem.alpha(i);
		double db = problem.diagonal(i);
		double Lb = problem.boxMin(i);
		double Ub = problem.boxMax(i);
		double gb = problem.gradient(i);
		for (std::size_t a = 0; a < problem.active(); a++)
		{
			double ga = problem.gradient(a);

			if (!problem.isUpperBound(a))
				largestUp = std::max(largestUp,ga);
			if (!problem.isLowerBound(a))
				smallestDown = std::min(smallestDown,ga);

			if (a == i) continue;
			//get maximum unconstrained step length
			// NOTE(review): the denominator can be <= 0 for indefinite or
			// degenerate matrices; presumably the quadratic matrix is
			// positive definite here -- verify against the callers.
			double denominator = (problem.diagonal(a) + db - 2.0 * q[a]);
			double mu_max = (ga - gb) / denominator;

			//check whether a step > 0 is possible at all
			//~ if( mu_max > 0 && ( problem.isUpperBound(a) || problem.isLowerBound(b)))continue;
			//~ if( mu_max < 0 && ( problem.isLowerBound(a) || problem.isUpperBound(b)))continue;

			//constrain the step to the box of both variables
			double aa = problem.alpha(a);
			double La = problem.boxMin(a);
			double Ua = problem.boxMax(a);
			double mu_star = mu_max;
			if (aa + mu_star < La) mu_star = La - aa;
			else if (mu_star + aa > Ua) mu_star = Ua - aa;
			if (ab - mu_star < Lb) mu_star = ab - Lb;
			else if (ab - mu_star > Ub) mu_star = ab - Ub;

			// gain of the (possibly clipped) step of length mu_star
			double gain = mu_star * (2.0 * mu_max - mu_star) * denominator;

			// select the largest gain
			if (gain > bestGain)
			{
				bestGain = gain;
				bestIndex = a;
			}
		}
		MGStep step;
		step.violation= largestUp-smallestDown;
		step.index = bestIndex;
		step.gain=bestGain;
		return step;

	}

	/// last selected working set (updated after every call)
	std::size_t last_i;
	std::size_t last_j;

	/// true until the first selection; forces an initial LIBSVM step
	bool useLibSVM;
	/// true once the squared active size fits into the kernel cache
	bool smallProblem;
};
297 
298 
299 template<class Problem>
public:
	/// floating point type of the quadratic matrix entries
	typedef typename Problem::QpFloatType QpFloatType;
	/// type of the quadratic (kernel) matrix
	typedef typename Problem::MatrixType MatrixType;

	/// \brief Wraps the given problem and initializes gradient and alpha status.
	///
	/// The gradient of the dual objective is linear - Q * alpha; the loop
	/// subtracts the matrix row of every variable with non-zero alpha.
	/// This is quadratic time in the number of non-zero coefficients.
	/// Note: m_problem is declared by value, so the referenced problem is copied.
	SvmProblem(Problem& problem)
	: m_problem(problem)
	, m_gradient(problem.linear)
	, m_active(problem.dimensions())
	, m_alphaStatus(problem.dimensions(),AlphaFree){
		//compute the gradient if alpha != 0
		for (std::size_t i=0; i != dimensions(); i++){
			double v = alpha(i);
			if (v != 0.0){
				QpFloatType* q = quadratic().row(i, 0, dimensions());
				for (std::size_t a=0; a < dimensions(); a++)
					m_gradient(a) -= q[a] * v;
			}
			updateAlphaStatus(i);
		}
	}
322  std::size_t dimensions()const{
323  return m_problem.dimensions();
324  }
325 
326  std::size_t active()const{
327  return m_active;
328  }
329 
330  double boxMin(std::size_t i)const{
331  return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMin(i);
332  }
333  double boxMax(std::size_t i)const{
334  return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMax(i);
335  }
336  bool isLowerBound(std::size_t i)const{
337  return m_alphaStatus[i] & AlphaLowerBound;
338  }
339  bool isUpperBound(std::size_t i)const{
340  return m_alphaStatus[i] & AlphaUpperBound;
341  }
342 
	/// representation of the quadratic part of the objective function
	MatrixType& quadratic(){
		return m_problem.quadratic;
	}

	/// \brief Linear part of the objective for variable i.
	double linear(std::size_t i)const{
		return m_problem.linear(i);
	}

	/// \brief Current value of variable i (internal, permuted index).
	double alpha(std::size_t i)const{
		return m_problem.alpha(i);
	}

	/// \brief Diagonal entry Q(i,i) of the quadratic matrix.
	double diagonal(std::size_t i)const{
		return m_problem.diagonal(i);
	}

	/// \brief Gradient of the dual objective for variable i.
	double gradient(std::size_t i)const{
		return m_gradient(i);
	}

	/// \brief Maps the internal index i to the original variable index.
	std::size_t permutation(std::size_t i)const{
		return m_problem.permutation[i];
	}
367 
368  RealVector getUnpermutedAlpha()const{
369  RealVector alpha(dimensions());
370  for (std::size_t i=0; i<dimensions(); i++)
371  alpha(m_problem.permutation[i]) = m_problem.alpha(i);
372  return alpha;
373  }
374 
375  ///\brief Does an update of SMO given a working set with indices i and j.
376  void updateSMO(std::size_t i, std::size_t j){
377  SIZE_CHECK(i < active());
378  SIZE_CHECK(j < active());
379  // get the matrix row corresponding to the first variable of the working set
380  QpFloatType* qi = quadratic().row(i, 0, active());
381 
382  // solve the sub-problem defined by i and j
383  double numerator = gradient(i) - gradient(j);
384  double denominator = diagonal(i) + diagonal(j) - 2.0 * qi[j];
385  denominator = std::max(denominator,1.e-12);
386  double step = numerator/denominator;
387 
388  //update alpha in a numerically stable way
389  // do the update of the alpha values carefully - avoid numerical problems
390  double Ui = boxMax(i);
391  double Lj = boxMin(j);
392  double aiOld = m_problem.alpha(i);
393  double ajOld = m_problem.alpha(j);
394  double& ai = m_problem.alpha(i);
395  double& aj = m_problem.alpha(j);
396  if (step >= std::min(Ui - ai, aj - Lj))
397  {
398  if (Ui - ai > aj - Lj)
399  {
400  step = aj - Lj;
401  ai += step;
402  aj = Lj;
403  }
404  else if (Ui - ai < aj - Lj)
405  {
406  step = Ui - ai;
407  ai = Ui;
408  aj -= step;
409  }
410  else
411  {
412  step = Ui - ai;
413  ai = Ui;
414  aj = Lj;
415  }
416  }
417  else
418  {
419  ai += step;
420  aj -= step;
421  }
422 
423  if(ai == aiOld && aj == ajOld)return;
424 
425  //Update internal data structures (gradient and alpha status)
426  QpFloatType* qj = quadratic().row(j, 0, active());
427  for (std::size_t a = 0; a < active(); a++)
428  m_gradient(a) -= step * qi[a] - step * qj[a];
429 
430  //update boundary status
431  updateAlphaStatus(i);
432  updateAlphaStatus(j);
433  }
434 
	///\brief Returns the current function value of the problem.
	///
	/// Since m_gradient = linear - Q*alpha, the expression
	/// 0.5*(gradient + linear)^T alpha equals linear^T alpha - 0.5 alpha^T Q alpha,
	/// i.e. the dual objective value at the current alpha.
	double functionValue()const{
		return 0.5*inner_prod(m_gradient+m_problem.linear,m_problem.alpha);
	}
439 
	/// \brief This problem formulation does not shrink; always reports "nothing shrunk".
	bool shrink(double){return false;}
	/// \brief No-op; shrinking is not supported here.
	void reshrink(){}
	/// \brief No-op; shrinking is not supported here.
	void unshrink(){}
443 
444  /// \brief Define the initial solution for the iterative solver.
445  ///
446  /// This method can be used to warm-start the solver. It requires a
447  /// feasible solution (alpha) and the corresponding gradient of the
448  /// dual objective function.
449  void setInitialSolution(RealVector const& alpha, RealVector const& gradient)
450  {
451  std::size_t n = dimensions();
452  SIZE_CHECK(alpha.size() == n);
453  SIZE_CHECK(gradient.size() == n);
454  for (std::size_t i=0; i<n; i++)
455  {
456  std::size_t j = permutation(i);
457  SHARK_ASSERT(alpha(j) >= boxMin(j) && alpha(j) <= boxMax(j));
458  m_problem.alpha(i) = alpha(j);
459  m_gradient(i) = gradient(j);
460  updateAlphaStatus(i);
461  }
462  }
463 
	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha), for which it computes the gradient of
	/// the dual objective function. Note that this is a quadratic time
	/// operation in the number of non-zero coefficients.
	///
	/// NOTE(review): the loop reads alpha(i) with the internal index i while
	/// the two-argument overload treats its arguments as unpermuted; this is
	/// only consistent if the permutation is the identity at warm-start time
	/// -- verify against the callers.
	void setInitialSolution(RealVector const& alpha)
	{
		std::size_t n = dimensions();
		SIZE_CHECK(alpha.size() == n);
		// start from the linear term and subtract the alpha-weighted matrix rows
		RealVector gradient = m_problem.linear;
		blas::vector<QpFloatType> q(n);
		for (std::size_t i=0; i<n; i++)
		{
			double a = alpha(i);
			if (a == 0.0) continue; // zero coefficients contribute nothing
			m_problem.quadratic.row(i, 0, n, q.storage());
			noalias(gradient) -= a * q;
		}
		setInitialSolution(alpha, gradient);
	}
485 
	///\brief Remove the i-th example from the problem while taking the equality constraint into account.
	///
	/// The i-th variable is driven to zero by transferring its weight to
	/// other active variables (so the equality constraint stays fulfilled);
	/// afterwards the variable is marked as deactivated.
	void deactivateVariable(std::size_t i){
		SIZE_CHECK(i < dimensions());
		//we need to correct for the equality constraint
		//that means we have to move enough variables to satisfy the constraint again.
		for (std::size_t j=0; j<dimensions(); j++){
			if (j == i || m_alphaStatus[j] == AlphaDeactivated) continue;
			//propose the maximum step possible and let applyStep cut it down.
			applyStep(i,j, -alpha(i));
			if(alpha(i) == 0.0) break; // weight fully transferred
		}
		m_alphaStatus[i] = AlphaDeactivated;
	}
	///\brief Reactivate a previously deactivated variable.
	void activateVariable(std::size_t i){
		SIZE_CHECK(i < dimensions());
		m_alphaStatus[i] = AlphaFree;
		updateAlphaStatus(i); // recompute the actual bound status
	}
508 
	/// \brief Exchange two variables via the permutation.
	///
	/// Swaps the underlying problem's variables as well as the cached
	/// gradient entries and alpha status flags.
	void flipCoordinates(std::size_t i, std::size_t j)
	{
		SIZE_CHECK(i < dimensions());
		SIZE_CHECK(j < dimensions());
		if (i == j) return;

		m_problem.flipCoordinates(i, j);
		std::swap( m_gradient[i], m_gradient[j]);
		std::swap( m_alphaStatus[i], m_alphaStatus[j]);
	}
520 
	/// \brief Scales all box constraints by a constant factor and adapts the solution using a separate scaling
	///
	/// Only the quadratic part of the gradient (gradient - linear) scales
	/// with the variables, so the linear term is removed before scaling and
	/// added back afterwards.
	void scaleBoxConstraints(double factor, double variableScalingFactor){
		m_problem.scaleBoxConstraints(factor,variableScalingFactor);
		for(std::size_t i = 0; i != this->dimensions(); ++i){
			//don't change deactivated variables
			if(m_alphaStatus[i] == AlphaDeactivated) continue;
			m_gradient(i) -= linear(i);
			m_gradient(i) *= variableScalingFactor;
			m_gradient(i) += linear(i);
			updateAlphaStatus(i);
		}
	}
533 
	/// \brief Adapts the linear part of the problem and updates the internal data structures accordingly.
	///
	/// Since m_gradient = linear - Q*alpha, changing linear(i) shifts
	/// gradient(i) by the same difference.
	virtual void setLinear(std::size_t i, double newValue){
		m_gradient(i) -= linear(i);
		m_gradient(i) += newValue;
		m_problem.linear(i) = newValue;
	}
540 
541  double checkKKT()const{
542  double largestUp = -1e100;
543  double smallestDown = 1e100;
544  for (std::size_t a = 0; a < active(); a++){
545  if (!isLowerBound(a))
546  smallestDown = std::min(smallestDown,gradient(a));
547  if (!isUpperBound(a))
548  largestUp = std::max(largestUp,gradient(a));
549  }
550  return largestUp - smallestDown;
551  }
552 
protected:
	/// the wrapped box-constrained problem; declared by value, so the
	/// constructor copies the problem passed by reference
	Problem m_problem;

	/// gradient of the objective function at the current alpha
	RealVector m_gradient;

	/// number of active (not shrunk) variables
	std::size_t m_active;

	/// \brief Stores the status, whether alpha is on the lower or upper bound, or whether it is free.
	std::vector<char> m_alphaStatus;
563 
	///\brief Update the problem by a proposed step taking the box constraints into account.
	///
	/// A step length 0<=lambda<=1 is found so that
	/// boxMin(i) <= alpha(i)+lambda*step <= boxMax(i)
	/// and
	/// boxMin(j) <= alpha(j)-lambda*step <= boxMax(j)
	/// the update is performed in a numerically stable way and the internal data structures
	/// (gradient and alpha status) are also updated.
	virtual void applyStep(std::size_t i, std::size_t j, double step){
		SIZE_CHECK(i < active());
		SIZE_CHECK(j < active());
		// do the update of the alpha values carefully - avoid numerical problems
		double Ui = boxMax(i);
		double Lj = boxMin(j);
		double aiOld = m_problem.alpha(i);
		double ajOld = m_problem.alpha(j);
		double& ai = m_problem.alpha(i);
		double& aj = m_problem.alpha(j);
		// alpha_i increases by step while alpha_j decreases by it
		if (step >= std::min(Ui - ai, aj - Lj))
		{
			if (Ui - ai > aj - Lj)
			{
				// j hits its lower bound first: assign the bound exactly
				step = aj - Lj;
				ai += step;
				aj = Lj;
			}
			else if (Ui - ai < aj - Lj)
			{
				// i hits its upper bound first: assign the bound exactly
				step = Ui - ai;
				ai = Ui;
				aj -= step;
			}
			else
			{
				// both bounds are reached simultaneously
				step = Ui - ai;
				ai = Ui;
				aj = Lj;
			}
		}
		else
		{
			// interior step: no clipping needed
			ai += step;
			aj -= step;
		}

		// nothing moved: skip the expensive gradient update
		if(ai == aiOld && aj == ajOld)return;

		//Update internal data structures (gradient and alpha status)
		QpFloatType* qi = quadratic().row(i, 0, active());
		QpFloatType* qj = quadratic().row(j, 0, active());
		for (std::size_t a = 0; a < active(); a++)
			m_gradient(a) -= step * qi[a] - step * qj[a];

		//update boundary status
		updateAlphaStatus(i);
		updateAlphaStatus(j);
	}
621 
622 
	/// \brief Recomputes the cached bound status of variable i from its current alpha.
	void updateAlphaStatus(std::size_t i){
		SIZE_CHECK(i < dimensions());
		m_alphaStatus[i] = AlphaFree;
		if(m_problem.alpha(i) == boxMax(i))
			m_alphaStatus[i] |= AlphaUpperBound;
		if(m_problem.alpha(i) == boxMin(i))
			m_alphaStatus[i] |= AlphaLowerBound;
	}
631 
	/// \brief Returns true if variable a is a candidate for shrinking.
	///
	/// A variable at a bound whose gradient points out of the feasible box
	/// cannot take part in any improving step at the moment.
	bool testShrinkVariable(std::size_t a, double largestUp, double smallestDown)const{
		if (
			( isLowerBound(a) && gradient(a) < smallestDown)
			|| ( isUpperBound(a) && gradient(a) >largestUp)
		){
			// In this moment no feasible step including this variable
			// can improve the objective. Thus deactivate the variable.
			return true;
		}
		return false;
	}
};
644 
/// \brief SvmProblem variant with shrinking support, provided by BoxBasedShrinkingStrategy.
template<class Problem>
class SvmShrinkingProblem: public BoxBasedShrinkingStrategy<SvmProblem<Problem> >{
public:
	/// \param problem the underlying box-constrained problem
	/// \param shrink whether shrinking is enabled (default: true)
	SvmShrinkingProblem(Problem& problem, bool shrink=true)
	:BoxBasedShrinkingStrategy<SvmProblem<Problem> >(problem,shrink){}
};
651 
652 }
653 #endif