QpMcSimplexDecomp.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Quadratic programming problem for multi-class SVMs
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date 2007-2012
12  *
13  *
14  * \par Copyright 1995-2017 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://shark-ml.org/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_QP_QPMCSIMPLEXDECOMP_H
38 #define SHARK_ALGORITHMS_QP_QPMCSIMPLEXDECOMP_H
39 
42 #include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
43 #include <shark/Core/Timer.h>
44 #include <shark/Data/Dataset.h>
45 
46 
47 namespace shark {
48 
49 template <class Matrix>
51 {
52 public:
53  typedef typename Matrix::QpFloatType QpFloatType;
 54  /// \brief Working set selection returning the S2DO working set
55  ///
56  /// This selection operator picks the first variable by maximum gradient,
57  /// the second by maximum unconstrained gain.
/// \brief Delegates working set selection to the problem's own S2DO strategy.
///
/// \param problem the decomposition problem to query
/// \param i [out] first working set variable
/// \param j [out] second working set variable (may equal \p i for a 1-D step)
/// \return the maximal KKT violation found; 0 indicates optimality
template<class Problem>
double operator()(Problem& problem, std::size_t& i, std::size_t& j){
	//todo move implementation here
	return problem.selectWorkingSet(i,j);
}

/// \brief Reset the selection strategy. This strategy is stateless, so this is a no-op.
void reset(){}
66  };
67 
68  /// Constructor
69  /// \param kernel kernel matrix - cache or pre-computed matrix
 70  /// \param M kernel modifiers in the format \f$ M(y_i, p, y_j, q) = M(\mathrm{classes} \cdot (y_i \cdot |P| + p) + y_j,\; q) \f$
71  /// \param target the target labels for the variables
72  /// \param linearMat the linear part of the problem
73  /// \param C upper bound for all box variables, lower bound is 0.
75  Matrix& kernel,
77  Data<unsigned int> const& target,
78  RealMatrix const& linearMat,
79  double C
80  )
81  : m_kernelMatrix(kernel)
82  , m_M(M)
83  , m_C(C)
84  , m_classes(numberOfClasses(target))
85  , m_cardP(linearMat.size2())
86  , m_numExamples(kernel.size())
89  , m_alpha(m_numVariables,0.0)
95  , m_useShrinking(true){
96  SHARK_RUNTIME_CHECK(target.numberOfElements() == kernel.size(), "size of kernel matrix and target vector do not agree");
97  SHARK_RUNTIME_CHECK(kernel.size() == linearMat.size1(), "size of kernel matrix and linear factor to not agree");
98 
99  // prepare problem internal variables
102  for (std::size_t v=0, i=0; i<m_numExamples; i++)
103  {
104  unsigned int y = target.element(i);
105  m_examples[i].index = i;
106  m_examples[i].y = y;
107  m_examples[i].active = m_cardP;
108  m_examples[i].var = &m_storage1[m_cardP * i];
109  m_examples[i].avar = &m_storage2[m_cardP * i];
110  m_examples[i].varsum = 0;
111  double k = m_kernelMatrix.entry(i, i);
112  m_examples[i].diagonal = k;
113  for (std::size_t p=0; p<m_cardP; p++, v++)
114  {
115  m_variables[v].example = i;
116  m_variables[v].p = p;
117  m_variables[v].index = p;
118  double Q = m_M(m_classes * (y * m_cardP + p) + y, p) * k;
119  m_variables[v].diagonal = Q;
120  m_storage1[v] = v;
121  m_storage2[v] = v;
122 
123  m_linear(v) = m_gradient(v) = linearMat(i,p);
124  }
125  }
126  // initialize unshrinking to make valgrind happy.
127  bUnshrinked = false;
128  }
129 
/// \brief Enable or disable the shrinking heuristic.
///
/// \param shrinking true (default) to allow shrinking of variables/examples
///        that can no longer contribute to progress; false to always work on
///        the full problem
void setShrinking(bool shrinking = true)
{
	m_useShrinking = shrinking;
}
135 
/// \brief Returns the solution found.
///
/// The result has one row per example (addressed by the example's original,
/// pre-shrinking dataset position via originalIndex) and one column per
/// constraint p.
/// NOTE(review): the matrix is allocated with m_numVariables rows, although
/// only rows < m_numExamples are ever written (originalIndex(v) is an example
/// index). Presumably m_numExamples was intended — confirm the shape callers
/// expect before changing it.
RealMatrix solution() const{
	RealMatrix solutionMatrix(m_numVariables,m_cardP,0);
	for (std::size_t v=0; v<m_numVariables; v++)
	{
		solutionMatrix(originalIndex(v),m_variables[v].p) = m_alpha(v);
	}
	return solutionMatrix;
}
/// \brief Returns the gradient of the solution.
///
/// Layout matches solution(): one row per example (original dataset position),
/// one column per constraint p.
/// NOTE(review): as in solution(), the matrix is allocated with m_numVariables
/// rows while only the first m_numExamples rows are written — verify intent.
RealMatrix solutionGradient() const{
	RealMatrix solutionGradientMatrix(m_numVariables,m_cardP,0);
	for (std::size_t v=0; v<m_numVariables; v++)
	{
		solutionGradientMatrix(originalIndex(v),m_variables[v].p) = m_gradient(v);
	}
	return solutionGradientMatrix;
}
154 
/// \brief Compute the objective value of the current solution.
///
/// The gradient is maintained as g = linear - Q*alpha (gradientUpdate only
/// subtracts Q-terms from the initial value m_linear), so
/// 0.5*(g + linear)^T alpha = linear^T alpha - 0.5 * alpha^T Q alpha,
/// i.e. the dual objective value.
double functionValue()const{
	return 0.5*inner_prod(m_gradient+m_linear,m_alpha);
}
159 
160  unsigned int label(std::size_t i){
161  return m_examples[i].y;
162  }
163 
/// \brief Returns the total number of dual variables (m_numExamples * m_cardP).
std::size_t dimensions()const{
	return m_numVariables;
}
/// \brief Returns the number of dual variables per example.
std::size_t cardP()const{
	return m_cardP;
}

/// \brief Returns the number of training examples of the problem.
std::size_t getNumExamples()const{
	return m_numExamples;
}
174 
175  /// \brief change the linear part of the problem by some delta
176  void addDeltaLinear(RealMatrix const& deltaLinear){
177  SIZE_CHECK(deltaLinear.size1() == m_numExamples);
178  SIZE_CHECK(deltaLinear.size2() == m_cardP);
179  for (std::size_t v=0; v<m_numVariables; v++)
180  {
181  std::size_t p = m_variables[v].p;
182  m_gradient(v) += deltaLinear(originalIndex(v),p);
183  m_linear(v) += deltaLinear(originalIndex(v),p);
184  }
185  }
186 
187  void updateSMO(std::size_t v, std::size_t w){
188  SIZE_CHECK(v < m_activeVar);
189  SIZE_CHECK(w < m_activeVar);
190  // update
191  if (v == w)
192  {
193  // Limit case of a single variable;
194  std::size_t i = m_variables[v].example;
196  std::size_t p = m_variables[v].p;
197  unsigned int y = m_examples[i].y;
198  std::size_t r = m_cardP * y + p;
199  double& varsum = m_examples[i].varsum;
200  //the upper bound depends on the values of the variables of the other classes.
201  double upperBound = m_C-varsum+m_alpha(v);
202 
203  QpFloatType* q = m_kernelMatrix.row(i, 0, m_activeEx);
204  double Qvv = m_variables[v].diagonal;
205  double mu = -m_alpha(v);
206  detail::solveQuadraticEdge(m_alpha(v),m_gradient(v),Qvv,0,upperBound);
207  mu += m_alpha(v);
208  updateVarsum(i,mu);
209  gradientUpdate(r, mu, q);
210  }
211  else
212  {
213  // S2DO
214  std::size_t iv = m_variables[v].example;
215  SHARK_ASSERT(iv < m_activeEx);
216  std::size_t pv = m_variables[v].p;
217  unsigned int yv = m_examples[iv].y;
218  double& varsumv = m_examples[iv].varsum;
219 
220  std::size_t iw = m_variables[w].example;
221  SHARK_ASSERT(iw < m_activeEx);
222  std::size_t pw = m_variables[w].p;
223  unsigned int yw = m_examples[iw].y;
224  double& varsumw = m_examples[iw].varsum;
225 
226  // get the matrix rows corresponding to the working set
227  QpFloatType* qv = m_kernelMatrix.row(iv, 0, m_activeEx);
228  QpFloatType* qw = m_kernelMatrix.row(iw, 0, m_activeEx);
229  std::size_t rv = m_cardP*yv+pv;
230  std::size_t rw = m_cardP*yw+pw;
231 
232  // get the Q-matrix restricted to the working set
233  double Qvv = m_variables[v].diagonal;
234  double Qww = m_variables[w].diagonal;
235  double Qvw = m_M(m_classes * rv + yw, pw) * qv[iw];
236 
237  //same sample - simplex case
238  double mu_v = -m_alpha(v);
239  double mu_w = -m_alpha(w);
240  if(iv == iw){
241 
242  double upperBound = m_C-varsumv+m_alpha(v)+m_alpha(w);
243  // solve the sub-problem and update the gradient using the step sizes mu
244  detail::solveQuadratic2DTriangle(m_alpha(v), m_alpha(w),
245  m_gradient(v), m_gradient(w),
246  Qvv, Qvw, Qww,
247  upperBound
248  );
249  mu_v += m_alpha(v);
250  mu_w += m_alpha(w);
251  updateVarsum(iv,mu_v+mu_w);
252  }
253  else{
254  double Uv = m_C-varsumv+m_alpha(v);
255  double Uw = m_C-varsumw+m_alpha(w);
256  // solve the sub-problem and update the gradient using the step sizes mu
257  detail::solveQuadratic2DBox(m_alpha(v), m_alpha(w),
258  m_gradient(v), m_gradient(w),
259  Qvv, Qvw, Qww,
260  0, Uv, 0, Uw
261  );
262  mu_v += m_alpha(v);
263  mu_w += m_alpha(w);
264  updateVarsum(iv,mu_v);
265  updateVarsum(iw,mu_w);
266  }
267 
268  double varsumvo = 0;
269  for(std::size_t p = 0; p != m_cardP; ++p){
270  std::size_t varIndex = m_examples[iv].var[p];
271  varsumvo += m_alpha[varIndex];
272  }
273  double varsumwo = 0;
274  for(std::size_t p = 0; p != m_cardP; ++p){
275  std::size_t varIndex = m_examples[iw].var[p];
276  varsumwo += m_alpha[varIndex];
277  }
278  gradientUpdate(rv, mu_v, qv);
279  gradientUpdate(rw, mu_w, qw);
280  }
281  }
282 
/// \brief Shrink the problem: deactivate variables/examples that cannot contribute.
///
/// If shrinking is disabled, returns false immediately. On the first time the
/// KKT violation falls below 10*epsilon the problem is unshrunk once, so that
/// shrinking decisions near convergence are made on the full problem.
/// \param epsilon target accuracy used to trigger the one-time unshrink
/// \return false if shrinking is disabled, true otherwise
bool shrink(double epsilon)
{
	if(! m_useShrinking)
		return false;
	if (! bUnshrinked)
	{
		if (checkKKT() < 10.0 * epsilon)
		{
			// unshrink the problem at this accuracy level
			unshrink();
			bUnshrinked = true;
		}
	}

	//iterate through all simplices.
	for (std::size_t i = m_activeEx; i > 0; i--){
		Example const& ex = m_examples[i-1];
		std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
		double up = pair.first.first;
		double down = pair.second.first;

		//check the simplex for possible search directions
		//case 1: simplex is bounded and stays at the bound, in this case proceed as in MVP
		if(down > 0 && ex.varsum == m_C && up-down > 0){
			int pc = (int)ex.active;
			for(int p = pc-1; p >= 0; --p){
				double a = m_alpha(ex.avar[p]);
				double g = m_gradient(ex.avar[p]);
				//if we can't do a step along the simplex, we can shrink the variable.
				if(a == 0 && g-down < 0){
					deactivateVariable(ex.avar[p]);
				}
				else if (a == m_C && up-g < 0){
					//shrinking this variable means, that the whole simplex can't move,
					//so shrink every variable, even the ones that previously couldn't
					//be shrinked
					// NOTE(review): q starts at ex.active, but avar holds valid
					// entries only at indices 0..active-1, so the first iteration
					// reads one past the last active variable (and active shrinks
					// as deactivateVariable runs). Looks like an off-by-one —
					// confirm against upstream before relying on this branch.
					for(int q = (int)ex.active; q >= 0; --q){
						deactivateVariable(ex.avar[q]);
					}
					// p = 0 combined with --p terminates the outer loop (acts as break)
					p = 0;
				}
			}
		}
		//case 2: all variables are zero and pushed against the boundary
		// -> shrink the example
		else if(ex.varsum == 0 && up < 0){
			int pc = (int)ex.active;
			for(int p = pc-1; p >= 0; --p){
				deactivateVariable(ex.avar[p]);
			}
		}
		//case 3: the simplex is not bounded and there are free variables.
		//in this case we currently do not shrink
		//as a variable might get bounded at some point which means that all variables
		//can be important again.
		//else{
		//}

	}
//	std::cout<<"shrunk. remaining: "<<m_activeEx<<","<<m_activeVar<<std::endl;
	return true;
}
346 
/// \brief Activate all variables and examples again (undo shrinking).
///
/// Recomputes the gradient components of the previously inactive variables
/// (quadratic time complexity) and re-activates all per-example variables.
/// NOTE(review): this extraction appears to be missing statements from the
/// upstream source — most likely the re-initialization of the inactive
/// gradient components from m_linear before the loop, and the restoration of
/// m_activeEx/m_activeVar to their full values at the end. Confirm against
/// the original file; as shown here the function would leave the active
/// counters unchanged.
void unshrink()
{
	if (m_activeVar == m_numVariables) return;

	// compute the inactive m_gradient components (quadratic time complexity)
	for (std::size_t v = 0; v != m_numVariables; v++)
	{
		double mu = m_alpha(v);
		if (mu == 0.0) continue;

		std::size_t iv = m_variables[v].example;
		std::size_t pv = m_variables[v].p;
		unsigned int yv = m_examples[iv].y;
		std::size_t r = m_cardP * yv + pv;
		// fetch the full kernel row of example iv (not just the active part)
		std::vector<QpFloatType> q(m_numExamples);
		m_kernelMatrix.row(iv, 0, m_numExamples, &q[0]);

		for (std::size_t a = 0; a != m_numExamples; a++)
		{
			double k = q[a];
			Example& ex = m_examples[a];
			typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
			QpFloatType def = row.defaultvalue;
			// sparse entries: apply the difference to the default value
			for (std::size_t b=0; b<row.size; b++)
			{
				std::size_t f = ex.var[row.entry[b].index];
				if (f >= m_activeVar)
					m_gradient(f) -= mu * (row.entry[b].value - def) * k;
			}
			// default value applies uniformly to all inactive variables of ex
			if (def != 0.0)
			{
				double upd = mu * def * k;
				for (std::size_t b=ex.active; b<m_cardP; b++)
				{
					std::size_t f = ex.avar[b];
					m_gradient(f) -= upd;
				}
			}
		}
	}

	// re-activate all variables of every example
	for (std::size_t i=0; i<m_numExamples; i++)
		m_examples[i].active = m_cardP;
}
396 
///
/// \brief Select the working set.
///
/// Select one or two variables for the sub-problem
/// and return the maximal KKT violation. The method
/// MAY select the same index for i and j. In that
/// case the working set consists of a single variable.
/// The working set may be invalid if the method reports
/// a KKT violation of zero, indicating optimality.
double selectWorkingSet(std::size_t& i, std::size_t& j)
{

	//first order selection
	//we select the best variable along the box constraint (for a step between samples)
	//and the best gradient and example index for a step along the simplex (step inside a sample)
	double maxGradient = 0;//max gradient for variables between samples (box constraints)
	double maxSimplexGradient = 0;//max gradient along the simplex constraints
	std::size_t maxSimplexExample = 0;//example with the maximum simplex constraint
	i = j = 0;
	// first order selection
	for (std::size_t e=0; e< m_activeEx; e++)
	{
		Example& ex = m_examples[e];
		bool canGrow = ex.varsum < m_C;

		//calculate the maximum violating pair for the example.
		std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
		double up = pair.first.first;
		double down = pair.second.first;

		// bounded simplex: the only move is along the face, gauge by up-down
		if(!canGrow && up - down > maxSimplexGradient){
			maxSimplexGradient = up-down;
			maxSimplexExample = e;
		}
		// free simplex: growing the steepest-ascent variable is feasible
		if (canGrow && up > maxGradient) {
			maxGradient = up;
			i = pair.first.second;
		}
		// shrinking a positive variable is always feasible
		if (-down > maxGradient) {
			maxGradient = -down;
			i = pair.second.second;
		}
	}

	//find the best possible partner of the variable
	//by searching every other sample
	std::pair<std::pair<std::size_t,std::size_t> ,double > boxPair = maxGainBox(i);
	double bestGain = boxPair.second;
	std::pair<std::size_t, std::size_t> best = boxPair.first;

	//always search the simplex of the variable
	std::pair<std::pair<std::size_t,std::size_t> ,double > simplexPairi = maxGainSimplex(m_variables[i].example);
	if(simplexPairi.second > bestGain){
		best = simplexPairi.first;
		bestGain = simplexPairi.second;
	}
	//finally search also in the best simplex
	if(maxSimplexGradient > 0){
		std::pair<std::pair<std::size_t,std::size_t> ,double > simplexPairBest = maxGainSimplex(maxSimplexExample);
		if(simplexPairBest.second > bestGain){
			best = simplexPairBest.first;
			bestGain = simplexPairBest.second;
		}
	}
	i = best.first;
	j = best.second;
	//return the mvp gradient
	return std::max(maxGradient,maxSimplexGradient);
}
466 
467  /// return the largest KKT violation
468  double checkKKT()const
469  {
470  double ret = 0.0;
471  for (std::size_t i=0; i<m_activeEx; i++){
472  Example const& ex = m_examples[i];
473  std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
474  double up = pair.first.first;
475  double down = pair.second.first;
476 
477  //check all search directions
478  //case 1: decreasing the value of a variable
479  ret = std::max(-down, ret);
480  //case 2: if we are not at the boundary increasing the variable
481  if(ex.varsum < m_C)
482  ret = std::max(up, ret);
483  //case 3: along the plane \sum_i alpha_i = m_C
484  if(ex.varsum == m_C)
485  ret = std::max(up-down, ret);
486  }
487  return ret;
488  }
489 
490 protected:
491 
/// data structure describing one variable of the problem
struct Variable
{
	/// index into the example list m_examples
	std::size_t example;
	/// constraint index p (0 <= p < cardP) corresponding to this variable
	std::size_t p;
	/// position of this variable inside its example's avar list
	std::size_t index;
	/// diagonal entry of the big Q-matrix
	double diagonal;
};
504 
/// data structure describing one training example
struct Example
{
	/// example index in the dataset (before shrinking), not the example vector!
	std::size_t index;
	/// label of this example
	unsigned int y;
	/// number of currently active variables of this example
	std::size_t active;
	/// list of all m_cardP variables, in order of the p-index
	std::size_t* var;
	/// list of active variables (first `active` entries are valid)
	std::size_t* avar;
	/// cached total sum of all variables of this example (simplex constraint value)
	double varsum;
	/// diagonal entry of the kernel matrix k(x,x);
	double diagonal;
};
523 
/// \brief Finds the second variable of a working set using maximum gain and returns the pair and gain.
///
/// The partner variable is searched in-between samples, not inside the simplex
/// of i. It returns the best pair (i,j) as well as the gain. If the first
/// variable can't make a step, gain 0 is returned with pair (i,i).
/// \param i the first working set variable
std::pair<std::pair<std::size_t,std::size_t>,double> maxGainBox(std::size_t i)const{
	std::size_t e = m_variables[i].example;
	std::size_t pi = m_variables[i].p;
	unsigned int yi = m_examples[e].y;
	double Qii = m_variables[i].diagonal;
	double gi = m_gradient(i);
	// bounded simplex and ascent direction: no box step possible for i
	if(m_examples[e].varsum == m_C && gi > 0)
		return std::make_pair(std::make_pair(i,i),0.0);


	QpFloatType* k = m_kernelMatrix.row(e, 0, m_activeEx);

	// baseline: the gain of a 1-D step on i alone
	std::size_t bestj = i;
	double bestGain = gi * gi / Qii;

	for (std::size_t a=0; a<m_activeEx; a++)
	{
		//don't search the simplex of the first variable
		if(a == e) continue;

		Example const& exa = m_examples[a];
		unsigned int ya = exa.y;
		bool canGrow = exa.varsum != m_C;

		typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (yi * m_cardP + pi) + ya);
		QpFloatType def = row.defaultvalue;

		// iterate over all p in sync with the sparse row (b tracks sparse entries)
		for (std::size_t p=0, b=0; p < m_cardP; p++)
		{
			std::size_t j = exa.var[p];

			double Qjj = m_variables[j].diagonal;
			double gj = m_gradient(j);
			double Qij = def * k[a];
			//check whether we are at an existing element of the sparse row
			if( b != row.size && p == row.entry[b].index){
				Qij = row.entry[b].value * k[a];
				++b;//move to next element
			}

			//don't check variables which are shrinked or bounded
			if(j >= m_activeVar || (m_alpha(j) == 0 && gj <= 0)|| (!canGrow && gj >= 0))
				continue;

			double gain = detail::maximumGainQuadratic2D(Qii, Qjj, Qij, gi,gj);
			if( bestGain < gain){
				bestj = j;
				bestGain = gain;
			}
		}
	}
	return std::make_pair(std::make_pair(i,bestj),bestGain);
}
583 
/// \brief Returns the best variable pair (i,j) and gain for a given example.
///
/// For a given example all possible pairs of variables are checked and the one
/// giving the maximum gain is returned. This method has a special handling for
/// the simplex case.
/// \param e index of the example whose simplex is searched
std::pair<std::pair<std::size_t,std::size_t>,double> maxGainSimplex(std::size_t e)const{
	Example const& ex = m_examples[e];
	std::size_t pc = ex.active;//number of active variables for this example
	unsigned int y = ex.y;//label of the example
	bool canGrow = ex.varsum < m_C; //are we inside the simplex?
	double Qee = m_examples[e].diagonal; //kernel entry of the example

	double bestGain = -1e100;
	std::size_t besti = 0;
	std::size_t bestj = 0;

	//search through all possible variable pairs
	//for every pair we will build the quadratic subproblem
	//and then decide whether we can do
	// 1. a valid step in the inside of the simplex
	//    that is canGrow==true or the gradients of both variables point inside
	// 2. a valid step along the simplex constraint,
	//    that is canGrow == true and both variables point outside
	// 3. a 1-D step
	//    that is canGrow == true or alpha(i) > 0 & gradient(i) < 0

	//iterate over the active ones as the first variable
	for(std::size_t p1 = 0; p1 != pc; ++p1){
		std::size_t i = ex.avar[p1];
		double gi = m_gradient(i);
		double ai = m_alpha(i);
		double Qii = m_variables[i].diagonal;

		//check whether a 1-D gain is possible
		if((gi < 0 && m_alpha(i) > 0.0) || (gi > 0 && canGrow)){
			double gain = gi * gi / Qii;
			if(gain > bestGain){
				bestGain= gain;
				besti = i;
				bestj = i;
			}
		}

		//now create the 2D problem for all possible variable pairs
		//and then check for possible gain steps
		//find first the row of coefficients for M(y,y,i,j) for all j
		//question: is p1 == m_variables[ex.avar[p1]].p?
		//otherwise: is p1 == m_variables[ex.var[p1]].p for *all* variables?
		typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (y * m_cardP + m_variables[i].p) + y);
		QpFloatType def = row.defaultvalue;

		//we need to iterate over all vars instead of only the active variables to be in sync with the matrix row
		//we will still overstep all inactive variables
		for(std::size_t p2 = 0, b=0; p2 != m_cardP; ++p2){
			std::size_t j = ex.var[p2];
			double gj = m_gradient(j);
			double aj = m_alpha(j);
			double Qjj = m_variables[j].diagonal;

			//create the offdiagonal element of the 2D problem
			double Qij = def * Qee;
			//check whether we are at an existing element of the sparse row
			if( b != row.size && p2 == row.entry[b].index){
				Qij = row.entry[b].value * Qee;
				++b;//move to next element
			}

			//ignore inactive variables or variables we already checked
			if(j >= m_activeVar || j <= i ){
				continue;
			}

			double gain = 0;
			//check whether we can move along the simplex constraint
			if(!canGrow && gi > 0 && gj > 0){
				double gainUp = 0;
				double gainDown = 0;
				//we need to check both search directions for ai
				if(aj > 0 && gi-gj > 0){
					gainUp = detail::maximumGainQuadratic2DOnLine(Qii, Qjj, Qij, gi,gj);
				}
				//also check whether a line search in the other direction works
				if (ai > 0 &&gj-gi > 0){
					gainDown = detail::maximumGainQuadratic2DOnLine(Qjj, Qii, Qij, gj,gi);
				}
				gain = std::max(gainUp,gainDown);
			}
			//else we are inside the simplex
			//in this case only check that both variables can shrink if needed
			else if(!(gi <= 0 && ai == 0) && !(gj<= 0 && aj == 0)){
				gain = detail::maximumGainQuadratic2D(Qii, Qjj, Qij, gi,gj);
			}

			//accept only maximum gain
			if(gain > bestGain){
				bestGain= gain;
				besti = i;
				bestj = j;
			}

		}
	}
	//return best pair and possible gain
	return std::make_pair(std::make_pair(besti,bestj),bestGain);
}
689 
/// \brief For a given simplex returns the MVP as ((up, maxUp), (down, minDown)).
///
/// up/maxUp: the largest gradient over the active variables and its index;
/// down/minDown: the smallest gradient over the active variables with positive
/// alpha (i.e. the best feasible descent direction) and its index.
std::pair<
	std::pair<double,std::size_t>,
	std::pair<double,std::size_t>
> getSimplexMVP(Example const& ex)const{
	std::size_t pc = ex.active;
	double up = -1e100;
	double down = 1e100;
	std::size_t maxUp = ex.avar[0];
	std::size_t minDown = ex.avar[0];
	for (std::size_t p = 0; p != pc; p++)
	{
		std::size_t v = ex.avar[p];
		double a = m_alpha(v);
		double g = m_gradient(v);
		if (g > up) {
			maxUp = v;
			up = g;
		}
		// only variables with positive alpha can be decreased
		if (a > 0.0 && g < down){
			minDown = v;
			down = g;
		}
	}
	return std::make_pair(std::make_pair(up,maxUp),std::make_pair(down,minDown));
}
717 
/// \brief Add a step mu to the cached variable sum of an example.
///
/// Near the boundaries (0 and m_C) the sum is recomputed from the alphas and
/// snapped exactly to the boundary to avoid drift from accumulated round-off.
/// \param exampleId index of the example whose varsum changed
/// \param mu total step taken on the example's variables
void updateVarsum(std::size_t exampleId, double mu){
	double& varsum = m_examples[exampleId].varsum;
	varsum += mu;
	// well inside (0, m_C): the incremental update is accurate enough
	if(varsum > 1.e-12 && m_C-varsum > 1.e-12*m_C)
		return;
	//recompute for numerical accuracy

	varsum = 0;
	for(std::size_t p = 0; p != m_cardP; ++p){
		std::size_t varIndex = m_examples[exampleId].var[p];
		varsum += m_alpha[varIndex];
	}

	// snap to the exact boundary values
	if(varsum < 1.e-14)
		varsum = 0;
	if(m_C-varsum < 1.e-14*m_C)
		varsum = m_C;
}
736 
/// \brief Propagate a step of size mu on variable-row r to the gradient.
///
/// For every active example the kernel value k and the sparse modifier row of
/// m_M are combined; sparse entries get the exact coefficient, all remaining
/// active variables get the row's default value.
/// \param r combined row index cardP*y + p of the updated variable
/// \param mu step size taken on that variable
/// \param q kernel row of the updated variable's example (length >= m_activeEx)
void gradientUpdate(std::size_t r, double mu, QpFloatType* q)
{
	for ( std::size_t a= 0; a< m_activeEx; a++)
	{
		double k = q[a];
		Example& ex = m_examples[a];
		typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
		QpFloatType def = row.defaultvalue;
		// sparse entries: apply the difference to the default value
		for (std::size_t b=0; b<row.size; b++){
			std::size_t p = row.entry[b].index;
			m_gradient(ex.var[p]) -= mu * (row.entry[b].value - def) * k;
		}
		// default value applies uniformly to all active variables of ex
		if (def != 0.0){
			double upd = mu* def * k;
			for (std::size_t b=0; b<ex.active; b++)
				m_gradient(ex.avar[b]) -= upd;
		}
	}
}
756 
/// \brief Shrink (deactivate) a single variable.
///
/// Moves the variable to the end of its example's active list, then swaps it
/// with the last globally active variable and decrements the active counter.
/// If the example has no active variables left, the example is deactivated too.
/// NOTE(review): this extraction shows only m_alpha and m_linear being swapped
/// between v and j; the upstream source presumably also swaps m_gradient(v/j)
/// and the m_variables[v]/m_variables[j] records here (the lines appear to
/// have been lost in extraction) — without them the bookkeeping below would be
/// inconsistent. Confirm against the original file.
/// \param v the variable to deactivate (must currently be active)
void deactivateVariable(std::size_t v)
{
	std::size_t ev = m_variables[v].example;
	std::size_t iv = m_variables[v].index;
	std::size_t pv = m_variables[v].p;
	Example* exv = &m_examples[ev];

	// move v to the last active slot of its example
	std::size_t ih = exv->active - 1;
	std::size_t h = exv->avar[ih];
	m_variables[v].index = ih;
	m_variables[h].index = iv;
	std::swap(exv->avar[iv], exv->avar[ih]);
	iv = ih;
	exv->active--;

	// swap v with the last globally active variable j
	std::size_t j = m_activeVar - 1;
	std::size_t ej = m_variables[j].example;
	std::size_t ij = m_variables[j].index;
	std::size_t pj = m_variables[j].p;
	Example* exj = &m_examples[ej];

	// exchange entries in the lists
	std::swap(m_alpha(v), m_alpha(j));
	std::swap(m_linear(v), m_linear(j));

	m_variables[exv->avar[iv]].index = ij;
	m_variables[exj->avar[ij]].index = iv;
	exv->avar[iv] = j;
	exv->var[pv] = j;
	exj->avar[ij] = v;
	exj->var[pj] = v;

	m_activeVar--;

	//finally check if the example is needed anymore
	if(exv->active == 0)
		deactivateExample(ev);
}
798 
/// \brief Shrink (deactivate) an example by swapping it with the last active one.
///
/// NOTE(review): this extraction does not show the swap of m_examples[e] and
/// m_examples[j] that the re-labeling loop below assumes has happened (the
/// line appears to have been lost in extraction) — confirm against the
/// original file.
/// \param e the example to deactivate (must currently be active)
void deactivateExample(std::size_t e)
{
	std::size_t j = m_activeEx - 1;
	m_activeEx--;
	if(e == j) return;


	// fix the example back-references of all variables of both examples
	std::size_t* pe = m_examples[e].var;
	std::size_t* pj = m_examples[j].var;
	for (std::size_t v = 0; v < m_cardP; v++)
	{
		// all variables of a deactivated example must already be shrunk
		SHARK_ASSERT(pj[v] >= m_activeVar);
		m_variables[pe[v]].example = e;
		m_variables[pj[v]].example = j;
	}
	// keep the cached kernel matrix consistent with the permutation
	m_kernelMatrix.flipColumnsAndRows(e, j);
}
819 
820  /// \brief Returns the original index of the example of a variable in the dataset before optimization.
821  ///
822  /// Shrinking is an internal detail so the communication with the outside world uses the original indizes.
823  std::size_t originalIndex(std::size_t v)const{
824  std::size_t i = m_variables[v].example;
825  return m_examples[i].index;//i before shrinking
826  }
827 
/// kernel matrix (precomputed matrix or matrix cache); not owned
Matrix& m_kernelMatrix;

/// kernel modifiers
QpSparseArray<QpFloatType> const& m_M; // M(|P|*y_i+p, y_j, q)

/// complexity constant; upper bound on all variables
double m_C;

/// number of classes in the problem
std::size_t m_classes;

/// number of dual variables per example
std::size_t m_cardP;

/// number of examples in the problem (size of the kernel matrix)
std::size_t m_numExamples;

/// number of variables in the problem = m_numExamples * m_cardP
std::size_t m_numVariables;

/// linear part of the objective function
RealVector m_linear;

/// solution candidate
RealVector m_alpha;

/// gradient of the objective function
/// The m_gradient array is of fixed size and not subject to shrinking.
RealVector m_gradient;

/// information about each training example
std::vector<Example> m_examples;

/// information about each variable of the problem
std::vector<Variable> m_variables;

/// space for the example[i].var pointers
std::vector<std::size_t> m_storage1;

/// space for the example[i].avar pointers
std::vector<std::size_t> m_storage2;

/// number of currently active examples
std::size_t m_activeEx;

/// number of currently active variables
std::size_t m_activeVar;

/// should the problem use the shrinking heuristics?
// NOTE(review): the declaration `bool m_useShrinking;` appears to have been
// lost in extraction (the member is assigned in the constructor and read in
// shrink()) — restore from the original file.

/// true if the problem has already been unshrinked
// NOTE(review): likewise, the declaration `bool bUnshrinked;` appears to have
// been lost in extraction — restore from the original file.
};
883 
884 
885 template<class Matrix>
887 public:
	typedef typename Matrix::QpFloatType QpFloatType;
	/// \brief Construct the bias solver for a given simplex decomposition problem.
	/// \param problem the problem whose bias is optimized; not owned, must outlive this solver
	BiasSolverSimplex(QpMcSimplexDecomp<Matrix>* problem) : m_problem(problem){}
890 
	/// \brief Optimize the bias vector by alternating QP solves and Rprop steps.
	///
	/// Repeats until the problem's KKT violation drops below stop.minAccuracy:
	/// solve the dual problem for fixed bias, then run an Rprop loop that moves
	/// the bias along (a subgradient of) the primal objective and propagates
	/// each step into the dual's linear term.
	/// \param bias [in/out] bias vector, one entry per class
	/// \param stop stopping condition for the inner QP solver and accuracy target
	/// \param nu sparse coefficients mapping (label, p) rows to bias components
	/// \param sumToZero if true, gradient and step are projected onto the
	///        subspace \f$ \sum_c b_c = 0 \f$
	/// \param prop optional output of solution statistics
	void solve(
		RealVector& bias,
		QpStoppingCondition& stop,
		QpSparseArray<QpFloatType> const& nu,
		bool sumToZero,
		QpSolutionProperties* prop = NULL
	){
		std::size_t classes = bias.size();
		std::size_t numExamples = m_problem->getNumExamples();
		std::size_t cardP = m_problem->cardP();
		RealVector stepsize(classes, 0.01);
		RealVector prev(classes,0);
		RealVector step(classes);

		double start_time = Timer::now();
		unsigned long long iterations = 0;

		do{
			// solve the dual problem for the current bias
			QpSolutionProperties propInner;
			QpSolver<QpMcSimplexDecomp<Matrix> > solver(*m_problem);
			solver.solve(stop, &propInner);
			iterations += propInner.iterations;

			// Rprop loop to update the bias
			while (true)
			{
				RealMatrix dualGradient = m_problem->solutionGradient();
				// compute the primal gradient w.r.t. bias
				RealVector grad(classes,0);

				for (std::size_t i=0; i<numExamples; i++){
					// find the constraint with the largest positive dual gradient;
					// largestP == cardP serves as the "none found" sentinel
					std::size_t largestP = cardP;
					double largest_value = 0.0;
					for (std::size_t p=0; p<cardP; p++)
					{
						if (dualGradient(i,p) > largest_value)
						{
							largest_value = dualGradient(i,p);
							largestP = p;
						}
					}
					if (largestP < cardP)
					{
						unsigned int y = m_problem->label(i);
						typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP + largestP);
						for (std::size_t b=0; b != row.size; b++)
							grad(row.entry[b].index) -= row.entry[b].value;
					}
				}

				if (sumToZero)
				{
					// project the gradient onto sum(bias) == 0
					grad -= sum(grad) / classes;
				}

				// Rprop: sign-based step with adaptive per-component step sizes
				for (std::size_t c=0; c<classes; c++)
				{
					double g = grad(c);
					if (g > 0.0)
						step(c) = -stepsize(c);
					else if (g < 0.0)
						step(c) = stepsize(c);

					// grow the step size on consistent sign, shrink on sign change
					double gg = prev(c) * grad(c);
					if (gg > 0.0)
						stepsize(c) *= 1.2;
					else
						stepsize(c) *= 0.5;
				}
				prev = grad;

				if (sumToZero)
				{
					// project the step onto sum(bias) == 0
					step -= sum(step) / classes;
				}

				// update the solution and the dual gradient
				bias += step;
				performBiasUpdate(step,nu);

				if (max(stepsize) < 0.01 * stop.minAccuracy) break;
			}
		}while(m_problem->checkKKT()> stop.minAccuracy);

		if (prop != NULL)
		{
			double finish_time = Timer::now();

			prop->accuracy = m_problem->checkKKT();
			prop->value = m_problem->functionValue();
			prop->iterations = iterations;
			prop->seconds = finish_time - start_time;
		}
	}
988 private:
989  void performBiasUpdate(
990  RealVector const& step, QpSparseArray<QpFloatType> const& nu
991  ){
992  std::size_t numExamples = m_problem->getNumExamples();
993  std::size_t cardP = m_problem->cardP();
994  RealMatrix deltaLinear(numExamples,cardP,0.0);
995  for (std::size_t i=0; i<numExamples; i++){
996  for (std::size_t p=0; p<cardP; p++){
997  unsigned int y = m_problem->label(i);
998  typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP +p);
999  for (std::size_t b=0; b<row.size; b++)
1000  {
1001  deltaLinear(i,p) -= row.entry[b].value * step(row.entry[b].index);
1002  }
1003  }
1004  }
1005  m_problem->addDeltaLinear(deltaLinear);
1006 
1007  }
1008  QpMcSimplexDecomp<Matrix>* m_problem;
1009 };
1010 
1011 
1012 }
1013 #endif