BoxConstrainedProblems.h
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief Quadratic program definitions.
5  *
6  *
7  *
8  * \author T. Glasmachers, O.Krause
9  * \date 2013
10  *
11  *
12  * \par Copyright 1995-2017 Shark Development Team
13  *
14  * <BR><HR>
15  * This file is part of Shark.
16  * <http://shark-ml.org/>
17  *
18  * Shark is free software: you can redistribute it and/or modify
19  * it under the terms of the GNU Lesser General Public License as published
20  * by the Free Software Foundation, either version 3 of the License, or
21  * (at your option) any later version.
22  *
23  * Shark is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26  * GNU Lesser General Public License for more details.
27  *
28  * You should have received a copy of the GNU Lesser General Public License
29  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
30  *
31  */
32 #ifndef SHARK_ALGORITHMS_QP_BOXCONSTRAINEDPROBLEMS_H
33 #define SHARK_ALGORITHMS_QP_BOXCONSTRAINEDPROBLEMS_H
34 
36 #include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
38 
39 namespace shark {
40 
41 /// \brief Working set selection by maximization of the projected gradient.
42 ///
43 /// This selection operator picks the largest and second largest variable index if possible.
/// On return i holds the variable with the largest feasible projected
/// gradient and j the runner-up; if no second candidate exists, j is set
/// equal to i. The return value is the largest projected gradient; a
/// value of 0 signals that the active set is optimal.
template<class Problem>
double operator()(Problem& problem, std::size_t& i, std::size_t& j){
	i = 0;
	j = 0;
	double largestGradient = 0;
	double secondLargestGradient = 0;

	for (std::size_t a = 0; a < problem.active(); a++){
		double g = problem.gradient(a);
		// an upward step is feasible only if a is not at its upper bound;
		// candidate strength is +g
		if (!problem.isUpperBound(a) && g > secondLargestGradient){
			secondLargestGradient = g;
			j = a;
		}
		// a downward step is feasible only if a is not at its lower bound;
		// candidate strength is -g
		if (!problem.isLowerBound(a) && -g > secondLargestGradient){
			secondLargestGradient = -g;
			j = a;
		}
		// keep the invariant that (i, largestGradient) is the best pair
		// seen so far and (j, secondLargestGradient) the runner-up
		if(secondLargestGradient > largestGradient){
			std::swap(secondLargestGradient,largestGradient);
			std::swap(i,j);
		}
	}
	// no usable second variable: fall back to the 1-d problem on i
	if(secondLargestGradient == 0)
		j = i;
	return largestGradient;
}

/// No internal state to reset.
void reset(){}
};
74 
75 /// \brief Working set selection by maximization of the projected gradient.
76 ///
77 /// This selection operator picks a single variable index.
/// On return both i and j hold the index of the variable with the largest
/// feasible projected gradient; the return value is that gradient.
template<class Problem>
double operator()(Problem& problem, std::size_t& i, std::size_t& j){
	// Delegate to the pairwise selection, then collapse the pair to a
	// single variable.
	// NOTE(review): 'criterion' is presumably a MaximumGradientCriterion
	// member declared in this struct — confirm against the full header.
	double value = criterion(problem, i,j);
	j = i; //we just use one variable here
	return value;
}

/// No internal state to reset.
void reset(){}
};
89 
90 /// \brief Working set selection by maximization of the dual objective gain.
/// Picks i by the first-order (gradient) criterion and j by maximizing the
/// gain of the 2-d sub-problem in (i, j). Returns the maximal projected
/// gradient (0 means the current point is optimal on the active set).
template<class Problem>
double operator()(Problem& problem, std::size_t& i, std::size_t& j){
	//choose first variable by first order criterion
	MaximumGradientCriterion firstOrder;
	double maxGrad = firstOrder(problem,i,j);
	if (maxGrad == 0.0)
		return maxGrad; // already optimal: no variable can move

	double gi = problem.gradient(i);
	// row i of the quadratic matrix, restricted to the active variables
	typename Problem::QpFloatType* q = problem.quadratic().row(i, 0, problem.active());
	double Qii = problem.diagonal(i);

	// select second variable j with second order method
	double maxGain = 0.0;
	for (std::size_t a=0; a<problem.active(); a++)
	{
		if (a == i) continue;
		double ga = problem.gradient(a);
		// only variables with a feasible improving direction qualify
		if (
			(!problem.isLowerBound(a) && ga < 0.0)
			|| (!problem.isUpperBound(a) && ga > 0.0)
		){
			double Qia = q[a];
			double Qaa = problem.diagonal(a);
			// objective gain of the optimal step in the (i, a) plane
			double gain = detail::maximumGainQuadratic2D(Qii,Qaa,Qia,gi,ga);
			if (gain > maxGain)
			{
				maxGain = gain;
				j = a;
			}
		}
	}
	// if no pair had positive gain, j keeps the first-order choice

	return maxGrad; // solution is not optimal
}

/// No internal state to reset.
void reset(){}
};
130 
131 /// \brief Quadratic program with box constraints.
132 ///
133 /// \par
/// An instance of this class represents a quadratic program of the type
///     maximize    f(alpha) = v' alpha - 1/2 alpha' Q alpha
///     subject to  boxMin(i) <= alpha(i) <= boxMax(i)  for all i,
/// where Q is the quadratic matrix and v the linear part of the objective.
136 ///
137 template<class SVMProblem>
139 public:
140  typedef typename SVMProblem::QpFloatType QpFloatType;
143  //~ typedef MaximumGradientCriterion PreferedSelectionStrategy;
144 
/// \brief Wraps the given SVM problem and initializes the internal state.
///
/// The cached gradient is seeded with the linear part of the objective
/// and then corrected by -Q*alpha for every nonzero coefficient, so that
/// m_gradient == linear - Q*alpha holds on exit. All variables start
/// active and their bound-status flags are derived from the initial alpha.
BoxConstrainedProblem(SVMProblem& problem)
: m_problem(problem)
, m_gradient(problem.linear)
, m_active (problem.dimensions())
, m_alphaStatus(problem.dimensions(),AlphaFree){
	//compute the gradient if alpha != 0
	for (std::size_t i=0; i != dimensions(); i++){
		double v = alpha(i);
		if (v != 0.0){
			// subtract v times row i of Q from the gradient
			QpFloatType* q = quadratic().row(i, 0, dimensions());
			for (std::size_t a=0; a < dimensions(); a++)
				m_gradient(a) -= q[a] * v;
		}
		updateAlphaStatus(i);
	}
}
/// \brief Total number of variables of the problem.
std::size_t dimensions()const{
	return m_problem.dimensions();
}

/// \brief Number of active variables; selection and updates only touch
/// indices below this value.
std::size_t active()const{
	return m_active;
}

/// \brief Lower box bound of variable i.
///
/// For deactivated variables the bound collapses to the current alpha,
/// which freezes the variable in place.
double boxMin(std::size_t i)const{
	return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMin(i);
}
/// \brief Upper box bound of variable i (collapses to alpha(i) when
/// the variable is deactivated).
double boxMax(std::size_t i)const{
	return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMax(i);
}
/// \brief True if variable i currently sits at its lower bound.
bool isLowerBound(std::size_t i)const{
	return m_alphaStatus[i] & AlphaLowerBound;
}
/// \brief True if variable i currently sits at its upper bound.
bool isUpperBound(std::size_t i)const{
	return m_alphaStatus[i] & AlphaUpperBound;
}
/// \brief True if variable i is deactivated (both bound flags set).
bool isDeactivated(std::size_t i)const{
	return isUpperBound(i) && isLowerBound(i);
}
184 
/// representation of the quadratic part of the objective function
MatrixType& quadratic(){
	return m_problem.quadratic;
}

/// \brief Linear part of the objective for variable i.
double linear(std::size_t i)const{
	return m_problem.linear(i);
}

/// \brief Current value of variable i.
double alpha(std::size_t i)const{
	return m_problem.alpha(i);
}

/// \brief Diagonal entry Q(i,i) of the quadratic matrix.
double diagonal(std::size_t i)const{
	return m_problem.diagonal(i);
}

/// \brief Cached gradient of the objective with respect to variable i.
double gradient(std::size_t i)const{
	return m_gradient(i);
}

/// \brief Maps the internal index i to the caller's original variable index.
std::size_t permutation(std::size_t i)const{
	return m_problem.permutation[i];
}
209 
210  RealVector getUnpermutedAlpha()const{
211  RealVector alpha(dimensions());
212  for (std::size_t i=0; i<dimensions(); i++)
213  alpha(m_problem.permutation[i]) = m_problem.alpha(i);
214  return alpha;
215  }
216 
///\brief Does an update of SMO given a working set with indices i and j.
///
/// If i == j the 1-d sub-problem in variable i is solved analytically;
/// otherwise the 2-d sub-problem in (i, j) is solved inside the box.
/// In both cases the step sizes mu are measured as the change in alpha,
/// the cached gradient is updated accordingly, and the bound status of
/// the changed variables is refreshed.
virtual void updateSMO(std::size_t i, std::size_t j){
	SIZE_CHECK(i < active());
	SIZE_CHECK(j < active());
	if(i == j){//both variables are identical, thus solve the 1-d problem.
		// get the matrix row corresponding to the working set
		QpFloatType* q = quadratic().row(i, 0, active());

		// update alpha, that is, solve the sub-problem defined by i
		// and compute the stepsize mu of the step
		double mu = -alpha(i);
		detail::solveQuadraticEdge(m_problem.alpha(i),gradient(i),diagonal(i),boxMin(i),boxMax(i));
		mu+=alpha(i);

		// update the internal states: gradient -= mu * (row i of Q)
		for (std::size_t a = 0; a < active(); a++)
			m_gradient(a) -= mu * q[a];

		updateAlphaStatus(i);
		return;
	}

	double Li = boxMin(i);
	double Ui = boxMax(i);
	double Lj = boxMin(j);
	double Uj = boxMax(j);

	// get the matrix rows corresponding to the working set
	QpFloatType* qi = quadratic().row(i, 0, active());
	QpFloatType* qj = quadratic().row(j, 0, active());

	// solve the 2D sub-problem imposed by the two chosen variables
	// and compute the stepsizes mu
	double mui = -alpha(i);
	double muj = -alpha(j);
	detail::solveQuadratic2DBox(m_problem.alpha(i), m_problem.alpha(j),
		m_gradient(i), m_gradient(j),
		diagonal(i), qi[j], diagonal(j),
		Li, Ui, Lj, Uj
	);
	mui += alpha(i);
	muj += alpha(j);

	// update the internal states: gradient -= mui*Q_i - muj*Q_j (rows)
	for (std::size_t a = 0; a < active(); a++)
		m_gradient(a) -= mui * qi[a] + muj * qj[a];

	updateAlphaStatus(i);
	updateAlphaStatus(j);
}
267 
///\brief Returns the current function value of the problem.
///
/// Uses the invariant m_gradient == linear - Q*alpha, so
/// 0.5*(gradient + linear)'alpha == linear'alpha - 0.5*alpha'Q*alpha.
double functionValue()const{
	return 0.5*inner_prod(m_gradient+m_problem.linear,m_problem.alpha);
}
272 
/// \brief No-op: this formulation never shrinks variables (always false).
bool shrink(double){return false;}
/// \brief No-op counterpart of shrink().
void reshrink(){}
/// \brief No-op counterpart of shrink().
void unshrink(){}
276 
277  /// \brief Define the initial solution for the iterative solver.
278  ///
279  /// This method can be used to warm-start the solver. It requires a
280  /// feasible solution (alpha) and the corresponding gradient of the
281  /// dual objective function.
282  void setInitialSolution(RealVector const& alpha, RealVector const& gradient)
283  {
284  std::size_t n = dimensions();
285  SIZE_CHECK(alpha.size() == n);
286  SIZE_CHECK(gradient.size() == n);
287  for (std::size_t i=0; i<n; i++)
288  {
289  std::size_t j = permutation(i);
290  SHARK_ASSERT(alpha(j) >= boxMin(j) && alpha(j) <= boxMax(j));
291  m_problem.alpha(i) = alpha(j);
292  m_gradient(i) = gradient(j);
293  updateAlphaStatus(i);
294  }
295  }
296 
297  /// \brief Define the initial solution for the iterative solver.
298  ///
299  /// This method can be used to warm-start the solver. It requires a
300  /// feasible solution (alpha), for which it computes the gradient of
301  /// the dual objective function. Note that this is a quadratic time
302  /// operation in the number of non-zero coefficients.
void setInitialSolution(RealVector const& alpha)
{
	std::size_t n = dimensions();
	SIZE_CHECK(alpha.size() == n);
	// compute the dual gradient linear - Q*alpha from scratch
	RealVector gradient = m_problem.linear;
	blas::vector<QpFloatType> q(n);
	for (std::size_t i=0; i<n; i++)
	{
		double a = alpha(i);
		if (a == 0.0) continue; // zero coefficients contribute nothing
		// fetch row i of Q into the preallocated buffer q
		m_problem.quadratic.row(i, 0, n, q.storage());
		noalias(gradient) -= a * q;
	}
	setInitialSolution(alpha, gradient);
}
318 
///\brief Remove the i-th example from the problem.
///
/// Sets alpha(i) to zero, removes its contribution from the cached
/// gradient, and marks the variable as deactivated so that boxMin/boxMax
/// collapse onto its (now zero) value.
void deactivateVariable(std::size_t i){
	SIZE_CHECK(i < dimensions());
	double alphai = alpha(i);
	m_problem.alpha(i) = 0;
	//update the internal state: gradient += alphai * (row i of Q)
	QpFloatType* qi = quadratic().row(i, 0, active());
	for (std::size_t a = 0; a < active(); a++)
		m_gradient(a) += alphai * qi[a];
	m_alphaStatus[i] = AlphaDeactivated;
}
///\brief Reactivate a previously deactivated variable.
///
/// Recomputing the status clears the deactivation flag, because
/// updateAlphaStatus resets it before comparing against the true bounds.
void activateVariable(std::size_t i){
	SIZE_CHECK(i < dimensions());
	updateAlphaStatus(i);
}
335 
336  /// exchange two variables via the permutation
337  void flipCoordinates(std::size_t i, std::size_t j)
338  {
339  SIZE_CHECK(i < dimensions());
340  SIZE_CHECK(j < dimensions());
341  if (i == j) return;
342 
343  m_problem.flipCoordinates(i, j);
344  std::swap( m_gradient[i], m_gradient[j]);
345  std::swap( m_alphaStatus[i], m_alphaStatus[j]);
346  }
347 
348  /// \brief adapts the linear part of the problem and updates the internal data structures accordingly.
349  virtual void setLinear(std::size_t i, double newValue){
350  m_gradient(i) -= linear(i);
351  m_gradient(i) += newValue;
352  m_problem.linear(i) = newValue;
353  }
354 
355  double checkKKT()const{
356  double maxViolation = 0.0;
357  for(std::size_t i = 0; i != dimensions(); ++i){
358  if(isDeactivated(i)) continue;
359  if(!isUpperBound(i)){
360  maxViolation = std::max(maxViolation, gradient(i));
361  }
362  if(!isLowerBound(i)){
363  maxViolation = std::max(maxViolation, -gradient(i));
364  }
365  }
366  return maxViolation;
367  }
368 
protected:
	/// the wrapped problem supplying Q, the linear part, alpha, and bounds
	SVMProblem& m_problem;

	/// gradient of the objective function at the current alpha
	RealVector m_gradient;

	/// number of active variables; initialized to dimensions() and left
	/// unchanged here (shrink() is a no-op) — derived shrinking strategies
	/// may reduce it
	std::size_t m_active;

	/// per-variable bound status flags: AlphaFree, AlphaLowerBound,
	/// AlphaUpperBound, AlphaDeactivated (presumably both bound bits set
	/// — verify against the enum definition)
	std::vector<char> m_alphaStatus;
378 
/// \brief Recomputes the bound-status flags of variable i from its value.
///
/// The status must be reset to AlphaFree *before* querying boxMin/boxMax:
/// both accessors return alpha(i) while the variable is flagged as
/// deactivated, so clearing the flag first is what allows a deactivated
/// variable to be reactivated against its true bounds.
void updateAlphaStatus(std::size_t i){
	SIZE_CHECK(i < dimensions());
	m_alphaStatus[i] = AlphaFree;
	if(m_problem.alpha(i) == boxMax(i))
		m_alphaStatus[i] |= AlphaUpperBound;
	if(m_problem.alpha(i) == boxMin(i))
		m_alphaStatus[i] |= AlphaLowerBound;
}
387 
388  bool testShrinkVariable(std::size_t a, double largestUp, double smallestDown)const{
389  smallestDown = std::min(smallestDown, 0.0);
390  largestUp = std::max(largestUp, 0.0);
391  if (
392  ( isLowerBound(a) && gradient(a) < smallestDown)
393  || ( isUpperBound(a) && gradient(a) >largestUp)
394  ){
395  // In this moment no feasible step including this variable
396  // can improve the objective. Thus deactivate the variable.
397  return true;
398  }
399  return false;
400  }
401 };
402 
/// \brief Box-constrained problem combined with the shrinking heuristic.
///
/// Convenience wrapper: equivalent to instantiating
/// BoxBasedShrinkingStrategy directly on a BoxConstrainedProblem.
template<class Problem>
class BoxConstrainedShrinkingProblem: public BoxBasedShrinkingStrategy<BoxConstrainedProblem<Problem> >{
public:
	/// \param problem  the underlying SVM problem
	/// \param shrink   whether shrinking is enabled (default: true)
	BoxConstrainedShrinkingProblem(Problem& problem, bool shrink=true)
	:BoxBasedShrinkingStrategy<BoxConstrainedProblem<Problem> >(problem,shrink){}
};
409 
410 }
411 #endif