BoxBasedShrinkingStrategy.h
/*!
 *
 * \brief Shrinking strategy based on box constraints
 *
 *
 * \author T. Glasmachers, O. Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_ALGORITHMS_QP_BOXBASEDSHRINKINGSTRATEGY_H
#define SHARK_ALGORITHMS_QP_BOXBASEDSHRINKINGSTRATEGY_H

namespace shark{

/// \brief Takes a box-constrained QP-type problem and implements shrinking on it.
///
/// Given a QP-type problem, this class implements a strategy that allows the problem
/// to be shrunk and unshrunk efficiently. A variable with an active box constraint is
/// shrunk (removed) from the problem as soon as no step involving that variable can
/// currently lead to a gain. Whether this is the case is problem dependent, since it
/// may involve additional constraints. Therefore, every Problem must implement the
/// method testShrinkVariable.
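///
/// A minimal usage sketch (the wrapped problem type, the working-pair selection and
/// the stopping test are assumptions for illustration, not part of this file): the
/// strategy is stacked on top of a box-constrained problem type and shrink() is
/// called between SMO steps.
/// \code
/// // MyBoxProblem is a hypothetical type providing the Problem interface used here,
/// // in particular testShrinkVariable, updateSMO and flipCoordinates.
/// BoxBasedShrinkingStrategy<MyBoxProblem> problem(baseProblem);
/// while (maxViolation > epsilon){            // stopping test supplied by the caller
///     std::size_t i, j;
///     selectWorkingPair(problem, i, j);      // e.g., via PreferedSelectionStrategy
///     problem.updateSMO(i, j);               // SMO step; keeps the edge gradient in sync
///     problem.shrink(epsilon);               // drop variables that cannot yield a gain
/// }
/// problem.unshrink();                        // restore the full problem before reading alpha
/// \endcode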
template<class Problem>
struct BoxBasedShrinkingStrategy : public Problem{
public:
	typedef typename Problem::QpFloatType QpFloatType;
	typedef typename Problem::MatrixType MatrixType;
	typedef typename Problem::PreferedSelectionStrategy PreferedSelectionStrategy;

	template<class ProblemT>
	BoxBasedShrinkingStrategy(ProblemT& problem, bool shrink=true)
	: Problem(problem)
	, m_isUnshrinked(false)
	, m_shrink(shrink)
	, m_gradientEdge(problem.linear)
	{ }

	using Problem::alpha;
	using Problem::gradient;
	using Problem::linear;
	using Problem::active;
	using Problem::dimensions;
	using Problem::quadratic;
	using Problem::isLowerBound;
	using Problem::isUpperBound;
	using Problem::boxMin;
	using Problem::boxMax;
	using Problem::setInitialSolution;

	virtual void updateSMO(std::size_t i, std::size_t j){
		double aiOld = alpha(i);
		double ajOld = alpha(j);
		//call base class to do the step
		Problem::updateSMO(i,j);
		double ai = alpha(i);
		double aj = alpha(j);

		// update the gradient edge data structure to keep up with changes
		updateGradientEdge(i,aiOld,ai);
		updateGradientEdge(j,ajOld,aj);
	}

	bool shrink(double epsilon){
		if(!m_shrink) return false;

		double largestUp;
		double smallestDown;
		getMaxKKTViolations(largestUp,smallestDown,active());

		// check whether unshrinking is necessary at this accuracy level
		if (!m_isUnshrinked && (largestUp - smallestDown < 10.0 * epsilon))
		{
			unshrink();
			//recalculate maximum KKT violation for immediate re-shrinking
			getMaxKKTViolations(largestUp, smallestDown, dimensions());
		}
		//shrink
		auto& active = this->m_active;
		for (std::size_t a = active; a > 0; --a){
			std::size_t i = a-1;
			if(Problem::testShrinkVariable(i, largestUp, smallestDown)){
				Problem::flipCoordinates(i,active-1);
				std::swap( m_gradientEdge[i], m_gradientEdge[active-1]);
				--active;
			}
		}
		return true;
	}

	///\brief Unshrink the problem
	void unshrink(){
		if (active() == dimensions()) return;
		m_isUnshrinked = true;

		// Recompute the gradient of the whole problem.
		// We assume here that all shrunk variables are at a bound of the box.
		// The gradient of the active components is already correct. For the whole
		// variable set, m_gradientEdge stores the gradient as if only the variables
		// at a bound of the box contributed.
		// Thus we only have to subtract the contribution of those active variables
		// which are not at a bound.
		for (std::size_t a = active(); a < dimensions(); a++)
			this->m_gradient(a) = m_gradientEdge(a);

		for (std::size_t i = 0; i < active(); i++){
			//variables at a bound are already accounted for in m_gradientEdge
			if (isUpperBound(i) || isLowerBound(i)) continue;

			QpFloatType* q = quadratic().row(i, 0, dimensions());
			for (std::size_t a = active(); a < dimensions(); a++)
				this->m_gradient(a) -= alpha(i) * q[a];
		}

		this->m_active = dimensions();
	}

	void setShrinking(bool shrinking){
		m_shrink = shrinking;
		if(!shrinking)
			unshrink();
	}

	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha) and the corresponding gradient of the
	/// dual objective function.
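	/// The additional argument gradientEdge supplies the edge part of the gradient
	/// maintained by this class, i.e., the gradient of the dual objective evaluated
	/// as if only the alpha components lying on a bound of the box contributed
	/// (this is how the single-argument overload below computes it).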
	void setInitialSolution(RealVector const& alpha, RealVector const& gradient, RealVector const& gradientEdge){
		Problem::setInitialSolution(alpha,gradient);
		std::size_t n = dimensions();
		SIZE_CHECK(gradientEdge.size() == n);
		for (std::size_t i=0; i<n; i++){
			std::size_t j = this->permutation(i);
			m_gradientEdge(i) = gradientEdge(j);
		}
	}

	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha), for which it computes the gradient of
	/// the dual objective function. Note that this is a quadratic time
	/// operation in the number of non-zero coefficients.
	void setInitialSolution(RealVector const& alpha){
		std::size_t n = dimensions();
		SIZE_CHECK(alpha.size() == n);
		RealVector gradient = this->m_problem.linear;
		RealVector gradientEdge = this->m_problem.linear;
		blas::vector<QpFloatType> q(n);
		std::vector<std::size_t> inverse(n);
		for (std::size_t i=0; i<n; i++)
			inverse[this->permutation(i)] = i;
		for (std::size_t i=0; i<n; i++)
		{
			double a = alpha(i);
			if (a == 0.0) continue;
			this->m_problem.quadratic.row(i, 0, n, q.raw_storage().values);
			noalias(gradient) -= a * q;
			std::size_t j = inverse[i];
			if (a == boxMin(j) || a == boxMax(j)) gradientEdge -= a * q;
		}
		setInitialSolution(alpha, gradient, gradientEdge);
	}

	///\brief Remove the i-th example from the problem.
	void deactivateVariable(std::size_t i){
		SIZE_CHECK(i < dimensions());
		//remember the old alpha value, let the base class deactivate the
		//variable and propagate the resulting change to the gradient edge
		double alphaOld = alpha(i);
		Problem::deactivateVariable(i);
		updateGradientEdge(i, alphaOld, alpha(i));
	}

	/// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
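	///
	/// Note on the implementation below (a reading of the code, not an additional
	/// requirement): m_gradientEdge has the form linear(i) minus the contribution of
	/// the bounded alphas through the quadratic term (as computed in setInitialSolution).
	/// If box and solution are scaled by the same factor, only that contribution scales,
	/// hence the affine update in the else-branch; if the factors differ, the edge
	/// gradient is reset to the linear part.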
	void scaleBoxConstraints(double factor, double variableScalingFactor){
		Problem::scaleBoxConstraints(factor,variableScalingFactor);
		if(factor != variableScalingFactor){
			for(std::size_t i = 0; i != dimensions(); ++i){
				m_gradientEdge(i) = linear(i);
			}
		}
		else{
			for(std::size_t i = 0; i != dimensions(); ++i){
				m_gradientEdge(i) -= linear(i);
				m_gradientEdge(i) *= factor;
				m_gradientEdge(i) += linear(i);
			}
		}
	}

	/// \brief Adapts the linear part of the problem and updates the internal data structures accordingly.
	virtual void setLinear(std::size_t i, double newValue){
		m_gradientEdge(i) -= linear(i);
		m_gradientEdge(i) += newValue;
		Problem::setLinear(i,newValue);
	}

	///\brief Swap indices (i,j)
	void flipCoordinates(std::size_t i,std::size_t j){
		Problem::flipCoordinates(i,j);
		std::swap( m_gradientEdge[i], m_gradientEdge[j]);
	}

private:
	///\brief Updates the edge part of the gradient when an alpha value has changed.
	///
	/// This function supplements the base class update and is called whenever
	/// an alpha value is changed.
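	///
	/// The invariant maintained here (a summary of this implementation, matching the
	/// computation in the single-argument setInitialSolution) is: for every index a,
	/// m_gradientEdge(a) = linear(a) - sum over all i with alpha(i) on a bound of
	/// alpha(i) * Q(i,a).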
	void updateGradientEdge(std::size_t i, double oldAlpha, double newAlpha){
		SIZE_CHECK(i < active());
		if(!m_shrink || oldAlpha==newAlpha) return;
		bool isInsideOld = oldAlpha > boxMin(i) && oldAlpha < boxMax(i);
		bool isInsideNew = newAlpha > boxMin(i) && newAlpha < boxMax(i);
		//nothing to do if the new value lies strictly inside the box and the old
		//value was either strictly inside as well or exactly 0 (no stored contribution)
		if((oldAlpha == 0 || isInsideOld) && isInsideNew)
			return;

		//compute change to the gradient
		double diff = 0;
		if(!isInsideOld)//the value was on a bound, so remove its old influence on the gradient
			diff -= oldAlpha;
		if(!isInsideNew){//variable hit a bound or moved from one bound to another
			diff += newAlpha;
		}

		QpFloatType* q = quadratic().row(i, 0, dimensions());
		for(std::size_t a = 0; a != dimensions(); ++a){
			m_gradientEdge(a) -= diff*q[a];
		}
	}

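	/// \brief Computes the quantities needed for the maximal KKT violation over the first maxIndex variables.
	///
	/// largestUp is the largest gradient among variables which are not at their upper
	/// bound, smallestDown the smallest gradient among variables which are not at their
	/// lower bound; shrink() uses largestUp - smallestDown as the maximal KKT violation.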
	void getMaxKKTViolations(double& largestUp, double& smallestDown, std::size_t maxIndex){
		largestUp = -1e100;
		smallestDown = 1e100;
		for (std::size_t a = 0; a < maxIndex; a++){
			if (!isLowerBound(a))
				smallestDown = std::min(smallestDown,gradient(a));
			if (!isUpperBound(a))
				largestUp = std::max(largestUp,gradient(a));
		}
	}

	///\brief True as soon as unshrink() has been called; prevents repeated unshrinking in shrink().
	bool m_isUnshrinked;

	///\brief True if shrinking is to be used.
	bool m_shrink;

	///\brief Stores the gradient of the alpha components which are at a bound of the box (e.g., 0 or C).
	RealVector m_gradientEdge;
};

}
#endif