HypervolumeSubsetSelection2D.h
Go to the documentation of this file.
1 /*!
2  *
3  * \author O.Krause
4  * \date 2016
5  *
6  *
7  * \par Copyright 1995-2017 Shark Development Team
8  *
9  * <BR><HR>
10  * This file is part of Shark.
11  * <http://shark-ml.org/>
12  *
13  * Shark is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU Lesser General Public License as published
15  * by the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * Shark is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public License
24  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
25  *
26  */
27 #ifndef SHARK_ALGORITHMS_DIRECTSEARCH_HYPERVOLUMESUBSETSELECTION_2D_H
28 #define SHARK_ALGORITHMS_DIRECTSEARCH_HYPERVOLUMESUBSETSELECTION_2D_H
29 
30 #include <shark/LinAlg/Base.h>
31 
32 #include <algorithm>
33 #include <vector>
34 #include <deque>
35 
36 namespace shark {
37 /// \brief Implementation of the exact hypervolume subset selection algorithm in 2 dimensions.
38 ///
39 /// This algorithm solves the problem of selecting a subset of points with largest hypervolume in 2D.
40 /// The algorithm has complexity n (k+log(n))
41 ///
42 /// While this algorithm accepts fronts with dominated points in it, the caller has to ensure
43 /// that after domination checks there are at least as many points left as there are to select. The
44 /// Algorithm will throw an exception otherwise.
45 ///
46 /// This can easily be ensured by removing the nondominated points prior to calling this function.
47 ///
48 /// The algorithm is described in:
49 /// Bringmann, Karl, Tobias Friedrich, and Patrick Klitzke.
50 /// "Two-dimensional subset selection for hypervolume and epsilon-indicator."
51 /// Proceedings of the 2014 conference on Genetic and evolutionary computation.
52 /// ACM, 2014.
53 /// (although it is not very helpful)
55 private:
56 
57  struct Point{
58  Point(){}
59 
60  Point(double f1, double f2, std::size_t index)
61  : f1(f1)
62  , f2(f2)
63  , index(index)
64  , selected(false)
65  {}
66 
67  bool operator<(Point const& rhs) const{//for lexicographic sorting
68  if (f1 < rhs.f1) return true;
69  if (f1 > rhs.f1) return false;
70  return (f2 < rhs.f1);
71  }
72 
73  double f1;
74  double f2;
75  std::size_t index;
76  bool selected;
77  };
78 
79  ///\brief Linear function a*x+b where a is stored in first and b is stored in section.
80  ///
81  /// The linear function also stores an index to uniquely identify it.
82  ///
83  /// Linear functions are used in the algorithm to represent the
84  /// volume of a given set of points under the change of reference point.
85  /// more formally, let H^l_i be the volume of a set of points of size l with largest
86  /// x-value at the point (x_i,y_i) and reference point x_i(thus H^l_i can only use points
87  /// 1,...,i).
88  /// Then for x>x_i we have
89  /// f_i^l(x) = H_i^l+ y_i(x_i-x)=-x*y_i+y_i*x_i+H = a*x+b.
90  /// Later the algorithm will use an upper envelope over a set of those functions
91  /// to decide which points to add to the sets until the size of the sets is k.
92  ///
93  /// for this application the stored index is the same as index i of the point stated above.
94  struct LinearFunction{
95 
96  double a;
97  double b;
98  std::size_t index;
99 
100  LinearFunction(double a, double b, std::size_t index = 0):a(a), b(b), index(index){}
101  LinearFunction(){}
102 
103  double eval(double x)const{
104  return a*x + b;
105  }
106  };
107 
108  /// \brief Returns the intersection of two linear functions
109  double Intersection(LinearFunction f1, LinearFunction f2)const{
110  return (f2.b - f1.b) / (f1.a - f2.a);
111  }
112 
113 
114  /// \brief Calculates for each given x the maximum among the functions f, i.e. the upper envelope of f.
115  ///
116  /// Algorithm 2 in the paper. Complexity O(n)
117  /// given a set of functions f_1...f_n, ordered by slope such that f_1.a < f_2.a<...<f_n.a and points with x-coordinate x_1<...<x_n
118  /// computes h_i = max_{1 <= j <= i} f_j(x_i) for i=1,...,n as well as the index of the function leading to the value h_i
119  std::pair<std::vector<double>,std::vector<std::size_t> > upperEnvelope(
120  std::vector<LinearFunction>const& functions,
121  std::vector<Point> const& points
122  )const{
123  SHARK_ASSERT(functions.size() == points.size());
124  std::size_t n = points.size();
125  std::vector<double> h(n);
126  std::vector<std::size_t> chosen(n);
127  std::deque<LinearFunction> s;
128 
129  // This is the original algorithm 2 as in the paper. Even if the paper looks at maximum
130  // hypervolume where domination is given when one point has LARGER function
131  // values as the other, In section 3.2 they transform the problem to a problem
132  // where domination is given by SMALLER function values and then accordingly
133  // give the algorithm for this type. They just give the transformation but do not say
134  // what the transformation does so it is not clear until you implement it.
135  //
136  // This is a super confusing part of the paper, please kids, do not be like Bringmann et al.
137  // Keep it simple, stupid. Sometimes an additional index does not hurt.
138  //
139  //the algorithm works by inserting functions f_1 to f_i one-by-one, figuring out which functions
140  // are dominated (not being part of the upper envelope) and removing all functions which for
141  // function values x_i,x_i+1,... are already smaller than one of the other function.
142  // using the ordering relations given the set s contains the function ordered by (current) function value.
143  // so after iteration i we can just extract the largest function value for x_i by looking at the first element of s.
144  for (std::size_t i = 0; i != n; ++i) {
145 
146  // remove dominated functions.
147  // as we push back into s,
148  // at the end of s are the functions with largest slope.
149  // therefore if we have the last two elements as s_-1 and s_-2 and the new
150  // function f, knowing that the intersection of s_-1 and f is smaller than the intersection
151  // of s_-1 and s_-2 means that s_-1 is dominated by s_-2 and f and thus can be removed.
152  while (s.size() > 1 ) {
153 
154  double d1 = Intersection(functions[i], s.end()[-1]);
155  double d2 = Intersection(s.end()[-2], s.end()[-1]);
156 
157  if (d1 <= d2 || std::abs(d1-d2) < 1.e-10) {//check for numeric stability
158  s.pop_back();
159  } else {
160  break;
161  }
162  }
163  //include the new function and store its index.
164  s.push_back(functions[i]);
165  s.back().index = i;
166  // at the beginning of s are the functions with smallest slope
167  // if the first function in s has a smaller function value for the current
168  // x_i than the second function,
169  // we can safely remove it as it can not be part of the envelope any more
170  // (We are only looking at function values >=x from now on and thus the larger slope domintes)
171  while (s.size() > 1) {
172  double d1 = s[0].eval(points[i].f1);
173  double d2 = s[1].eval(points[i].f1);
174 
175  if (d1 < d2 || std::abs(d1-d2) < 1.e-10) {
176  s.pop_front();
177  } else {
178  break;
179  }
180  }
181  //assign maximum
182  //the functions in s are ordered by function value
183  // the function with the largest function value is currently at the front
184  h[i] = s[0].eval(points[i].f1);
185  chosen[i] = s[0].index;
186  }
187  return std::make_pair(std::move(h),std::move(chosen));
188  }
189 
190 
191  /// Fast calculation O(n*k) for the hypervolume selection problem.
192  /// for the selected points, it sets selected=true.
193  void hypSSP(std::vector<Point>& front,std::size_t k)const{
194  SHARK_RUNTIME_CHECK( k > 0, "k must be non-zero");
195  SHARK_RUNTIME_CHECK( k <= front.size(), "The front must have at least k nondominated points");
196 
197  std::size_t n = front.size();
198  std::vector<LinearFunction> functions(n);
199 
200  std::vector<std::vector<std::size_t> > chosen;
201  std::vector<double> h(n,0.0);
202  for(std::size_t j=0; j != k-1; ++j) {//compute until k-1 elements are chosen
203  for(std::size_t i=0; i != n; ++i ) {
204  functions[i] = LinearFunction( -front[i].f2, front[i].f1* front[i].f2 + h[i]);
205  }
206  auto result = upperEnvelope(functions, front);
207  h = result.first;
208  chosen.push_back(result.second);
209  }
210 
211  //choose the last element by simply iterating over all elements
212  std::size_t currentIndex = 0;
213  double res = -1;
214  for(std::size_t i=0; i != n; ++i ) {
215  LinearFunction f(-front[i].f2, front[i].f1*front[i].f2 + h[i]);
216  if(f.eval(0) > res) {
217  res = f.eval(0);
218  currentIndex = i;
219  }
220  }
221  front[currentIndex].selected = true;
222  //iterate backwards to reconstruct chosen indizes
223  for(auto pos = chosen.rbegin(); pos != chosen.rend(); ++pos){
224  currentIndex = (*pos)[currentIndex];
225  front[currentIndex].selected = true;
226  }
227  }
228 
229  template<typename Set>
230  std::vector<Point> createFront(Set const& points, double refX, double refY)const{
231  //copy points using the new reference frame with refPoint at (0,0). also store original index for later
232  std::vector<Point> front;
233  for(std::size_t i = 0; i != points.size(); ++i){
234  front.emplace_back(points[i](0) - refX, points[i](1) - refY,i);
235  }
236  std::sort(front.begin(),front.end());//sort lexicographically
237  //erase dominated points
238  auto newEnd = std::unique(front.begin(),front.end(),[](Point const& x, Point const& y){
239  return y.f2 >= x.f2;//by lexikographic sort we already have y.f1 >= x.f1
240  });
241  front.erase(newEnd,front.end());
242  return front;
243  }
244 public:
245  /// \brief Executes the algorithm.
246  /// While this algorithm in general accepts fronts with dominated points in it, the caller has to ensure
247  /// that after domination checks there are at least as many points left as there are to select. The
248  /// Algorithm will throw an exception otherwise.
249  ///
250  /// This can easily be ensured by removing the nondominated points prior to calling this function.
251  /// \param [in] points The set \f$S\f$ of points to select
252  /// \param [out] selected set of the same size as the set of points indicating whether the point is selected (1) or not (0)
253  /// \param [in] k number of points to select. Must be lrger than 0
254  /// \param [in] refPoint The reference point \f$\vec{r} \in \mathbb{R}^2\f$ for the hypervolume calculation, needs to fulfill: \f$ \forall s \in S: s \preceq \vec{r}\f$. .
255  template<typename Set, typename SelectedSet, typename VectorType >
256  void operator()( Set const& points, SelectedSet& selected, std::size_t k, VectorType const& refPoint){
257  SIZE_CHECK(points.size() == selected.size());
258  SHARK_RUNTIME_CHECK(k > 0, "k must be >0");
259  SHARK_RUNTIME_CHECK( k <= points.size(), "the number of points must be larger than k");
260  SIZE_CHECK( points.begin()->size() == 2 );
261  SIZE_CHECK( refPoint.size() == 2 );
262 
263  for(auto&& s: selected)
264  s = false;
265 
266  std::vector<Point> front = createFront(points, refPoint(0), refPoint(1));
267 
268  //find the optimal set in the front. afterwards selected points have selected=true
269  hypSSP(front,k);
270  //mark selected points in the original front
271  for(Point const& point: front){
272  if(point.selected){
273  selected[point.index] = true;
274  }
275  }
276  }
277 
278  /// \brief Executes the algorithm.
279  ///
280  /// This version does not use a reference point. instead the extreme points are always kept which implicitely defines a reference point
281  /// that after domination checks there are at least as many points left as there are to select. The
282  /// Algorithm will throw an exception otherwise.
283  ///
284  /// This can easily be ensured by removing the nondominated points prior to calling this function.
285  ///
286  /// \param [in] points The set \f$S\f$ of points to select
287  /// \param [out] selected set of the same size as the set of points indicating whether the point is selected (1) or not (0)
288  /// \param [in] k number of points to select, must be larger or equal 2
289  template<typename Set, typename SelectedSet>
290  void operator()( Set const& points, SelectedSet& selected, std::size_t k){
291  SIZE_CHECK(points.size() == selected.size());
292  SHARK_RUNTIME_CHECK( k >= 2, "k must be larger or equal 2");
293  SHARK_RUNTIME_CHECK( k <= points.size(), "the number of points mjust be larger than k");
294  SIZE_CHECK(points.size() == selected.size());
295  SIZE_CHECK( points.begin()->size() == 2 );
296 
297  for(auto&& s: selected)
298  s = false;
299 
300  //create front using "fake ref"
301  std::vector<Point> front = createFront(points, 0,0);
302 
303  //get reference value from extremal points
304  double refX= front.back().f1;
305  double refY= front.front().f2;
306 
307  for(auto&& point: front){
308  point.f1 -= refX;
309  point.f2 -= refY;
310  }
311 
312  //mark the extrema as chosen and remove them from the front
313  selected[front.front().index] = true;
314  selected[front.back().index] = true;
315  front.pop_back();
316  front.erase(front.begin(),front.begin()+1);
317  if(k == 2) return;
318 
319  //find the optimal set in the front. afterwards selected points have selected=true
320  hypSSP(front,k-2);
321  //mark selected points in the original front
322  for(Point const& point: front){
323  if(point.selected){
324  selected[point.index] = true;
325  }
326  }
327  }
328 };
329 
330 }
331 #endif