DCNonDominatedSort.h
Go to the documentation of this file.
1 ///
2 /// \brief Implements a divide-and-conquer non-dominated sorting algorithm.
3 ///
4 /// \author T. Glasmachers
5 /// \date 2016
6 ///
7 ///
8 /// \par Copyright 1995-2017 Shark Development Team
9 ///
10 /// <BR><HR>
11 /// This file is part of Shark.
12 /// <http://shark-ml.org/>
13 ///
14 /// Shark is free software: you can redistribute it and/or modify
15 /// it under the terms of the GNU Lesser General Public License as published
16 /// by the Free Software Foundation, either version 3 of the License, or
17 /// (at your option) any later version.
18 ///
19 /// Shark is distributed in the hope that it will be useful,
20 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
21 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 /// GNU Lesser General Public License for more details.
23 ///
24 /// You should have received a copy of the GNU Lesser General Public License
25 /// along with Shark. If not, see <http://www.gnu.org/licenses/>.
26 ///
27 
28 #ifndef SHARK_ALGORITHMS_DIRECTSEARCH_OPERATORS_DOMINATION_DCNONDOMINATEDSORT_H
29 #define SHARK_ALGORITHMS_DIRECTSEARCH_OPERATORS_DOMINATION_DCNONDOMINATEDSORT_H
30 
32 #include <shark/LinAlg/Base.h>
33 #include <vector>
34 #include <list>
35 #include <set>
36 #include <map>
37 #include <utility>
38 #include <algorithm>
39 #include <functional>
40 
41 
42 namespace shark {
43 
44 
45 ///
46 /// \brief Divide-and-conquer algorithm for non-dominated sorting.
47 ///
48 /// Assembles subsets/fronts of mutually non-dominated individuals.
49 /// Afterwards every individual is assigned a rank by pop[i].rank() = frontIndex.
50 /// The front of non-dominated points has the value 1.
51 ///
52 /// The algorithm is described in:
53 /// Generalizing the Improved Run-Time Complexity Algorithm for Non-Dominated Sorting.
54 /// Felix-Antoine Fortin, Simon Grenier, and Marc Parizeau,
55 /// Proceedings Genetic and Evolutionary Computation Conference (GECCO), pp. 615-622, 2013.
56 ///
57 /// The algorithm is based on an earlier publication by Jensen, 2003. Its runtime
58 /// complexity for n points and m objectives is claimed to be as low as
59 /// \f$ \mathcal{O}(n \log(n)^{m-1}) \f$, which is often better than the complexity
60 /// \f$ \mathcal{O}(n^2 m) \f$ of the of "fast non-dominated sorting" algorithm,
61 /// in particular for small m.
62 ///
63 /// However, the implementation has a complexity of \f$ \mathcal{O}(n \log(n)^{m-2}c) \f$,
64 /// where c is the number of distinct non-dominance ranks. If can hence be as bad as
65 /// \f$ \mathcal{O}(n^2 \log(n)^{m-2}) \f$, but in practice it is usually fast.
66 /// The same applies to the reference implementation in the DEAP library (at the time
67 /// of writing, March 2016).
68 ///
70 {
71 private:
72  struct Point
73  {
74  Point(): frt(1){}
75 
76  Point(RealVector const& objvec)
77  : obj(objvec.raw_storage().values)
78  , frt(1)
79  , m_size(objvec.size())
80  {}
81 
82  bool operator<(Point const& rhs) const{//for lexicographic sorting
83  for (std::size_t i=0; i<m_size; i++)
84  {
85  if (obj[i] < rhs.obj[i]) return true;
86  if (obj[i] > rhs.obj[i]) return false;
87  }
88  return false;
89  }
90  bool operator==(Point const& rhs) const{//for std::unique
91  for (std::size_t i=0; i<m_size; i++)
92  {
93  if (obj[i] != rhs.obj[i]) return false;
94  }
95  return true;
96  }
97 
98 
99  double const* obj;
100  unsigned int frt; // front index (1-based)
101  std::size_t m_size;
102  };
103 
104  typedef std::vector<Point*> ContainerType;
105  typedef std::map<unsigned int, double> MapType;
106  typedef std::set<std::size_t, std::greater<std::size_t> > SetType;
107  typedef std::map<double, SetType > InverseMapType;
108 
109 public:
110  /// \brief Executes the non-dominated sorting algorithm.
111  ///
112  /// Afterwards every individual is assigned a rank by pop[i].rank() = frontNumber.
113  /// The front of dominating points has the value 1.
114  ///
115  template<typename PointRange, typename RankRange>
116  void operator () (PointRange const& pointRange, RankRange& rankRange)
117  {
118  SIZE_CHECK(pointRange.size() == rankRange.size());
119  std::size_t m = pointRange[0].size();
120  // prepare set of unique objective vectors
121  std::vector<Point> points(pointRange.size());
122  std::transform(pointRange.begin(),pointRange.end(),points.begin(),[](RealVector const& point){
123  return Point(point);
124  });
125  std::sort(points.begin(),points.end());
126  points.erase(std::unique(points.begin(),points.end()),points.end());
127  ContainerType S(points.size());
128  for (std::size_t i = 0; i != points.size(); ++i)
129  {
130  S[i] = &points[i];
131  }
132 
133  // call recursive algorithm
134  ndHelperA(S,m);
135 
136  // assign ranks to individuals
137  for (std::size_t i = 0; i != pointRange.size(); ++i)
138  {
139  auto it = std::lower_bound(points.begin(), points.end(), Point(pointRange[i]));
140  SHARK_ASSERT(it != points.end());
141  rankRange[i] = it->frt;
142  }
143  }
144 
145 private:
146  // Dominance relation restricted to the first k components of the objective vector.
147  static DominanceRelation dominance(Point const* lhs, Point const* rhs, std::size_t k)
148  {
149  return shark::dominance(blas::adapt_vector(k,lhs->obj),blas::adapt_vector(k,rhs->obj));
150  }
151 
152  // figure 2 in the paper
153  //
154  // This function performs non-dominated sorting of S according to
155  // the first k objectives, while taking previously set front index
156  // values into account (which stem from dominance by points external
157  // to S).
158  void ndHelperA(ContainerType& S, std::size_t k)
159  {
160  if (S.size() < 2) return;
161  if (S.size() == 2)
162  {
163  if (dominance(S[0], S[1], k) == LHS_DOMINATES_RHS)
164  {
165  S[1]->frt = std::max(S[1]->frt, S[0]->frt + 1);
166  }
167  return;
168  }
169  if (k == 2)
170  {
171  sweepA(S);
172  return;
173  }
174 
175  // check condition |\{s_k | s \in S\}| = 1
176  bool k_equal = true;
177  for (std::size_t i=1; i<S.size(); i++)
178  {
179  if (S[0]->obj[k-1] != S[i]->obj[k-1])
180  {
181  k_equal = false;
182  break;
183  }
184  }
185 
186  if (k_equal)
187  {
188  ndHelperA(S, k-1);
189  return;
190  }
191  else
192  {
193  ContainerType L, H;
194  splitA(S, k, L, H);
195  ndHelperA(L, k);
196  ndHelperB(L, H, k-1);
197  ndHelperA(H, k);
198  return;
199  }
200  }
201 
202  // figure 3 in the paper
203  //
204  // Same functionality as ndHelperA, but a specialized sweeping
205  // algorithm for two objectives.
206  //
207  // Note: the paper (and also the original paper by Jensen) states
208  // that this is an O(n log(n)) operation. However, the reference
209  // implementation in the DEAP library implements it as an O(n^2)
210  // operation, or more exactly, as an O(n c) operation, where c is
211  // the number of fronts. It seems that this is because Jensen's
212  // O(n log(c)) algorithm works only if all initial FRT values
213  // coincide, which holds in the bi-objective case, but not in the
214  // more general case needed here.
215  void sweepA(ContainerType& S)
216  {
217  MapType T;
218  T[S[0]->frt] = S[0]->obj[1];
219  for (std::size_t i=1; i<S.size(); i++)
220  {
221  // O(T.size()) operation, should be logarithmic...?
222  unsigned int r = 0;
223  for (auto p : T)
224  {
225  if (p.second <= S[i]->obj[1]) r = std::max(r, p.first);
226  }
227 
228  if (r > 0) S[i]->frt = std::max(S[i]->frt, r + 1);
229 
230  T[S[i]->frt] = S[i]->obj[1];
231  }
232  }
233 
234  // median of the k-th objective over S
235  double median(ContainerType const& S, std::size_t k)
236  {
237  std::vector<double> value(S.size());
238  for (std::size_t i=0; i<S.size(); i++) value[i] = S[i]->obj[k];
239  std::nth_element(value.begin(), value.begin() + value.size() / 2, value.end());
240  double ret = value[value.size() / 2];
241  if (S.size() & 1) return ret;
242  ret += *std::max_element(value.begin(), value.begin() + value.size() / 2);
243  return ret / 2.0;
244  }
245 
246  // figure 5 in the paper
247  //
248  // Split the set S according to the median in objective k
249  // (one-based index) as balanced as possible into subsets
250  // L and H. The subsets remain lexicographically sorted.
251  void splitA(ContainerType const& S, std::size_t k, ContainerType& L, ContainerType& H)
252  {
253  k--; // index is zero-based
254  SHARK_ASSERT(L.empty());
255  SHARK_ASSERT(H.empty());
256  double med = median(S, k);
257  ContainerType La, Lb, Ha, Hb;
258  for (std::size_t i=0; i<S.size(); i++)
259  {
260  double v = S[i]->obj[k];
261  if (v < med)
262  {
263  La.push_back(S[i]);
264  Lb.push_back(S[i]);
265  }
266  else if (v > med)
267  {
268  Ha.push_back(S[i]);
269  Hb.push_back(S[i]);
270  }
271  else
272  {
273  La.push_back(S[i]);
274  Hb.push_back(S[i]);
275  }
276  }
277  if (Lb.size() < Ha.size())
278  {
279  using namespace std;
280  swap(L, La);
281  swap(H, Ha);
282  }
283  else
284  {
285  using namespace std;
286  swap(L, Lb);
287  swap(H, Hb);
288  }
289  }
290 
291  // figure 7 in the paper
292  //
293  // preconditions:
294  // * sets L and H are lexicographically orderes
295  // * all points in L are better than those in H according to objective k
296  // * the front indices for L are final
297  // * the front indices of H are all 1
298  // *
299  // postconditions:
300  // * each front index of h \in H is assigned the max of the front indices of l \in L dominating h, plus 1
301  void ndHelperB(ContainerType const& L, ContainerType& H, std::size_t k)
302  {
303  if (L.empty() || H.empty()) return;
304  if (L.size() == 1 || H.size() == 1)
305  {
306  for (std::size_t j=0; j<H.size(); j++)
307  {
308  for (std::size_t i=0; i<L.size(); i++)
309  {
310  DominanceRelation rel = dominance(L[i], H[j], k);
311  if (rel == LHS_DOMINATES_RHS || rel == EQUIVALENT)
312  {
313  H[j]->frt = std::max(H[j]->frt, L[i]->frt + 1);
314  }
315  }
316  }
317  return;
318  }
319  if (k == 2)
320  {
321  sweepB(L, H);
322  return;
323  }
324  double minLk = L[0]->obj[k-1];
325  double maxLk = L[0]->obj[k-1];
326  for (std::size_t i=1; i<L.size(); i++)
327  {
328  double v = L[i]->obj[k-1];
329  minLk = std::min(minLk, v);
330  maxLk = std::max(maxLk, v);
331  }
332  double minHk = H[0]->obj[k-1];
333  double maxHk = H[0]->obj[k-1];
334  for (std::size_t i=1; i<H.size(); i++)
335  {
336  double v = H[i]->obj[k-1];
337  minHk = std::min(minHk, v);
338  maxHk = std::max(maxHk, v);
339  }
340  if (maxLk <= minHk)
341  {
342  ndHelperB(L, H, k-1);
343  return;
344  }
345  if (minLk <= maxHk)
346  {
347  ContainerType L1, L2, H1, H2;
348  splitB(L, H, k, L1, L2, H1, H2);
349  ndHelperB(L1, H1, k);
350  ndHelperB(L1, H2, k - 1);
351  ndHelperB(L2, H2, k);
352  return;
353  }
354  }
355 
356  // figure 8 in the paper
357  //
358  // Same as ndHelperB, but a specialized sweeping algorithm for two objectives.
359  void sweepB(ContainerType const& L, ContainerType& H)
360  {
361  MapType T;
362  std::size_t i = 0;
363  for (std::size_t j=0; j<H.size(); j++)
364  {
365  while (i < L.size())
366  {
367  if (L[i]->obj[0] > H[j]->obj[0]) break;
368  if (L[i]->obj[0] == H[j]->obj[0] && L[i]->obj[1] > H[j]->obj[1]) break;
369  auto it = T.find(L[i]->frt);
370  if (it == T.end() || L[i]->obj[1] < it->second)
371  {
372  T[L[i]->frt] = L[i]->obj[1];
373  }
374  i++;
375  }
376 
377  // O(T.size()) operation, should be logarithmic...
378  unsigned int r = 0;
379  for (auto p : T)
380  {
381  if (p.second <= H[j]->obj[1]) r = std::max(r, p.first);
382  }
383 
384  if (r > 0) // U \not= \{\}
385  {
386  H[j]->frt = std::max(H[j]->frt, r + 1);
387  }
388  }
389  }
390 
391  // figure 9 in the paper
392  //
393  // This function splits the sets L and H into subsets L1, L2 and H1, H2,
394  // respectively, using a common split point in objective k. The split
395  // point is optimized to balance the subsets. The subsets remain
396  // lexicographically sorted.
397  void splitB(ContainerType const& L, ContainerType const& H, std::size_t k,
398  ContainerType& L1, ContainerType& L2, ContainerType& H1, ContainerType& H2)
399  {
400  k--; // index is zero-based
401  double pivot = median((L.size() > H.size()) ? L : H, k);
402 
403  ContainerType L1a, L1b, L2a, L2b;
404  for (std::size_t i=0; i<L.size(); i++)
405  {
406  double v = L[i]->obj[k];
407  if (v < pivot)
408  {
409  L1a.push_back(L[i]);
410  L1b.push_back(L[i]);
411  }
412  else if (v > pivot)
413  {
414  L2a.push_back(L[i]);
415  L2b.push_back(L[i]);
416  }
417  else
418  {
419  L1a.push_back(L[i]);
420  L2b.push_back(L[i]);
421  }
422  }
423 
424  ContainerType H1a, H1b, H2a, H2b;
425  for (std::size_t i=0; i<H.size(); i++)
426  {
427  double v = H[i]->obj[k];
428  if (v < pivot)
429  {
430  H1a.push_back(H[i]);
431  H1b.push_back(H[i]);
432  }
433  else if (v > pivot)
434  {
435  H2a.push_back(H[i]);
436  H2b.push_back(H[i]);
437  }
438  else
439  {
440  H1a.push_back(H[i]);
441  H2b.push_back(H[i]);
442  }
443  }
444 
445  if (L1b.size() + H1b.size() <= L2a.size() + H2a.size())
446  {
447  using namespace std;
448  swap(L1, L1a);
449  swap(L2, L2a);
450  swap(H1, H1a);
451  swap(H2, H2a);
452  }
453  else
454  {
455  using namespace std;
456  swap(L1, L1b);
457  swap(L2, L2b);
458  swap(H1, H1b);
459  swap(H2, H2b);
460  }
461  }
462 };
463 
464 
465 template<class PointRange, class RankRange>
466 void dcNonDominatedSort(PointRange const& points, RankRange& ranks) {
467  BaseDCNonDominatedSort sorter;
468  sorter(points,ranks);
469 }
470 
471 } // namespace shark
472 #endif