VDCMA.h
Go to the documentation of this file.
1 /*!
2  * \brief Implements the VD-CMA-ES Algorithm
3  *
4  * \author Oswin Krause
5  * \date April 2014
6  *
7  * \par Copyright 1995-2017 Shark Development Team
8  *
9  * <BR><HR>
10  * This file is part of Shark.
11  * <http://shark-ml.org/>
12  *
13  * Shark is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU Lesser General Public License as published
15  * by the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * Shark is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public License
24  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
25  *
26  */
27 
28 #ifndef SHARK_ALGORITHMS_DIRECT_SEARCH_VD_CMA_H
29 #define SHARK_ALGORITHMS_DIRECT_SEARCH_VD_CMA_H
30 
34 
37 
38 
39 /// \brief Implements the VD-CMA-ES Algorithm
40 ///
41 /// The VD-CMA-ES implements a restricted form of the CMA-ES where the covariance matrix is restriced to be (D+vv^T)
42 /// where D is a diagonal matrix and v a single vector. Therefore this variant is capable of large-scale optimisation
43 ///
44 /// For more reference, see the paper
45 /// Akimoto, Y., A. Auger, and N. Hansen (2014). Comparison-Based Natural Gradient Optimization in High Dimension.
46 /// To appear in Genetic and Evolutionary Computation Conference (GECCO 2014), Proceedings, ACM
47 ///
48 /// The implementation differs from the paper to be closer to the reference implementation and to have better numerical
49 /// accuracy.
50 namespace shark {
51 class VDCMA : public AbstractSingleObjectiveOptimizer<RealVector >
52 {
53 private:
54  double chi( std::size_t n ) {
55  return( std::sqrt( static_cast<double>( n ) )*(1. - 1./(4.*n) + 1./(21.*n*n)) );
56  }
57 public:
58 
59  /// \brief Default c'tor.
60  VDCMA(random::rng_type& rng = random::globalRng):m_initialSigma(0.0), mpe_rng(&rng){
62  }
63 
64  /// \brief From INameable: return the class name.
65  std::string name() const
66  { return "VDCMA-ES"; }
67 
68  /// \brief Calculates lambda for the supplied dimensionality n.
69  std::size_t suggestLambda( std::size_t dimension ) {
70  return unsigned( 4. + ::floor( 3. * ::log( static_cast<double>( dimension ) ) ) ); // eq. (44)
71  }
72 
73  /// \brief Calculates mu for the supplied lambda and the recombination strategy.
74  std::size_t suggestMu( std::size_t lambda) {
75  return lambda / 2; // eq. (44)
76  }
77 
79 
80  void init( ObjectiveFunctionType const& function, SearchPointType const& p) {
81  checkFeatures(function);
82 
83  std::size_t lambda = suggestLambda( p.size() );
84  std::size_t mu = suggestMu( lambda );
85  double sigma = m_initialSigma;
86  if(m_initialSigma == 0)
87  sigma = 1.0/std::sqrt(double(p.size()));
88 
89  init( function,
90  p,
91  lambda,
92  mu,
93  sigma
94  );
95  }
96 
97  /// \brief Initializes the algorithm for the supplied objective function.
98  void init(
99  ObjectiveFunctionType const& function,
100  SearchPointType const& initialSearchPoint,
101  std::size_t lambda,
102  std::size_t mu,
103  double initialSigma
104  ) {
105 
106  m_numberOfVariables = function.numberOfVariables();
107  m_lambda = lambda;
108  m_mu = mu;
109  m_sigma = initialSigma;
110 
111  m_mean = blas::repeat(0.0,m_numberOfVariables);
112  m_vn.resize(m_numberOfVariables);
113  for(std::size_t i = 0; i != m_numberOfVariables;++i){
114  m_vn(i) = random::uni(*mpe_rng,0,1.0/m_numberOfVariables);
115  }
116  m_normv = norm_2(m_vn);
117  m_vn /= m_normv;
118 
119  m_D = blas::repeat(1.0,m_numberOfVariables);
120  m_evolutionPathC = blas::repeat(0.0,m_numberOfVariables);
121  m_evolutionPathSigma = blas::repeat(0.0,m_numberOfVariables);
122 
123  //set initial point
124  m_mean = initialSearchPoint;
125  m_best.point = initialSearchPoint;
126  m_best.value = function(initialSearchPoint);
127 
128  m_counter = 0;//first iteration
129 
130  //weighting of the mu-best individuals
131  m_weights.resize(m_mu);
132  for (std::size_t i = 0; i < m_mu; i++){
133  m_weights(i) = ::log(mu + 0.5) - ::log(1. + i);
134  }
135  m_weights /= sum(m_weights);
136 
137  // constants based on (4) and Step 3 in the algorithm
138  m_muEff = 1. / sum(sqr(m_weights)); // equal to sum(m_weights)^2 / sum(sqr(m_weights))
139  m_cSigma = 0.5/(1+std::sqrt(m_numberOfVariables/m_muEff));
140  m_dSigma = 1. + 2. * std::max(0., std::sqrt((m_muEff-1.)/(m_numberOfVariables+1)) - 1.) + m_cSigma;
141 
142  m_cC = (4. + m_muEff / m_numberOfVariables) / (m_numberOfVariables + 4. + 2 * m_muEff / m_numberOfVariables);
143  double correction = (m_numberOfVariables - 5.0)/6.0;
144  m_c1 = correction*2 / (sqr(m_numberOfVariables + 1.3) + m_muEff);
145  m_cMu = std::min(1. - m_c1, correction* 2 * (m_muEff - 2. + 1./m_muEff) / (sqr(m_numberOfVariables + 2) + m_muEff));
146  }
147 
148  /// \brief Executes one iteration of the algorithm.
149  void step(ObjectiveFunctionType const& function){
151  std::vector< IndividualType > offspring( m_lambda );
152 
153  PenalizingEvaluator penalizingEvaluator;
154  for( std::size_t i = 0; i < offspring.size(); i++ ) {
155  createSample(offspring[i].searchPoint(),offspring[i].chromosome());
156  }
157  penalizingEvaluator( function, offspring.begin(), offspring.end() );
158 
159  // Selection
160  std::vector< IndividualType > parents( m_mu );
162  selection(offspring.begin(),offspring.end(),parents.begin(), parents.end());
163  // Strategy parameter update
164  m_counter++; // increase generation counter
165  updateStrategyParameters( parents );
166 
167  m_best.point= parents[ 0 ].searchPoint();
168  m_best.value= parents[ 0 ].unpenalizedFitness();
169  }
170 
171  /// \brief Accesses the current step size.
172  double sigma() const {
173  return m_sigma;
174  }
175 
176  /// \brief Accesses the current step size.
177  void setSigma(double sigma) {
178  m_sigma = sigma;
179  }
180 
181  /// \brief set the initial step size of the algorithm.
182  ///
183  /// Sets the initial sigma at init to a given value. If this is 0, which it is
184  /// by default, the default initialisation will be sigma= 1/sqrt(N) where N
185  /// is the number of variables to optimize.
186  ///
187  /// this method is the prefered one instead of init()
188  void setInitialSigma(double initialSigma){
189  m_initialSigma = initialSigma;
190  }
191 
192 
193  /// \brief Accesses the current population mean.
194  RealVector const& mean() const {
195  return m_mean;
196  }
197 
198  /// \brief Accesses the current weighting vector.
199  RealVector const& weights() const {
200  return m_weights;
201  }
202 
203  /// \brief Accesses the evolution path for the covariance matrix update.
204  RealVector const& evolutionPath() const {
205  return m_evolutionPathC;
206  }
207 
208  /// \brief Accesses the evolution path for the step size update.
209  RealVector const& evolutionPathSigma() const {
210  return m_evolutionPathSigma;
211  }
212 
213  ///\brief Returns the size of the parent population \f$\mu\f$.
214  std::size_t mu() const {
215  return m_mu;
216  }
217 
218  ///\brief Returns a mutabl reference to the size of the parent population \f$\mu\f$.
219  std::size_t& mu(){
220  return m_mu;
221  }
222 
223  ///\brief Returns a immutable reference to the size of the offspring population \f$\mu\f$.
224  std::size_t lambda()const{
225  return m_lambda;
226  }
227 
228  ///\brief Returns a mutable reference to the size of the offspring population \f$\mu\f$.
229  std::size_t & lambda(){
230  return m_lambda;
231  }
232 
233 private:
234  /// \brief Updates the strategy parameters based on the supplied offspring population.
235  ///
236  /// The chromosome stores the y-vector that is the step from the mean in D=1, sigma=1 space.
237  void updateStrategyParameters( std::vector<Individual<RealVector, double, RealVector> >& offspring ) {
238  RealVector m( m_numberOfVariables, 0. );
239  RealVector z( m_numberOfVariables, 0. );
240 
241  for( std::size_t j = 0; j < offspring.size(); j++ ){
242  noalias(m) += m_weights( j ) * offspring[j].searchPoint();
243  noalias(z) += m_weights( j ) * offspring[j].chromosome();
244  }
245  //compute z from y= (1+(sqrt(1+||v||^2)-1)v_n v_n^T)z
246  //therefore z= (1+(1/sqrt(1+||v||^2)-1)v_n v_n^T)y
247  double b=(1/std::sqrt(1+sqr(m_normv))-1);
248  noalias(z)+= b*inner_prod(z,m_vn)*m_vn;
249 
250  //update paths
251  noalias(m_evolutionPathSigma) = (1. - m_cSigma)*m_evolutionPathSigma + std::sqrt( m_cSigma * (2. - m_cSigma) * m_muEff ) * z;
252  // compute h_sigma
253  double hSigLHS = norm_2( m_evolutionPathSigma ) / std::sqrt(1. - pow((1 - m_cSigma), 2.*(m_counter+1)));
254  double hSigRHS = (1.4 + 2 / (m_numberOfVariables+1.)) * chi( m_numberOfVariables );
255  double hSig = 0;
256  if(hSigLHS < hSigRHS) hSig = 1.;
257  noalias(m_evolutionPathC) = (1. - m_cC ) * m_evolutionPathC + hSig * std::sqrt( m_cC * (2. - m_cC) * m_muEff ) * (m - m_mean) / m_sigma;
258 
259 
260 
261  //we split the computation of s and t in the paper in two parts
262  //we compute the first two steps and then compute the weighted mean over samples and
263  //evolution path. afterwards we compute the rest using the mean result
264  //the paper describes this as first computing S and T for all samples and compute the weighted
265  //mean of that, but the reference implementation does it the other way to prevent numerical instabilities
266  RealVector meanS(m_numberOfVariables,0.0);
267  RealVector meanT(m_numberOfVariables,0.0);
268  for(std::size_t j = 0; j != mu(); ++j){
269  computeSAndTFirst(offspring[j].chromosome(),meanS,meanT,m_cMu*m_weights(j));
270  }
271  computeSAndTFirst(m_evolutionPathC/m_D,meanS,meanT,hSig*m_c1);
272 
273  //compute the remaining mean S and T steps
274  computeSAndTSecond(meanS,meanT);
275 
276  //compute update to v and d
277  noalias(m_D) += m_D*meanS;
278  noalias(m_vn) = m_vn*m_normv+meanT/m_normv;//result is v and not vn
279  //store the new v separately as vn and its norm
280  m_normv = norm_2(m_vn);
281  m_vn /= m_normv;
282 
283  //update step length
284  m_sigma *= std::exp( (m_cSigma / m_dSigma) * (norm_2(m_evolutionPathSigma)/ chi( m_numberOfVariables ) - 1.) ); // eq. (39)
285 
286  //update mean
287  m_mean = m;
288  }
289 
290  //samples a point and stores additionally y=(x-m_mean)/(sigma*D)
291  //as this is required for calculation later
292  void createSample(RealVector& x,RealVector& y)const{
293  x.resize(m_numberOfVariables);
294  y.resize(m_numberOfVariables);
295  for(std::size_t i = 0; i != m_numberOfVariables; ++i){
296  y(i) = random::gauss(*mpe_rng,0,1);
297  }
298  double a = std::sqrt(1+sqr(m_normv))-1;
299  a *= inner_prod(y,m_vn);
300  noalias(y) +=a*m_vn;
301  noalias(x) = m_mean+ m_sigma*m_D*y;
302  }
303 
304  ///\brief computes the sample wise first two steps of S and T of theorem 3.6 in the paper
305  ///
306  /// S and T arguments accordingly
307  void computeSAndTFirst(RealVector const& y, RealVector& s,RealVector& t, double weight )const{
308  if(weight == 0) return;//nothing to do
309  double yvn = inner_prod(y,m_vn);
310  double normv2 = sqr(m_normv);
311  double gammav = 1+normv2;
312  //step 1
313  noalias(s) += weight*(sqr(y) - (normv2/gammav*yvn)*(y*m_vn)- 1.0);
314  //step 2
315  noalias(t) += weight*(yvn*y - 0.5*(sqr(yvn)+gammav)*m_vn);
316  }
317 
318  ///\brief computes the last three steps of S and T of theorem 3.6 in the paper
319  void computeSAndTSecond(RealVector& s,RealVector& t)const{
320  RealVector vn2 = m_vn*m_vn;
321  double normv2 = sqr(m_normv);
322  double gammav = 1+normv2;
323  //alpha of 3.5
324  double alpha = sqr(normv2)+(2*gammav - std::sqrt(gammav))/max(vn2);
325  alpha=std::sqrt(alpha);
326  alpha /= 2+normv2;
327  alpha = std::min(alpha,1.0);
328  //constants (b,A) of 3.4
329  double b=-(1-sqr(alpha))*sqr(normv2)/gammav+2*sqr(alpha);
330  RealVector A= 2.0 - (b+2*sqr(alpha))*vn2;
331  RealVector invAvn2= vn2/A;
332 
333  //step 3
334  noalias(s) -= alpha/gammav*((2+normv2)*(m_vn*t)-normv2*inner_prod(m_vn,t)*vn2);
335  //step 4
336  noalias(s) = s/A -b*inner_prod(s,invAvn2)/(1+b*inner_prod(vn2,invAvn2))*invAvn2;
337  //step 5
338  noalias(t) -= alpha*((2+normv2)*(m_vn*s)-inner_prod(s,vn2)*m_vn);
339  }
340 
341  std::size_t m_numberOfVariables; ///< Stores the dimensionality of the search space.
342  std::size_t m_mu; ///< The size of the parent population.
343  std::size_t m_lambda; ///< The size of the offspring population, needs to be larger than mu.
344 
345  double m_initialSigma;///0 by default which indicates initial sigma = 1/sqrt(N)
346  double m_sigma;
347  double m_cC;
348  double m_c1;
349  double m_cMu;
350  double m_cSigma;
351  double m_dSigma;
352  double m_muEff;
353 
354  RealVector m_mean;
355  RealVector m_weights;
356 
357  RealVector m_evolutionPathC;
358  RealVector m_evolutionPathSigma;
359 
360  ///\brief normalised vector v
361  RealVector m_vn;
362  ///\brief norm of the vector v, therefore v=m_vn*m_normv
363  double m_normv;
364 
365  RealVector m_D;
366 
367  unsigned m_counter; ///< counter for generations
368 
369  random::rng_type* mpe_rng;
370 
371 
372 };
373 
374 }
375 
376 #endif