// GruauPole.h
// (Doxygen source listing: "Go to the documentation of this file.")
1 /*!
2  *
3  * \brief Objective function for double non-Markov pole with special fitness functions
4  *
5  * Class for balancing two poles on a cart using a fitness function
6  * that punishes oscillating, i.e. quickly moving the cart back and
7  * forth to balance the poles.
8  *
9  * Based on code written by Verena Heidrich-Meisner for the paper
10  *
11  * V. Heidrich-Meisner and
12  * C. Igel. Neuroevolution strategies for episodic reinforcement
13  * learning. Journal of Algorithms, 64(4):152–168, 2009.
14  *
15  * Special fitness function from the paper
16  *
17  * F. Gruau, D. Whitley, L. Pyeatt, A comparison between cellular
18  * encoding and direct encoding for genetic neural networks, in:
19  * J.R. Koza, D.E. Goldberg, D.B. Fogel, R.L. Riolo (Eds.), Genetic
20  * Programming 1996: Proceedings of the First Annual Conference, MIT
21  * Press, 1996, pp. 81–89.
22  *
23  * \author Johan Valentin Damgaard
24  * \date -
25  *
26  *
27  * \par Copyright 1995-2017 Shark Development Team
28  *
29  * This file is part of Shark.
30  * <http://shark-ml.org/>
31  *
32  * Shark is free software: you can redistribute it and/or modify
33  * it under the terms of the GNU Lesser General Public License as published
34  * by the Free Software Foundation, either version 3 of the License, or
35  * (at your option) any later version.
36  *
37  * Shark is distributed in the hope that it will be useful,
38  * but WITHOUT ANY WARRANTY; without even the implied warranty of
39  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40  * GNU Lesser General Public License for more details.
41  *
42  * You should have received a copy of the GNU Lesser General Public License
43  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
44  *
45  */
46 #ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_GRUAU_OBJECTIVE_FUNCTION
47 #define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_GRUAU_OBJECTIVE_FUNCTION
48 
49 #include <iostream>
50 #include <exception>
51 
54 #include <shark/LinAlg/Base.h>
55 
57 
58 namespace shark {
59 
60 //! \brief
61 //!
62 //! Class for balancing two poles on a cart using a fitness function that punishes
63 //! oscillating, i.e. quickly moving the cart back and forth to balance the poles.
64 //! Based on code written by Verena Heidrich-Meisner for the paper
65 //!
66 //! V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
67 //!
68 //!
69 //! Special fitness function from the paper
70 //!
71 //! F. Gruau, D. Whitley, L. Pyeatt, A comparison between cellular encoding and direct encoding for genetic neural networks, in: J.R. Koza, D.E. Goldberg,
72 //! D.B. Fogel, R.L. Riolo (Eds.), Genetic Programming 1996: Proceedings of the First Annual Conference, MIT Press, 1996, pp. 81–89.
74 
75 public:
76  //! \param hidden Number of hidden neurons in underlying neural network
77  //! \param bias Whether to use bias in neural network
78  //! \param sigmoidType Activation sigmoid function for neural network
79  //! \param normalize Whether to normalize input before use in neural network
80  //! \param max_pole_evaluations Balance goal of the function, i.e. number of steps that pole should be able to balance without failure
81  GruauPole(std::size_t hidden, bool bias,
83  bool normalize = true,
84  std::size_t max_pole_evaluations = 1000)
85  : m_maxPoleEvals(max_pole_evaluations),
86  m_normalize(normalize) {
87 
88  if (sigmoidType == RecurrentStructure::Linear) {
89  std::cerr << "Cannot use linear activation function for pole balancing."
90  << std::endl;
91  exit(EXIT_FAILURE);
92  }
93 
94  std::size_t inputs = 3;
95 
96  // set features
98 
99  // set number of variables/weights.
100  // number of outputs is always 1.
101  // dimensions depend on whether we use bias
102  if (bias){
103  m_dimensions = (hidden + 1) * (hidden + 1) +
104  inputs * (hidden + 1) + hidden + 1;
105  }
106  else {
107  m_dimensions = (hidden + 1) * (hidden + 1) + inputs * (hidden + 1);
108  }
109 
110  // make RNNet
111  mp_struct = new RecurrentStructure();
112  mp_struct->setStructure(inputs, hidden, 1, bias, sigmoidType);
113  mp_net = new PoleRNNet(mp_struct);
114 
115  // check dimensions match
116  if(m_dimensions != mp_net->numberOfParameters()) {
117  std::cerr << "Gruau pole RNNet: Dimensions do not match, "
118  << m_dimensions
119  << " != " << mp_net->numberOfParameters() << std::endl;
120  exit(EXIT_FAILURE);
121  }
122 
123  // set eval count
125  }
126 
128  delete mp_struct;
129  delete mp_net;
130  }
131 
132 
133  std::string name() {
134  return "Objective Function for non-Markov double pole with Gruau fitness.";
135  }
136 
137  //! \brief Returns degrees of freedom
138  std::size_t numberOfVariables()const{
139  return m_dimensions;
140  }
141 
142  //! \brief Always proposes to start in a zero vector with appropriate degrees of freedom
144  SearchPointType startingPoint(m_dimensions);
145  for(std::size_t i = 0; i != m_dimensions; i++) {
146  startingPoint(i) = 0.0; // starting mean = 0
147  }
148  return startingPoint;
149  }
150 
151  //! \brief Evaluates weight vector on special fitness function from Gruau paper
152  //! \param input Vector to be evaluated.
153  //! \return Fitness of vector
154  ResultType eval(const SearchPointType &input) const{
155  SIZE_CHECK(input.size() == m_dimensions );
157  return gruauFit(input);
158  }
159 
160  //! \brief Evaluates weight vector on special fitness function from Gruau paper
161  //! \param input Vector to be evaluated.
162  //! \return Fitness of vector
163  ResultType gruauFit(const SearchPointType &input) const{
164  SIZE_CHECK(input.size() == m_dimensions );
165  double init_angle = 0.07;
166  DoublePole pole(false, m_normalize);
167  RealVector state(3);
168  RealMatrix output(1,1);
169  double totalJiggle = 0;
170  std::size_t jiggleSize = 100;
171  double jiggle[jiggleSize]; // jiggle for last 100 evals
172  std::size_t eval_count = 0;
173  bool failed = false;
174 
175  pole.init(init_angle);
176  mp_net->resetInternalState();
177  mp_net->setParameterVector(input);
178 
179  while(!failed && eval_count < m_maxPoleEvals) {
180  pole.getState(state);
181  RealMatrix newState(1,3);
182  row(newState,0) = state;
183  mp_net->eval(newState,output);
184  pole.move(convertToPoleMovement(output(0,0)));
185  jiggle[eval_count % jiggleSize] = pole.getJiggle();
186  failed = pole.failure();
187  eval_count++;
188  }
189 
190  if(eval_count >= jiggleSize){
191  for(std::size_t i = 0; i < jiggleSize;i++){
192  totalJiggle += jiggle[i];
193  }
194  totalJiggle = .75 / totalJiggle;
195  }
196 
197  // fitness gets lower the less the pole jiggles and the more it balances.
198  // no theoretical minimum, but usually close to maxFit
199  // maxFit set to 1 for precision reasons
200  // never seen the fitness result go below 0.2 in experiments
201  std::size_t maxFit = 1.0;
202  double result = maxFit - (0.1 * eval_count / 1000
203  + 0.9 * totalJiggle);
204 
205  return result;
206  }
207 
208  //! \brief Evaluates weight vector on normal balancing function
209  //! \param input Vector to be evaluated.
210  //! \param maxEvals Balance goal of the function, i.e. number of steps that pole should be able to balance without failure
211  //! \return Fitness of vector
213  std::size_t maxEvals = 100000) {
214  SIZE_CHECK(input.size() == m_dimensions);
215  double init_angle = 0.07;
216  DoublePole pole(false, m_normalize);
217  RealVector state(3);
218  RealMatrix output(1,1);
219  RealMatrix inState(1,3);
220  std::size_t eval_count = 0;
221  bool failed = false;
222 
223  pole.init(init_angle);
224  mp_net->resetInternalState();
225  mp_net->setParameterVector(input);
226 
227  while(!failed && eval_count < maxEvals) {
228  pole.getState(state);
229  row(inState,0) = state;
230  mp_net->eval(inState,output);
231  pole.move(convertToPoleMovement(output(0,0)));
232  failed = pole.failure();
233  eval_count++;
234  }
235 
236  // gets lower as number of evaluations grows. min = 0
237  return maxEvals - eval_count;
238  }
239 
240  //! \brief Evaluates weight vector on normal balancing function using 256 different starting positions
241  //! \param input Vector to be evaluated.
242  //! \param maxEvals Balance goal of the function, i.e. number of steps that pole should be able to balance without failure
243  //! \return Number of trials (out of 256) that did not fail
245  std::size_t maxEvals = 1000){
246  SIZE_CHECK(input.size() == m_dimensions);
247  double init_angle = 0.07;
248  DoublePole pole(false, m_normalize);
249  RealVector state(3);
250  RealMatrix output(1,1);
251  bool failed = false;
252  const double statevals[5] = {0.05, 0.25, 0.5, 0.75, 0.95};
253  std::size_t successes = 0;
254 
255  for (std::size_t s0c = 0; s0c < 5; ++s0c){
256  for (std::size_t s1c = 0; s1c < 5; ++s1c){
257  for (std::size_t s2c = 0; s2c < 5; ++s2c){
258  for (std::size_t s3c = 0; s3c < 5; ++s3c) {
259  std::size_t eval_count = 0; // reset counter
260  pole.init(statevals[s0c] * 4.32 - 2.16,
261  statevals[s1c] * 2.70 - 1.35,
262  statevals[s2c] * 0.12566304 - 0.06283152,
263  statevals[s3c] * 0.30019504 - 0.15009752);
264  mp_net->resetInternalState(); // reset t-1 activations
265  mp_net->setParameterVector(input);
266 
267  while(!failed && eval_count < maxEvals) {
268  pole.getState(state);
269  RealMatrix newState(1,3);
270  row(newState,0) = state;
271  mp_net->eval(newState,output);
272  pole.move(convertToPoleMovement(output(0,0)));
273  failed = pole.failure();
274  eval_count++;
275  }
276 
277  if(eval_count == maxEvals){
278  successes++;
279  }
280  }
281  }
282  }
283  }
284  return successes;
285  }
286 
287 private:
288 
289  // private class for recurrent neural network. not be used outside main class.
290  class PoleRNNet : public OnlineRNNet {
291  public:
292  PoleRNNet(RecurrentStructure* structure) : OnlineRNNet(structure){}
293  boost::shared_ptr<State> createState()const{
294  throw std::logic_error("State not available for PoleRNNet.");
295  }
296  void eval(BatchInputType const & patterns, BatchOutputType &outputs,
297  State& state) const{
298  throw std::logic_error("Batch not available for PoleRNNet.");
299  }
300  };
301 
302  //! \brief Converts neural network output for use with pole simulator
303  //! \param output Output of the neural network.
304  //! \return double precision floating point between 0 and 1.
305  double convertToPoleMovement(double output) const{
306  switch(mp_struct->sigmoidType())
307  {
309  return output;
311  return (output + 1.) / 2.;
313  return (output + 1.) / 2.;
314  default:
315  std::cerr << "Unsupported activation function for pole balancing." << std::endl;
316  exit(EXIT_FAILURE);
317  }
318 
319 
320  }
321 
322  //! True if neural network input is normalized, false otherwise
323  bool m_normalize;
324  //! Degrees of freedom
325  std::size_t m_dimensions;
326  //! Balance goal
327  std::size_t m_maxPoleEvals;
328 
329  //! Neural network
330  RecurrentStructure *mp_struct;
331  OnlineRNNet *mp_net;
332 
333 };
334 
335 }
336 #endif