MarkovPole.h
Go to the documentation of this file.
1 /*!
2  *
3  * \brief Objective function for single and double poles with full state information (Markovian task)
4  *
5  *
6  * Class for balancing one or two poles on a cart using a fitness
7  * function that decreases the longer the pole(s) balance(s). Based
8  * on code written by Verena Heidrich-Meisner for the paper
9  *
10  * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
11  * episodic reinforcement learning. Journal of Algorithms,
12  * 64(4):152–168, 2009.
13  *
14  * \author Johan Valentin Damgaard
15  * \date -
16  *
17  *
18  * \par Copyright 1995-2017 Shark Development Team
19  *
20  * This file is part of Shark.
21  * <http://shark-ml.org/>
22  *
23  * Shark is free software: you can redistribute it and/or modify
24  * it under the terms of the GNU Lesser General Public License as published
25  * by the Free Software Foundation, either version 3 of the License, or
26  * (at your option) any later version.
27  *
28  * Shark is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31  * GNU Lesser General Public License for more details.
32  *
33  * You should have received a copy of the GNU Lesser General Public License
34  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
35  *
36  */
37 #ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_MARKOV_OBJECTIVE_FUNCTION
38 #define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_MARKOV_OBJECTIVE_FUNCTION
39 
40 #include <iostream>
41 #include <typeinfo>
42 
44 #include <shark/LinAlg/Base.h>
45 #include <shark/Models/FFNet.h>
46 
49 
50 namespace shark {
51 
52 //! Uses templates to allow changing the neural network activation function
53 //! since FFNet uses templates.
54 //! The FastSigmoidNeuron is recommended, as it gives better results overall.
55 //! If errors are encountered with a specific neuron type, try disabling normalization; this resolves them at least in the single-pole LogisticNeuron case.
56 template<class HiddenNeuron,class OutputNeuron>
57 
58 //!
59 //! Class for balancing one or two poles on a cart using a fitness function
60 //! that decreases the longer the pole(s) balance(s).
61 //! Based on code written by Verena Heidrich-Meisner for the paper
62 //!
63 //! V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
65 public:
66  //! \param single_pole Indicates whether the cast has a single pole (true) or two poles (false)
67  //! \param hidden Number of hidden neurons in underlying neural network
68  //! \param shortcuts Whether to use shortcuts in neural network
69  //! \param bias Whether to use bias in neural network
70  //! \param normalize Whether to normalize input before use in neural network
71  //! \param max_pole_evaluations Balance goal of the function, i.e. number of steps that pole should be able to balance without failure
72  MarkovPole(bool single_pole, std::size_t hidden, bool shortcuts, bool bias,
73  bool normalize = true, std::size_t max_pole_evaluations = 100000)
74  : m_single(single_pole),
75  m_maxPoleEvals(max_pole_evaluations),
76  m_normalize(normalize) {
77  // number of inputs should be 4 for single pole, 6 for double.
78  std::size_t inputs = 0;
79  if (single_pole) {
80  inputs = 4;
81  }
82  else {
83  inputs = 6;
84  }
85  // set features
87 
88  // set number of variables/weights.
89  // number of outputs is always 1.
90  // dimensions depend on whether we use bias and/or shortcuts
91  if (bias && shortcuts){
92  m_dimensions = hidden * (inputs + 1) + inputs + hidden + 1;
93  }
94  else if (shortcuts) {
95  m_dimensions = hidden * (inputs + 1) + inputs;
96  }
97  else if (bias) {
98  m_dimensions = hidden * (inputs + 1) + hidden + 1;
99  }
100  else {
101  m_dimensions = hidden * (inputs + 1);
102  }
103 
104  // make FFNet
105  mp_net = new FFNet<HiddenNeuron, OutputNeuron>();
106  FFNetStructures::ConnectionType type = shortcuts ?
107  FFNetStructures::InputOutputShortcut : FFNetStructures::Normal;
108  mp_net->setStructure(inputs, hidden, 1, type, bias);
109 
110  // check dimensions match
111  if(m_dimensions != mp_net->numberOfParameters()) {
112  std::cerr << "Markov pole FFNet: Dimensions do not match, " << m_dimensions
113  << " != " << mp_net->numberOfParameters() << std::endl;
114  exit(EXIT_FAILURE);
115  }
116 
117  // set eval count
118  m_evaluationCounter = 0;
119  }
120 
122  delete mp_net;
123  }
124 
125 
//! \brief Returns the fixed, human-readable label of this objective function.
// NOTE(review): not const-qualified, so it cannot be called on a const object - confirm whether that is intended.
std::string name() {
    return std::string("Objective Function for Markovian pole balancing.");
}
129 
130  //! \brief Returns degrees of freedom
131  std::size_t numberOfVariables()const{
132  return m_dimensions;
133  }
134 
135  //! \brief Always proposes to start in a zero vector with appropriate degrees of freedom
137  SearchPointType startingPoint(m_dimensions);
138  for(std::size_t i = 0; i != m_dimensions; i++) {
139  startingPoint(i) = 0.0;
140  }
141  return startingPoint;
142  }
143 
144  //! \brief Evaluates weight vector on fitness function
145  //! \param input Vector to be evaluated.
146  //! \return Fitness of vector
147  ResultType eval(const SearchPointType &input) const{
148  SIZE_CHECK(input.size() == m_dimensions);
149 
151 
152  if(m_single) {
153  return evalSingle(input);
154  }
155  else {
156  return evalDouble(input);
157  }
158  }
159 
160 private:
161 
162  //! \brief Converts neural network output for use with pole simulator
163  //! \param output Output of the neural network.
164  //! \return double precision floating point between 0 and 1.
165  double convertToPoleMovement(double output) const{
166  if (typeid(mp_net->outputActivationFunction())
167  == typeid(LogisticNeuron)) {
168  return output;
169  }
170  else if (typeid(mp_net->outputActivationFunction())
171  == typeid(FastSigmoidNeuron)) {
172  return (output + 1.) / 2.;
173  }
174  else if (typeid(mp_net->outputActivationFunction()) == typeid(TanhNeuron)) {
175  return (output + 1.) / 2.;
176  }
177  else {
178  std::cerr << "Unsupported neuron type in Markov pole FFNet." << std::endl;
179  exit(EXIT_FAILURE);
180  }
181  }
182 
183  //! \brief Fitness function for single poles. Gets lower as pole balances for longer.
184  //! \param input Vector to be evaluated.
185  //! \return Fitness of vector
186  ResultType evalSingle(const SearchPointType &input) const{
187  double init_angle = 0.07;
188  SinglePole pole(true, m_normalize);
189  RealVector state(4);
190  RealVector output(1);
191  std::size_t eval_count = 0;
192  bool failed = false;
193 
194  pole.init(init_angle);
195 
196  mp_net->setParameterVector(input);
197 
198  while(!failed && eval_count < m_maxPoleEvals) {
199  pole.getState(state);
200  mp_net->eval(state,output);
201  pole.move(convertToPoleMovement(output(0)));
202  failed = pole.failure();
203  eval_count++;
204  }
205 
206  // gets lower as number of evaluations grows. min = 0
207  return m_maxPoleEvals - eval_count;
208  }
209 
210  //! \brief Fitness function for double poles. Gets lower as poles balance for longer.
211  //! \param input Vector to be evaluated.
212  //! \return Fitness of vector
213  ResultType evalDouble(const SearchPointType &input) const{
214  double init_angle = 0.07;
215  DoublePole pole(true, m_normalize);
216  RealVector state(6);
217  RealVector output(1);
218  std::size_t eval_count = 0;
219  bool failed = false;
220 
221  pole.init(init_angle);
222  mp_net->setParameterVector(input);
223 
224  while(!failed && eval_count < m_maxPoleEvals) {
225  pole.getState(state);
226  mp_net->eval(state,output);
227  pole.move(convertToPoleMovement(output(0)));
228  failed = pole.failure();
229  eval_count++;
230  }
231 
232  // gets lower as number of evaluations grows. min = 0
233  return m_maxPoleEvals - eval_count;
234  }
235 
236  //! True if this is a single pole, false if double pole.
237  bool m_single;
238  //! True if neural network input is normalized, false otherwise
239  bool m_normalize;
240  //! Degrees of freedom
241  std::size_t m_dimensions;
242  //! Balance goal
243  std::size_t m_maxPoleEvals;
244 
245  //! Neural network
246  FFNet<HiddenNeuron, OutputNeuron> *mp_net;
247  HiddenNeuron m_hiddenNeuron;
248  OutputNeuron m_outputNeuron;
249 
250 };
251 
252 }
253 #endif