NonMarkovPole.h
Go to the documentation of this file.
1 /*!
2  *
3  * \brief Objective function for single and double poles with partial state information (non-Markovian task)
4  *
5  *
6  * Class for balancing one or two poles on a cart using a fitness
7  * function that decreases the longer the pole(s) balance(s). Based
8  * on code written by Verena Heidrich-Meisner for the paper
9  *
10  * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
11  * episodic reinforcement learning. Journal of Algorithms,
12  * 64(4):152–168, 2009.
13  *
14  * \author Johan Valentin Damgaard
15  * \date -
16  *
17  *
18  * \par Copyright 1995-2017 Shark Development Team
19  *
20  * This file is part of Shark.
21  * <http://shark-ml.org/>
22  *
23  * Shark is free software: you can redistribute it and/or modify
24  * it under the terms of the GNU Lesser General Public License as published
25  * by the Free Software Foundation, either version 3 of the License, or
26  * (at your option) any later version.
27  *
28  * Shark is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31  * GNU Lesser General Public License for more details.
32  *
33  * You should have received a copy of the GNU Lesser General Public License
34  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
35  *
36  */
37 #ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_NONMARKOV_OBJECTIVE_FUNCTION
38 #define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_NONMARKOV_OBJECTIVE_FUNCTION
39 
40 #include <iostream>
41 #include <exception>
42 
44 #include <shark/Models/OnlineRNNet.h>
45 #include <shark/LinAlg/Base.h>
46 
49 
50 namespace shark {
51 
52 //! \brief Objective function for single and double non-Markov poles
53 //!
54 //! Class for balancing one or two poles on a cart using a fitness function
55 //! that decreases the longer the pole(s) balance(s).
56 //! Based on code written by Verena Heidrich-Meisner for the paper
57 //!
58 //! V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
60 
61 public:
62  //! \param single Is this an instance of the single pole problem?
63  //! \param hidden Number of hidden neurons in underlying neural network
64  //! \param bias Whether to use bias in neural network
65  //! \param sigmoidType Activation sigmoid function for neural network
66  //! \param normalize Whether to normalize input before use in neural network
67  //! \param max_pole_evaluations Balance goal of the function, i.e. number of steps that pole should be able to balance without failure
68  NonMarkovPole(bool single, std::size_t hidden, bool bias,
69  RecurrentStructure::SigmoidType sigmoidType = RecurrentStructure::FastSigmoid,
70  bool normalize = true,
71  std::size_t max_pole_evaluations = 100000)
72  : m_single(single),
73  m_maxPoleEvals(max_pole_evaluations),
74  m_normalize(normalize) {
75  if (sigmoidType == RecurrentStructure::Linear) {
76  std::cerr << "Cannot use linear activation function for pole balancing."
77  << std::endl;
78  exit(EXIT_FAILURE);
79  }
80 
81  // number of inputs should be 2 for single pole, 3 for double.
82  std::size_t inputs = 0;
83  if (single) {
84  inputs = 2;
85  }
86  else {
87  inputs = 3;
88  }
89  // set features
91 
92  // set number of variables/weights.
93  // number of outputs is always 1.
94  // dimensions depend on whether we use bias
95  if (bias){
96  m_dimensions = (hidden + 1) * (hidden + 1) +
97  inputs * (hidden + 1) + hidden + 1;
98  }
99  else {
100  m_dimensions = (hidden + 1) * (hidden + 1) + inputs * (hidden + 1);
101  }
102 
103  // make RNNet
104  mp_struct = new RecurrentStructure();
105  mp_struct->setStructure(inputs, hidden, 1, bias, sigmoidType);
106  mp_net = new PoleRNNet(mp_struct);
107 
108  // check dimensions match
109  if(m_dimensions != mp_net->numberOfParameters()) {
110  std::cerr << "Non-Markov pole RNNet: Dimensions do not match, "
111  << m_dimensions << " != " << mp_net->numberOfParameters() << std::endl;
112  exit(EXIT_FAILURE);
113  }
114 
115  // set eval count
117 
118  }
119 
121  delete mp_struct; // frees the RecurrentStructure allocated with new in the constructor
122  delete mp_net; // frees the network object allocated with new in the constructor
123  } // NOTE(review): the line opening this body is absent from this listing - presumably the destructor; confirm
124 
//! \brief Human-readable description of this objective function
//! \return The fixed description string.
// NOTE(review): unlike numberOfVariables() this accessor is not const-qualified;
// check whether that is intended.
std::string name() {
    std::string description("Objective Function for Non-Markovian pole balancing.");
    return description;
}
128 
129  //! \brief Returns degrees of freedom
130  std::size_t numberOfVariables()const{
131  return m_dimensions;
132  }
133 
134  //! \brief Always proposes to start in a zero vector with appropriate degrees of freedom
136  SearchPointType startingPoint(m_dimensions); // NOTE(review): the function header line is absent from this listing - confirm against upstream
137  for(std::size_t i = 0; i != m_dimensions; i++) {
138  startingPoint(i) = 0.0; // every coordinate is set to zero explicitly
139  }
140  return startingPoint;
141  }
142 
143  //! \brief Evaluates weight vector on fitness function
144  //! \param input Vector to be evaluated.
145  //! \return Fitness of vector
146  ResultType eval(const SearchPointType &input) const{
147  SIZE_CHECK(input.size() == m_dimensions);
148 
150 
151  if(m_single) {
152  return evalSingle(input);
153  }
154  else {
155  return evalDouble(input);
156  }
157  }
158 
159 private:
160 
161  // private class for recurrent neural network. not be used outside main class.
162  class PoleRNNet : public OnlineRNNet {
163  public:
164  PoleRNNet(RecurrentStructure* structure) : OnlineRNNet(structure){}
165  boost::shared_ptr<State> createState()const{
166  throw std::logic_error("State not available for PoleRNNet.");
167  }
168  void eval(BatchInputType const & patterns, BatchOutputType &outputs,
169  State& state) const{
170  throw std::logic_error("Batch not available for PoleRNNet.");
171  }
172  };
173 
174  //! \brief Converts neural network output for use with pole simulator
175  //! \param output Output of the neural network.
176  //! \return double precision floating point between 0 and 1.
177  double convertToPoleMovement(double output) const{
178  switch(mp_struct->sigmoidType())
179  {
180  case RecurrentStructure::Logistic:
181  return output;
182  case RecurrentStructure::FastSigmoid:
183  return (output + 1.) / 2.;
184  case RecurrentStructure::Tanh:
185  return (output + 1.) / 2.;
186  default:
187  std::cerr << "Unsupported activation function for pole balancing." << std::endl;
188  exit(EXIT_FAILURE);
189  }
190 
191  }
192 
193  //! \brief Fitness function for single poles. Gets lower as pole balances for longer.
194  //! \param input Vector to be evaluated.
195  //! \return Fitness of vector
196  ResultType evalSingle(const SearchPointType &input) const{
197  double init_angle = 0.07;
198  SinglePole pole(false, m_normalize);
199  RealVector state(2);
200  RealMatrix output(1,1);
201  RealMatrix inState(1,2);
202  std::size_t eval_count = 0;
203  bool failed = false;
204 
205  pole.init(init_angle);
206  mp_net->resetInternalState();
207  mp_net->setParameterVector(input);
208 
209  while(!failed && eval_count < m_maxPoleEvals) {
210  pole.getState(state);
211  row(inState,0) = state;
212  mp_net->eval(inState,output);
213  pole.move(convertToPoleMovement(output(0,0)));
214  failed = pole.failure();
215  eval_count++;
216  }
217 
218  // gets lower as number of evaluations grows. min = 0
219  return m_maxPoleEvals - eval_count;
220  }
221 
222  //! \brief Fitness function for double poles. Gets lower as poles balance for longer.
223  //! \param input Vector to be evaluated.
224  //! \return Fitness of vector
225  ResultType evalDouble(const SearchPointType &input) const{
226  double init_angle = 0.07;
227  DoublePole pole(false, m_normalize);
228  RealVector state(3);
229  RealMatrix output(1,1);
230  RealMatrix inState(1,3);
231  std::size_t eval_count = 0;
232  bool failed = false;
233 
234  pole.init(init_angle);
235  mp_net->resetInternalState();
236  mp_net->setParameterVector(input);
237 
238  while(!failed && eval_count < m_maxPoleEvals) {
239  pole.getState(state);
240  row(inState,0) = state;
241  mp_net->eval(inState,output);
242  pole.move(convertToPoleMovement(output(0,0)));
243  failed = pole.failure();
244  eval_count++;
245  }
246  // gets lower as number of evaluations grows. min = 0
247  return m_maxPoleEvals - eval_count;
248  }
249 
250  //! True if this is a single pole, false if double pole; decides between evalSingle() and evalDouble() in eval().
251  bool m_single;
252  //! True if neural network input is normalized, false otherwise; passed to the pole simulator constructor.
253  bool m_normalize;
254  //! Degrees of freedom, i.e. the number of network weights computed in the constructor
255  std::size_t m_dimensions;
256  //! Balance goal: upper bound on the number of simulation steps in one evaluation
257  std::size_t m_maxPoleEvals;
258 
259  //! Neural network: the structure and the net built on it, both allocated with new in the constructor
260  RecurrentStructure *mp_struct;
261  OnlineRNNet *mp_net;
262 
263 };
264 
265 }
266 #endif