SinglePole.h
Go to the documentation of this file.
1 /*!
2  *
3  * \brief Pole balancing simulation for double pole
4  *
5  * Class for simulating a single pole balancing on a cart.
6  * Based on code written by Verena Heidrich-Meisner for the paper
7  *
8  * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for
9  * episodic reinforcement learning. Journal of Algorithms,
10  * 64(4):152–168, 2009.
11  *
12  * which was in turn based on code available at http://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
13  * as of 2015/4/19, written by Rich Sutton and Chuck Anderson and later modified.
14  * Faustino Gomez wrote the physics code using the differential equations from
15  * Alexis Weiland's paper and added the Runge-Kutta solver.
16  *
17  * \author Johan Valentin Damgaard
18  * \date -
19  *
20  *
21  * \par Copyright 1995-2017 Shark Development Team
22  *
23  * This file is part of Shark.
24  * <http://shark-ml.org/>
25  *
26  * Shark is free software: you can redistribute it and/or modify
27  * it under the terms of the GNU Lesser General Public License as published
28  * by the Free Software Foundation, either version 3 of the License, or
29  * (at your option) any later version.
30  *
31  * Shark is distributed in the hope that it will be useful,
32  * but WITHOUT ANY WARRANTY; without even the implied warranty of
33  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34  * GNU Lesser General Public License for more details.
35  *
36  * You should have received a copy of the GNU Lesser General Public License
37  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
38  *
39  */
40 
41 #ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_SUTTON_ANDERSON
42 #define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_SUTTON_ANDERSON
43 
44 #include <math.h>
45 #include <shark/LinAlg/Base.h>
46 
47 
48 namespace shark {
49 
50 class SinglePole {
51 public:
52 
53  //! Convert degrees to radians
54  static double degrad(double x) {
55  return x * M_PI / 180;
56  };
57 
58  //! Number of variables
59  unsigned noVars() const {
60  if(m_isMarkov)
61  return 4;
62  return 2;
63  }
64 
65  //! \param markovian Whether to return velocities in getState
66  //! \param normalize Whether to normalize return values in getState
67  SinglePole(bool markovian, bool normalize = true) {
68  this->m_isMarkov = markovian;
69 
70  // current normalization constants are the same as used for
71  // the double pole simulation in Heidrich-Meisner code
72  // perhaps tweak them for use with single pole?
73  if(normalize) {
74  this->m_normal_cart = 4.8;
75  this->m_normal_pole = 0.52;
76  this->m_normal_velo = 2;
77  }
78  else {
79  this->m_normal_cart = 1.;
80  this->m_normal_pole = 1.;
81  this->m_normal_velo = 1.;
82  }
83  }
84 
85  //! \brief Initialize with specific pole angle
86  //! \param state2init initial pole angle (in degrees)
87  void initDegree(double state2init) {
88  init(state2init * M_PI / 180);
89  }
90 
91 
92  //! \brief Initialize with specific pole angle
93  //! \param state2init initial pole angle (in radians)
94  void init(double state2init = 0.07) {
95  m_state[0] = m_state[1] = m_state[3] = 0.0;
96  m_state[2] = state2init;
97  }
98 
99  //! \brief Initialize full m_state
100  //! \param a initial cart position
101  //! \param b initial cart velocity
102  //! \param c initial pole angle (in radians)
103  //! \param d initial pole angular velocity
104  void init(double a, double b, double c, double d) {
105  m_state[0] = a;
106  m_state[1] = b;
107  m_state[2] = c;
108  m_state[3] = d;
109  }
110 
111  //! \brief Place m_state in a vector
112  //! \param v vector to place m_state in (assumed to be correct size already)
113  void getState(RealVector &v) {
114  // normalize the m_state variables
115  if(m_isMarkov) {
116  v(0) = m_state[0] / m_normal_cart;
117  v(1) = m_state[1] / m_normal_velo;
118  v(2) = m_state[2] / m_normal_pole;
119  v(3) = m_state[3] / m_normal_velo;
120  }
121  else {
122  v(0) = m_state[0] / m_normal_cart;
123  v(1) = m_state[2] / m_normal_pole;
124  }
125  }
126 
127  //! Returns true when this pole is in an illegal position
128  bool failure() {
129  const double twelve_degrees = degrad(12);
130  if (m_state[0] < -2.4 || m_state[0] > 2.4 || m_state[2] < -twelve_degrees ||
131  m_state[2] > twelve_degrees) return true;
132  return false;
133  }
134 
135  //! \brief Move the pole with some force
136  //! \param output Force to apply. Expects values in [0,1], where values below 0.5 indicate applying force towards the left side and values above indicate force towards the right.
137  void move(double output) {
138  double dydx[4];
139 
140  dydx[1] = 0.;
141  dydx[3] = 0.;
142 
143  for(int i = 0; i < 2; i++) {
144  dydx[0] = m_state[1];
145  dydx[2] = m_state[3];
146  step(output,m_state,dydx);
147  rk4(output,m_state,dydx,m_state);
148  }
149  }
150 
151 private:
152  void step(double output, double *st, double *derivs) {
153 
154  const double MUP = 0.0;
155  const double GRAVITY = -9.8;
156  const double MASSCART = 1.0;
157  const double MASSPOLE = 0.1;
158  const double LENGTH = 0.5;
159  const double FORCE_MAG = 10.0;
160 
161  double force = (output -0.5) * FORCE_MAG * 2;
162  double costheta = cos(st[2]);
163  double sintheta = sin(st[2]);
164  double gsintheta = GRAVITY * sintheta;
165 
166  double ml = LENGTH * MASSPOLE;
167  double temp = MUP * st[3] / ml;
168  double fi = (ml * st[3] * st[3] * sintheta) + (0.75 * MASSPOLE * costheta * (temp + gsintheta));
169  double mi = MASSPOLE * (1 - (0.75 * costheta * costheta));
170 
171  derivs[1] = (force + fi) / (mi + MASSCART);
172 
173  derivs[3] = -0.75 * (derivs[1] * costheta + gsintheta + temp) / LENGTH;
174  }
175 
176  void rk4(double f,double y[], double dydx[], double yout[]) {
177  const double TAU = 0.01;
178 
179  double dym[4],dyt[4],yt[4];
180 
181  double hh = TAU*0.5;
182  double h6 = TAU/6.0;
183  for (int i = 0; i <= 3; i++) {
184  yt[i] = y[i]+hh*dydx[i];
185  }
186  step(f,yt,dyt);
187  dyt[0] = yt[1];
188  dyt[2] = yt[3];
189  for (int i = 0; i <= 3; i++) {
190  yt[i] = y[i]+hh*dyt[i];
191  }
192  step(f,yt,dym);
193  dym[0] = yt[1];
194  dym[2] = yt[3];
195  for (int i = 0; i <= 3; i++) {
196  yt[i] = y[i] + TAU * dym[i];
197  dym[i] += dyt[i];
198  }
199  step(f,yt,dyt);
200  dyt[0] = yt[1];
201  dyt[2] = yt[3];
202  for (int i = 0; i <= 3; i++) {
203  yout[i] = y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
204  }
205  }
206 
207  double m_state[4];
208  bool m_isMarkov;
209  double m_normal_cart;
210  double m_normal_pole;
211  double m_normal_velo;
212 
213 };
214 
215 }
216 #endif