DoublePole.h
Go to the documentation of this file.
1 /*!
2  *
3  * \brief Pole balancing simulation for double poles
4  *
5  * Class for simulating two poles balancing on a cart.
6  * Based on code written by Verena Heidrich-Meisner for the paper
7  *
8  * V. Heidrich-Meisner and C. Igel. Neuroevolution strategies for episodic reinforcement learning. Journal of Algorithms, 64(4):152–168, 2009.
9  *
10  * which was in turn based on code available at http://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
11  * as of 2015/4/19, written by Rich Sutton and Chuck Anderson and later modified.
12  * Faustino Gomez wrote the physics code using the differential equations from
13  * Alexis Weiland's paper and added the Runge-Kutta solver.
14  *
15  * \author Johan Valentin Damgaard
16  * \date -
17  *
18  *
19  * \par Copyright 1995-2017 Shark Development Team
20  *
21  * This file is part of Shark.
22  * <http://shark-ml.org/>
23  *
24  * Shark is free software: you can redistribute it and/or modify
25  * it under the terms of the GNU Lesser General Public License as published
26  * by the Free Software Foundation, either version 3 of the License, or
27  * (at your option) any later version.
28  *
29  * Shark is distributed in the hope that it will be useful,
30  * but WITHOUT ANY WARRANTY; without even the implied warranty of
31  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32  * GNU Lesser General Public License for more details.
33  *
34  * You should have received a copy of the GNU Lesser General Public License
35  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
36  *
37  */
38 #ifndef SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_TEXAS
39 #define SHARK_OBJECTIVEFUNCTIONS_BENCHMARKS_POLE_TEXAS
40 
41 #include <math.h>
42 #include <shark/LinAlg/Base.h>
43 
44 
45 
46 namespace shark {
47 
48 class DoublePole {
49 public:
50 
51  //! Number of variables
52  unsigned noVars() const {
53  if(m_isMarkovian)
54  return 6;
55  return 3;
56  }
57 
58  //! \param markovian Whether to return velocities in getState
59  //! \param normalize Whether to normalize return values in getState
60  DoublePole(bool markovian, bool normalize = true) {
61  m_isMarkovian = markovian;
62 
63  // current normalization constants are the same as used
64  // in the Heidrich-Meisner code
65  // perhaps tweak them?
66  if(normalize) {
67  this->m_normal_cart = 4.8;
68  this->m_normal_pole = 0.52;
69  this->m_normal_velo = 2;
70  }
71  else {
72  this->m_normal_cart = 1.;
73  this->m_normal_pole = 1.;
74  this->m_normal_velo = 1.;
75  }
76 
77  }
78 
79 
80  //! \brief Place m_state in a vector
81  //! \param v vector to place m_state in (assumed to be correct size already)
82  void getState(RealVector &v) {
83  // normalize the m_state variables.
84  if(m_isMarkovian) {
85  v(0) = m_state[0] / m_normal_cart;
86  v(1) = m_state[1] / m_normal_velo;
87  v(2) = m_state[2] / m_normal_pole;
88  v(3) = m_state[3] / m_normal_velo;
89  v(4) = m_state[4] / m_normal_pole;
90  v(5) = m_state[5] / m_normal_velo;
91  }
92  else {
93  v(0) = m_state[0] / m_normal_cart;
94  v(1) = m_state[2] / m_normal_pole;
95  v(2) = m_state[4] / m_normal_pole;
96  }
97  }
98 
99  //! \brief Initialize with specific angle for large pole
100  //! \param state2init initial pole angle (in radians)
101  void init(double state2init = 0.07) {
102  m_state[0] = m_state[1] = m_state[3] = m_state[4] = m_state[5] = 0;
103  m_state[2] = state2init;
104  }
105 
106  //! \brief Initialize with specific angle for large pole
107  //! \param state2init initial pole angle (in degrees)
108  void initDegree(double state2init) {
109  init(state2init * M_PI / 180);
110  }
111 
112  //! \brief Initialize full m_state
113  //! \param a initial cart position
114  //! \param b initial cart velocity
115  //! \param c initial large pole angle (in radians)
116  //! \param d initial large pole angular velocity
117  //! \param e initial small pole angle (in radians)
118  //! \param f initial small pole angular velocity
119  void init(double a, double b, double c, double d, double e = 0., double f = 0.) {
120  m_state[0] = a;
121  m_state[1] = b;
122  m_state[2] = c;
123  m_state[3] = d;
124  m_state[4] = e;
125  m_state[5] = f;
126  }
127 
128  //! Returns true when this pole is in an illegal position
129  bool failure()
130  {
131  const double thirty_six_degrees= 0.628329;
132  const double failureAngle = thirty_six_degrees;
133 
134  return(m_state[0] < -2.4 ||
135  m_state[0] > 2.4 ||
136  m_state[2] < -failureAngle ||
137  m_state[2] > failureAngle ||
138  m_state[4] < -failureAngle ||
139  m_state[4] > failureAngle);
140  }
141 
142  //! Return "jiggle", abstract representation of how much the the cart oscillates
143  double getJiggle() {
144  return (std::abs(m_state[0]) + std::abs(m_state[1]) + std::abs(m_state[2]) + std::abs(m_state[3]));
145  }
146 
147  //! \brief Move the pole with some force
148  //! \param output Force to apply. Expects values in [0,1], where values below 0.5 indicate applying force towards the left side and values above indicate force towards the right.
149  void move(double output)
150  {
151  double dydx[6];
152  const double TAU= 0.01;
153 
154  const bool useRK4=true;
155  if(useRK4) {
156  for(int i=0; i<2; ++i) {
157  dydx[0] = m_state[1];
158  dydx[2] = m_state[3];
159  dydx[4] = m_state[5];
160  step(output,m_state,dydx);
161  rk4(output,m_state,dydx,m_state);
162  }
163  }
164  else {
165  const double EULER_STEPS = 4.;
166  const double EULER_TAU= TAU/EULER_STEPS;
167  for(int i=0; i<2 * EULER_STEPS; ++i) {
168  step(output,m_state,dydx);
169  m_state[0] += EULER_TAU * m_state[1];
170  m_state[1] += EULER_TAU * dydx[1];
171  m_state[2] += EULER_TAU * m_state[3];
172  m_state[3] += EULER_TAU * dydx[3];
173  m_state[4] += EULER_TAU * m_state[5];
174  m_state[5] += EULER_TAU * dydx[5];
175  }
176  }
177  }
178 
179 private:
180  void step(double action, double *st, double *derivs)
181  {
182  const double MUP = 0.000002;
183  const double MUC = 0.0005;
184  const double GRAVITY= -9.8;
185  const double MASSCART= 1.0;
186  const double LENGTH_1 = 0.5;
187  const double MASSPOLE_1 = 0.1;
188  const double LENGTH_2 = 0.1 * LENGTH_1;
189  const double MASSPOLE_2 = 0.1 * MASSPOLE_1;
190  const double FORCE_MAG= 10.0;
191 
192  double signum = 0.;
193 
194  if (st[1] < 0) {
195  signum = -1;
196  }
197  if (st[1] > 0) {
198  signum = 1;
199  }
200 
201  double force = ((action - 0.5) * FORCE_MAG * 2) - (MUC*signum);
202  double costheta_1 = cos(st[2]);
203  double sintheta_1 = sin(st[2]);
204  double gsintheta_1 = GRAVITY * sintheta_1;
205  double costheta_2 = cos(st[4]);
206  double sintheta_2 = sin(st[4]);
207  double gsintheta_2 = GRAVITY * sintheta_2;
208 
209  double ml_1 = LENGTH_1 * MASSPOLE_1;
210  double ml_2 = LENGTH_2 * MASSPOLE_2;
211  double temp_1 = MUP * st[3] / ml_1;
212  double temp_2 = MUP * st[5] / ml_2;
213  double fi_1 = (ml_1 * st[3] * st[3] * sintheta_1) +
214  (0.75 * MASSPOLE_1 * costheta_1 * (temp_1 + gsintheta_1));
215  double fi_2 = (ml_2 * st[5] * st[5] * sintheta_2) +
216  (0.75 * MASSPOLE_2 * costheta_2 * (temp_2 + gsintheta_2));
217  double mi_1 = MASSPOLE_1 * (1 - (0.75 * costheta_1 * costheta_1));
218  double mi_2 = MASSPOLE_2 * (1 - (0.75 * costheta_2 * costheta_2));
219 
220  derivs[1] = (force + fi_1 + fi_2)
221  / (mi_1 + mi_2 + MASSCART);
222 
223  derivs[3] = -0.75 * (derivs[1] * costheta_1 + gsintheta_1 + temp_1)
224  / LENGTH_1;
225  derivs[5] = -0.75 * (derivs[1] * costheta_2 + gsintheta_2 + temp_2)
226  / LENGTH_2;
227 
228  }
229 
230  void rk4(double f, double y[], double dydx[], double yout[])
231  {
232  const double TAU= 0.01;
233  double dym[6],dyt[6],yt[6];
234 
235  double hh=TAU*0.5;
236  double h6=TAU/6.0;
237  for (int i=0; i<=5; i++) yt[i]=y[i]+hh*dydx[i];
238  step(f,yt,dyt);
239  dyt[0] = yt[1];
240  dyt[2] = yt[3];
241  dyt[4] = yt[5];
242  for (int i=0; i<=5; i++) yt[i]=y[i]+hh*dyt[i];
243  step(f,yt,dym);
244  dym[0] = yt[1];
245  dym[2] = yt[3];
246  dym[4] = yt[5];
247  for (int i=0; i<=5; i++) {
248  yt[i]=y[i]+TAU*dym[i];
249  dym[i] += dyt[i];
250  }
251  step(f,yt,dyt);
252  dyt[0] = yt[1];
253  dyt[2] = yt[3];
254  dyt[4] = yt[5];
255  for (int i=0; i<=5; i++)
256  yout[i]=y[i]+h6*(dydx[i]+dyt[i]+2.0*dym[i]);
257  }
258 
259  bool m_isMarkovian;
260 
261  double m_state[6];
262 
263  double m_normal_cart;
264  double m_normal_pole;
265  double m_normal_velo;
266 
267 };
268 }
269 #endif