-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathcart_pole_env.py
331 lines (265 loc) · 11.8 KB
/
cart_pole_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
"""
Classic cart-pole example implemented with an FMU simulating a cart-pole system.
Implementation inspired by OpenAI Gym examples:
https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
"""
import logging
import math
import numpy as np
from gym import spaces
from modelicagym.environment import FMI2CSEnv, FMI1CSEnv, FMI2MEEnv
# Module-level logger; the environment constructors in this file set its level from their log_level argument.
logger = logging.getLogger(__name__)
# The pole angle is counted from the positive direction of the X-axis, so 90 degrees is the upright position.
NINETY_DEGREES_IN_RAD = (90 / 180) * math.pi
# Used as the pole deflection threshold (theta_threshold) by the environments in this file.
TWELVE_DEGREES_IN_RAD = (12 / 180) * math.pi
class CartPoleEnv:
    """
    Class extracting common logic for JModelica and Dymola environments for CartPole experiments.
    Allows to avoid code duplication.
    Implements all methods for connection to the OpenAI Gym as an environment.

    It is meant to be combined with one of the modelicagym FMU environment classes:
    ``step`` delegates to ``super().step``. The methods read the following instance attributes,
    which the concrete environments in this file assign in their constructors:
    ``force``, ``x_threshold``, ``theta_threshold``, ``viewer``, ``display``,
    ``cart_transform`` and ``pole_transform``. ``state`` is also read; it is not assigned in this
    class (presumably maintained by the modelicagym base class - verify there).
    """

    # modelicagym API implementation
    def _is_done(self):
        """
        Internal logic that is utilized by parent classes.
        Checks if cart position or pole angle have left the required bounds, defined by thresholds:
        x_threshold - 2.4
        angle threshold - 12 degrees

        :return: boolean flag if current state of the environment indicates that experiment has ended.
        True, if the cart is further than x_threshold from the starting point
        or the pole deflection from vertical exceeds theta_threshold; False otherwise.
        """
        x, x_dot, theta, theta_dot = self.state
        # Lazy logger arguments: the message is only formatted when DEBUG is enabled.
        logger.debug("x: %s, x_dot: %s, theta: %s, theta_dot: %s", x, x_dot, theta, theta_dot)

        # theta is counted from the X-axis, so deflection is measured relative to 90 degrees.
        deflection = abs(theta - NINETY_DEGREES_IN_RAD)
        return bool(abs(x) > self.x_threshold or deflection > self.theta_threshold)

    def _get_action_space(self):
        """
        Internal logic that is utilized by parent classes.
        Returns action space according to OpenAI Gym API requirements

        :return: Discrete action space of size 2, as only 2 actions are available: push left or push right.
        """
        return spaces.Discrete(2)

    def _get_observation_space(self):
        """
        Internal logic that is utilized by parent classes.
        Returns observation space according to OpenAI Gym API requirements

        The state is (x, x_dot, theta, theta_dot). Velocities are unbounded. The cart position is
        bounded by +/- x_threshold. The pole angle is counted from the X-axis (upright is 90 degrees),
        so its bounds are centred on 90 degrees rather than on zero; bounds centred on zero would
        exclude every state that ``_is_done`` treats as valid.

        :return: Box state space with specified lower and upper bounds for state variables.
        """
        low = np.array([-self.x_threshold,
                        -np.inf,
                        NINETY_DEGREES_IN_RAD - self.theta_threshold,
                        -np.inf])
        high = np.array([self.x_threshold,
                         np.inf,
                         NINETY_DEGREES_IN_RAD + self.theta_threshold,
                         np.inf])
        return spaces.Box(low, high)

    # OpenAI Gym API implementation
    def step(self, action):
        """
        OpenAI Gym API. Executes one step in the environment:
        in the current state perform given action to move to the next state.
        Applies force of the defined magnitude in one of two directions, depending on the action parameter sign.

        :param action: alias of an action to be performed. If action > 0 - push to the right, else - push left.
        :return: whatever the parent class ``step`` returns for the applied force
        """
        action = self.force if action > 0 else -self.force
        return super().step(action)

    # This function was heavily inspired by OpenAI example:
    # https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
    def render(self, mode='human', close=False):
        """
        OpenAI Gym API. Determines how current environment state should be rendered.
        Draws cart-pole with the built-in gym tools.

        :param mode: rendering mode. Read more in Gym docs.
        :param close: flag if rendering procedure should be finished and resources cleaned.
        Used, when environment is closed.
        :return: True when called with close=True, otherwise the result of the viewer's render call
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return True

        screen_width = 600
        screen_height = 400

        # the visible scene spans the whole allowed cart range [-x_threshold, x_threshold]
        scene_width = self.x_threshold * 2
        scale = screen_width / scene_width
        cart_y = 100  # TOP OF CART
        pole_width = 10.0
        pole_len = scale * 1.0
        cart_width = 50.0
        cart_height = 30.0

        if self.viewer is None:
            # imported lazily, so the rendering module is only needed when something is drawn
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height, display=self.display)

            # add cart to the rendering
            l, r, t, b = -cart_width / 2, cart_width / 2, cart_height / 2, -cart_height / 2
            cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.cart_transform = rendering.Transform()
            cart.add_attr(self.cart_transform)
            self.viewer.add_geom(cart)

            # add pole to the rendering
            pole_joint_depth = cart_height / 4
            l, r, t, b = -pole_width / 2, pole_width / 2, pole_len - pole_width / 2, -pole_width / 2
            pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            pole.set_color(.8, .6, .4)
            self.pole_transform = rendering.Transform(translation=(0, pole_joint_depth))
            # the pole gets its own transform plus the cart transform, so it moves with the cart
            pole.add_attr(self.pole_transform)
            pole.add_attr(self.cart_transform)
            self.viewer.add_geom(pole)

            # add joint to the rendering
            joint = rendering.make_circle(pole_width / 2)
            joint.add_attr(self.pole_transform)
            joint.add_attr(self.cart_transform)
            joint.set_color(.5, .5, .8)
            self.viewer.add_geom(joint)

            # add bottom line to the rendering
            track = rendering.Line((0, cart_y - cart_height / 2), (screen_width, cart_y - cart_height / 2))
            track.set_color(0, 0, 0)
            self.viewer.add_geom(track)

        # set new position according to the environment current state
        x, _, theta, _ = self.state
        cart_x = x * scale + screen_width / 2.0  # MIDDLE OF CART
        self.cart_transform.set_translation(cart_x, cart_y)
        # the drawn pole is vertical at zero rotation, while theta is counted from the X-axis
        self.pole_transform.set_rotation(theta - NINETY_DEGREES_IN_RAD)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """
        OpenAI Gym API. Closes environment and all related resources.
        Closes rendering.

        :return: True if everything worked out.
        """
        return self.render(close=True)
class JModelicaCSCartPoleEnv(CartPoleEnv, FMI2CSEnv):
    """
    Cart-pole environment running on a JModelica-compiled FMU (FMI standard v.2.0)
    through the FMI2CSEnv base class.

    Attributes:
        m_cart (float): mass of a cart.
        m_pole (float): mass of a pole.
        theta_0 (float): angle of the pole, when experiment starts.
        It is counted from the positive direction of X-axis. Specified in radians.
        1/2*pi means pole standing straight on the cart.
        theta_dot_0 (float): angle speed of the poles mass center. I.e. how fast pole angle is changing.
        time_step (float): time difference between simulation steps.
        positive_reward (int): positive reward for RL agent.
        negative_reward (int): negative reward for RL agent.
        force: magnitude of the force applied on a step; CartPoleEnv.step picks its sign from the action.
        log_level: level set on this module's logger and forwarded to the parent constructor.
        path (str): location of the FMU file, forwarded to the parent constructor.
    """

    def __init__(self,
                 m_cart,
                 m_pole,
                 theta_0,
                 theta_dot_0,
                 time_step,
                 positive_reward,
                 negative_reward,
                 force,
                 log_level,
                 path="../resources/jmodelica/linux/ModelicaGym_CartPole_CS.fmu"):
        logger.setLevel(log_level)

        # rendering resources are created on the first render() call
        self.viewer = None
        self.display = None
        self.cart_transform = None
        self.pole_transform = None

        # episode limits and control magnitude; assigned before the parent constructor runs
        self.x_threshold = 2.4
        self.theta_threshold = TWELVE_DEGREES_IN_RAD
        self.force = force

        fmu_parameters = {
            'm_cart': m_cart,
            'm_pole': m_pole,
            'theta_0': theta_0,
            'theta_dot_0': theta_dot_0,
        }
        # NOTE(review): the initial state angle is fixed at 85 degrees regardless of theta_0 -
        # confirm how the parent class uses 'initial_state'.
        start_state = (0, 0, 85 / 180 * math.pi, 0)

        config = dict(
            model_input_names=['f'],
            model_output_names=['x', 'x_dot', 'theta', 'theta_dot'],
            model_parameters=fmu_parameters,
            initial_state=start_state,
            time_step=time_step,
            positive_reward=positive_reward,
            negative_reward=negative_reward,
        )
        super().__init__(path, config, log_level)
class JModelicaMECartPoleEnv(CartPoleEnv, FMI2MEEnv):
    """
    Cart-pole environment running on a JModelica-compiled FMU (FMI standard v.2.0)
    through the FMI2MEEnv base class.

    Attributes:
        m_cart (float): mass of a cart.
        m_pole (float): mass of a pole.
        theta_0 (float): angle of the pole, when experiment starts.
        It is counted from the positive direction of X-axis. Specified in radians.
        1/2*pi means pole standing straight on the cart.
        theta_dot_0 (float): angle speed of the poles mass center. I.e. how fast pole angle is changing.
        time_step (float): time difference between simulation steps.
        positive_reward (int): positive reward for RL agent.
        negative_reward (int): negative reward for RL agent.
        force: magnitude of the force applied on a step; CartPoleEnv.step picks its sign from the action.
        log_level: level set on this module's logger and forwarded to the parent constructor.
        path (str): location of the FMU file, forwarded to the parent constructor.
    """

    def __init__(self,
                 m_cart,
                 m_pole,
                 theta_0,
                 theta_dot_0,
                 time_step,
                 positive_reward,
                 negative_reward,
                 force,
                 log_level,
                 path="../resources/jmodelica/linux/ModelicaGym_CartPole_ME.fmu"):
        logger.setLevel(log_level)

        # rendering resources are created on the first render() call
        self.viewer = None
        self.display = None
        self.cart_transform = None
        self.pole_transform = None

        # episode limits and control magnitude; assigned before the parent constructor runs
        self.x_threshold = 2.4
        self.theta_threshold = TWELVE_DEGREES_IN_RAD
        self.force = force

        fmu_parameters = {
            'm_cart': m_cart,
            'm_pole': m_pole,
            'theta_0': theta_0,
            'theta_dot_0': theta_dot_0,
        }
        # NOTE(review): the initial state angle is fixed at 85 degrees regardless of theta_0 -
        # confirm how the parent class uses 'initial_state'.
        start_state = (0, 0, 85 / 180 * math.pi, 0)

        config = dict(
            model_input_names=['f'],
            model_output_names=['x', 'x_dot', 'theta', 'theta_dot'],
            model_parameters=fmu_parameters,
            initial_state=start_state,
            time_step=time_step,
            positive_reward=positive_reward,
            negative_reward=negative_reward,
        )
        super().__init__(path, config, log_level)
class DymolaCSCartPoleEnv(CartPoleEnv, FMI1CSEnv):
    """
    Cart-pole environment running on a Dymola-compiled FMU (FMI standard v.1.0)
    through the FMI1CSEnv base class.

    Attributes:
        m_cart (float): mass of a cart. Passed to the model as 'm_trolley'.
        m_pole (float): mass of a pole. Passed to the model as 'm_load'.
        phi1_start (float): angle of the pole, when experiment starts.
        It is counted from the positive direction of X-axis. Specified in radians.
        1/2*pi means pole standing straight on the cart.
        w1_start (float): angle speed of the poles mass center. I.e. how fast pole angle is changing.
        time_step (float): time difference between simulation steps.
        positive_reward (int): positive reward for RL agent.
        negative_reward (int): negative reward for RL agent.
        force: magnitude of the force applied on a step; CartPoleEnv.step picks its sign from the action.
        log_level: level set on this module's logger and forwarded to the parent constructor.
        path (str): location of the FMU file, forwarded to the parent constructor.
    """

    def __init__(self,
                 m_cart,
                 m_pole,
                 phi1_start,
                 w1_start,
                 time_step,
                 positive_reward,
                 negative_reward,
                 force,
                 log_level,
                 path="../resources/dymola/linux/ModelicaGym_CartPole.fmu"):
        logger.setLevel(log_level)

        # rendering resources are created on the first render() call
        self.viewer = None
        self.display = None
        self.cart_transform = None
        self.pole_transform = None

        # episode limits and control magnitude; assigned before the parent constructor runs
        self.x_threshold = 2.4
        self.theta_threshold = TWELVE_DEGREES_IN_RAD
        self.force = force

        # the Dymola model uses its own variable names for the same physical quantities
        fmu_parameters = {
            'm_trolley': m_cart,
            'm_load': m_pole,
            'phi1_start': phi1_start,
            'w1_start': w1_start,
        }
        # NOTE(review): the initial state angle is fixed at 85 degrees regardless of phi1_start -
        # confirm how the parent class uses 'initial_state'.
        start_state = (0, 0, 85 / 180 * math.pi, 0)

        config = dict(
            model_input_names=['u'],
            model_output_names=['s', 'v', 'phi1', 'w'],
            model_parameters=fmu_parameters,
            initial_state=start_state,
            time_step=time_step,
            positive_reward=positive_reward,
            negative_reward=negative_reward,
        )
        # loads FMU corresponding to the Modelica type required
        super().__init__(path, config, log_level)