-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathparameters.yml
319 lines (305 loc) · 7.51 KB
/
parameters.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
# Environment configuration
env:
  # Random seed used to generate rails
  # (14 medium - 14 big)
  seed: 1
  # Number of trains to spawn
  # (3, 5, 7 medium - 5, 7, 10 big)
  num_trains: 7
  # Environment width
  # (48 medium - 64 big)
  width: 48
  # Environment height
  # (27 medium - 36 big)
  height: 27
  # Maximum number of cities where agents can start or end
  # (5 medium - 9 big)
  max_cities: 5
  # Type of city distribution
  grid: false
  # Maximum number of tracks allowed between cities
  # (2 medium - 5 big)
  max_rails_between_cities: 2
  # Maximum number of parallel tracks within a city
  # (3 medium - 5 big)
  max_rails_in_cities: 3
  # Enable variable speed
  variable_speed: false
  # Malfunctions config
  malfunctions:
    # Enable malfunctions
    enabled: false
    # Malfunction rate
    # (0.0125 medium - 0.0125 big)
    rate: !!float 0.005
    # Malfunction minimum duration
    min_duration: 15
    # Malfunction maximum duration
    max_duration: 50
  # Rewards config
  # NOTE(review): nested under `env` by inference — confirm against the consumer
  rewards:
    # Penalty term applied on stop actions
    stop_penalty: 2.0
# Observator config
observator:
  # Maximum depth for the observator
  max_depth: 21
  # Binary tree observation configs
  binary_tree:
    # Observation radius
    radius: 30
  # Tree observation configs
  tree:
    # Observation radius
    radius: 10
# Predictor config
predictor:
  # Maximum depth for the predictor
  # (for the shortest/deviation predictor
  # it indicates the maximum number of deviations)
  max_depth: 4
# Policy config
policy:
  # Select policy type
  # (exactly one flag should be true)
  type:
    # Graph observation policy
    graph: false
    # Decentralized FOV observation policy
    decentralized_fov: true
    # Standard tree observation policy
    tree: false
    # Binary tree observation policy
    binary_tree: false
    # Random policy
    random: false
# Action selector config
action_selector:
  # Type of action selector
  # (exactly one flag should be true)
  type:
    # Epsilon-greedy action selection
    eps_greedy: true
    # Boltzmann action selection
    boltzmann: false
    # Random action selection
    random: false
    # Greedy action selection
    greedy: false
    # Categorical action selection
    categorical: false
  # Action selection parameter decay config
  parameter_decay:
    # Type of decay
    type:
      # Linear decay (param - decay)
      linear: true
      # Exponential decay (param * decay)
      exponential: false
      # No decay
      none: false
    # Initial exploration
    start: !!float 1.0
    # Final exploration
    end: !!float 0.01
    # Percentage of episodes with parameter greater than final exploration
    decaying_episodes: !!float 0.70
# Learning config
learning:
  # Learning rate
  learning_rate: !!float 0.5e-4
  # Weight multiplier for target network soft update
  # (the !!float tag is required: PyYAML does not implicitly
  # resolve dot-less exponent forms such as 1e-3 as floats)
  tau: !!float 1e-3
  # Discount multiplier for expected Q-value of targets
  discount: !!float 0.99
  # Bellman function
  softmax_bellman:
    # Enable or disable
    enabled: true
    # Temperature parameter in the softmax
    temperature: !!float 0.5
  # Loss function
  # (exactly one flag should be true)
  loss:
    # (Masked) Huber loss
    huber: true
    # (Masked) MSE loss
    mse: false
  # Type of gradient clipping
  gradient:
    # Maximum value for the norm of the gradients
    max_norm: 10
    # Maximum symmetrical limit for the values of the gradients
    value_limit: 1
    # Clip the norm of the gradients (`max_grad_norm`)
    clip_norm: true
    # Clamp the gradient itself (-`grad_value_limit`, `grad_value_limit`)
    clamp_values: false
# Model config
model:
  # DQN configuration
  dqn:
    # Dueling DQN configurations
    dueling:
      # Enable or disable dueling DQN
      enabled: true
      # How to aggregate advantages
      aggregation:
        # Use the mean function
        mean: true
        # Use the max function
        max: false
    # Enable or disable double DQN
    double: true
    # The number of hidden layers with their hidden sizes
    hidden_sizes:
      - 128
      - 128
    # Non-linear function
    nonlinearity:
      # ReLU function
      relu: false
      # Tanh function
      tanh: true
  # Entire GNN configuration
  entire_gnn:
    # Size of the output embedding
    embedding_size: 4
    # Hidden embedding size in GNN layers
    hidden_size: 8
    # Number of embeddings to use as DQN input
    pos_size: 3
    # Dropout value
    dropout: !!float 0.2
    # Non-linear function
    nonlinearity:
      # ReLU function
      relu: true
      # Tanh function
      tanh: false
  # Multi GNN configuration
  multi_gnn:
    # CNN configuration
    cnn_encoder:
      # Conv2d settings
      conv:
        # Filter dimension
        kernel_size: 3
        # Filter stride
        stride: 1
        # Filter padding
        padding: 1
      # MaxPool2d settings
      pool:
        # Filter dimension
        kernel_size: 2
        # Filter stride
        stride: 2
        # Filter padding
        padding: 0
      # CNN encoder hidden channels number and sizes
      hidden_channels:
        - 32
        - 32
        - 64
        - 64
      # CNN encoder size of the output channel
      output_channels: 128
    # MLP configuration
    mlp_compression:
      # The number of hidden FC layers with their hidden sizes
      # (to map the encoder output to a fixed size)
      hidden_sizes: []
      # Final MLP output size
      output_size: 128
    # GNN configuration
    gnn_communication:
      # Number and hidden embedding sizes of graph convs to perform
      hidden_sizes:
        - 128
      # Size of the output embedding
      embedding_size: 128
      # Dropout value
      dropout: !!float 0.2
      # Non-linear function
      nonlinearity:
        # ReLU function
        relu: true
        # Tanh function
        tanh: false
# Replay buffer config
replay_buffer:
  # Maximum buffer dimension
  size: 100000
  # Batch size
  batch_size: 128
  # Try to learn after this many steps
  checkpoint: 4
  # Replay buffer to restore (empty string disables loading)
  load: ""
  # Save replay buffer at each checkpoint
  save: false
# Generic config
generic:
  # Number of threads PyTorch can use
  num_threads: 1
  # Fix all the possible sources of randomness
  fix_random: true
  # Random seed used when `fix_random` is true
  random_seed: 1
  # Device config
  use_gpu: false
  # Enable wandb logging
  enable_wandb: true
  # Checkpoint to save model to wandb
  wandb_checkpoint: 500
  # Gradients logging in wandb
  wandb_gradients:
    # Enable or disable logging
    enabled: false
    # How often to log gradients
    checkpoint: 200
# Training config
training:
  # Checkpoint interval (how often to evaluate and save the model)
  checkpoint: 500
  # Train environment config
  train_env:
    # Path to the train environment file (empty string disables loading)
    load: ""
    # Number of training episodes to run
    episodes: 7500
    # Train on random environments or on the same one
    all_random: true
  # Evaluation environment config
  eval_env:
    # Path to the evaluation environment file (empty string disables loading)
    load: ""
    # Number of evaluation episodes
    episodes: 20
    # Evaluate on random environments or on the same one
    all_random: true
  # Renderer config
  renderer:
    # Render episodes during training
    training: false
    # How often to render an episode in training
    train_checkpoint: 1
    # Render episodes during evaluation
    evaluation: false
    # How often to render an episode in evaluation
    eval_checkpoint: 5
    # Save frames
    save_frames: false
# Testing config
testing:
  # Number of episodes to run
  episodes: 500
  # Path to the environment file (empty string disables loading)
  load: ""
  # Path to the model to load
  model: ""
  # Verbose option
  verbose: true
  # Renderer config
  renderer:
    # Enable renderer
    enabled: true
    # Seconds to sleep between moves
    sleep: 2
    # Save intermediate renderer frames
    save_frames: true