# -*- coding: utf-8 -*-
"""
@author: Wenbo Wang

[Wang2020] Wenbo Wang, Amir Leshem, Dusit Niyato and Zhu Han, "Decentralized Learning for Channel
Allocation in IoT Networks over Unlicensed Bandwidth as a Contextual Multi-player Multi-armed Bandit Game"

License:
This program is licensed under the GPLv2 license. If you in any way use this code for research
that results in publications, please cite our original article listed above.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
"""

# Used for the simulations in the paper "Decentralized Learning for Channel Allocation in IoT Networks
# over Unlicensed Bandwidth as a Contextual Multi-player Multi-armed Bandit Game", by Wenbo Wang et al.
# This file is the main entry point of all the simulations except those w.r.t. network sizes.

__author__ = "Wenbo Wang"
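
# Usage (a sketch based on the argparse setup in the __main__ block below; the valid
# configuration IDs are those defined in CONFIG.CONFIGURATION_DICT of simu_config.py):
#
#   python main_MPMAB.py -id 1
#
# When -id is omitted, the script falls back to CONFIG.ENV_SCENARIO_1_PARALLEL.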
import numpy as np
import pandas as pd
import time
import datetime
import argparse

from GameEvaluator import AlgEvaluator
from plotutils import plot_data_frame, plot_repeated_simu_results

import simu_config as CONFIG

def simulation_execution(game_config):
    """
    simulation_execution() is the main body of the MP-MAB algorithm simulations.
    """
    print("MAB game with configuration '{}' starts to play...".format(repr(game_config)))

    game_horizon = game_config.game_horizon
    alg_engine = AlgEvaluator(game_config.env_config)

    # add the algorithms selected by the configuration
    for alg_id in range(len(game_config.alg_types)):
        alg_engine.add_algorithm(algo_type=game_config.alg_types[alg_id],
                                 custome_params=game_config.alg_configs[alg_id])  # note: 'custome_params' is the keyword defined by AlgEvaluator

    print("MAB game prepares the environment for arm type '{}' over {} rounds".format(
        game_config.env_config['env_type'], game_horizon))
    alg_engine.prepare_arm_samples()
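
    # The configuration flags below select up to four studies: (1) a single-shot run that
    # plots the rewards of the algorithms, (2) repeated runs that plot the average regret
    # against a reference algorithm, (3) repeated runs that plot the average sum of rewards,
    # and (4) repeated runs that plot accumulated switching and collision counts.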
    # simulation 1: reward plotting to compare the efficiency of the algorithms
    if "enable_efficiency_simulation" in game_config.__dict__ and game_config.enable_efficiency_simulation:
        start_time_oneshot = time.time()
        #######################################################################
        #
        if not game_config.flag_parallel:
            # print("starting single-process simulation...")
            alg_engine.play_game(flag_progress_bar=game_config.flag_progress_bar)
        else:
            # print("starting parallel simulation...")
            alg_engine.play_game_parallel(flag_progress_bar=game_config.flag_progress_bar)
        #
        #######################################################################
        alg_engine.plot_rewards(save_fig=game_config.flag_save_figure, save_data=game_config.save_data)

        # printing
        running_time = time.time() - start_time_oneshot
        print("Single-shot simulation completes in {} for {} iterations.".format(
            datetime.timedelta(seconds=running_time), game_horizon))
    # simulations 2/3/4: plotting regret or total rewards over the horizon
    if ("enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation) or \
       ("enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation) or \
       ("enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation):
        start = game_config.T_start
        nb_point = game_config.T_step
        horizon_list = np.exp(np.linspace(np.log(start), np.log(game_horizon), nb_point))
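        # horizon_list holds nb_point sampling horizons spaced evenly on a logarithmic
        # scale between start and game_horizon; e.g., start=100, game_horizon=10000 and
        # nb_point=3 would give approximately [100, 1000, 10000].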
        simu_rounds = game_config.T_simu_rounds

        start_time_repeated = time.time()
        #######################################################################
        #
        if not game_config.flag_parallel:
            # print("starting single-process simulation...")
            simulation_results = alg_engine.play_repeated_game(horizon_list, simulation_rounds=simu_rounds,
                                                               flag_progress_bar=game_config.flag_progress_bar)
        else:
            # print("starting parallel simulation...")
            simulation_results = alg_engine.play_repeated_game_parallel(horizon_list, simulation_rounds=simu_rounds,
                                                                        flag_progress_bar=game_config.flag_progress_bar)
        #
        #######################################################################
        # printing
        running_time = time.time() - start_time_repeated
        print("Repeated simulations complete in {} with maximum horizon {} over {} rounds of play...".format(
            datetime.timedelta(seconds=running_time), game_horizon, simu_rounds))
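
        # simulation_results collects, per algorithm, the series flattened over all
        # repeated rounds; the keys used below are 'algorithm_name', 'horizon',
        # 'reward_series', 'switching_count_series' and 'collision_series'.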
        # visualization for simulation 2
        if "enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation:
            # the algorithm at index 0 serves as the (optimal) reference for regret computation
            optimal_alg_id = 0
            len_horizon = simulation_results['horizon'].shape[1]
            time_series = np.empty((0, len_horizon))
            alg_indicator_series = []
            avg_regret_series = np.empty((0, len_horizon))
            for alg_id in range(len(simulation_results['algorithm_name'])):
                if alg_id != optimal_alg_id:
                    # the returned value simulation_results['reward_series'] is organized as an array
                    # of shape (len(algorithm_ids), simulation_rounds*len(horizon_list))
                    horizon_series = simulation_results['horizon'][alg_id, :]
                    # average regret per play: the gap in cumulative reward w.r.t. the
                    # reference algorithm, normalized by the horizon
                    avg_regret = (simulation_results['reward_series'][optimal_alg_id, :] -
                                  simulation_results['reward_series'][alg_id, :]) / horizon_series
                    avg_regret_series = np.append(avg_regret_series, avg_regret)  # flatten
                    time_series = np.append(time_series, horizon_series)
                    alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series))
            prepared_results = {}
            prepared_results['Average regret'] = avg_regret_series
            prepared_results['Total number of plays'] = time_series
            prepared_results['Algorithms'] = alg_indicator_series

            simu_data_frame = pd.DataFrame(prepared_results)
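            # The frame is in long ("tidy") format: one row per (algorithm, horizon, round)
            # sample, so that plot_data_frame can group the curves by the 'Algorithms' column.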
            # plot and save the figure
            file_name = "monte_carlo_regret" if game_config.flag_save_figure else None
            sns_figure_unused, repeated_play_data_name = plot_data_frame(simu_data_frame,
                    xlabel="Total number of plays", ylabel="Average regret", huelabel='Algorithms',
                    save_file_name=file_name, save_data_name=game_config.repeated_play_data_name)

            # post processing: add the theoretical bound to the figure if requested
            flag_bound = getattr(game_config, 'flag_regret_bound', False)
            plot_repeated_simu_results(start=start, horzion=game_horizon, nbPoints=nb_point, flag_bound=flag_bound,
                                       data_file_name=repeated_play_data_name)  # 'horzion' (sic) is the parameter name defined in plotutils
        # visualization for simulation 3
        if "enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation:
            len_horizon = simulation_results['horizon'].shape[1]
            time_series = np.empty((0, len_horizon))
            alg_indicator_series = []
            reward_series = np.array([])

            for alg_id in range(len(simulation_results['algorithm_name'])):
                horizon_series = simulation_results['horizon'][alg_id, :]
                # average reward per play: the cumulative reward normalized by the horizon
                avg_rewards = simulation_results['reward_series'][alg_id, :] / horizon_series
                reward_series = np.append(reward_series, avg_rewards)  # flatten
                time_series = np.append(time_series, horizon_series)
                alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series))

            prepared_results = {}
            prepared_results['Average sum of rewards'] = reward_series
            prepared_results['Total number of plays'] = time_series
            prepared_results['Algorithms'] = alg_indicator_series

            simu_data_frame = pd.DataFrame(prepared_results)

            # plot and save the figure
            file_name = "monte_carlo_rewards" if game_config.flag_save_figure else None
            plot_data_frame(simu_data_frame,
                            xlabel="Total number of plays", ylabel="Average sum of rewards", huelabel='Algorithms',
                            flag_semilogx=False,
                            save_file_name=file_name, save_data_name=game_config.repeated_play_data_name)
        # visualization for simulation 4
        if "enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation:
            len_horizon = simulation_results['horizon'].shape[1]
            time_series = np.empty((0, len_horizon))
            alg_indicator_series = []
            switching_series = np.array([])
            collision_series = np.array([])

            for alg_id in range(len(simulation_results['algorithm_name'])):
                horizon_series = simulation_results['horizon'][alg_id, :]
                switching = simulation_results['switching_count_series'][alg_id, :]
                collisions = simulation_results['collision_series'][alg_id, :]
                switching_series = np.append(switching_series, switching)  # flatten
                collision_series = np.append(collision_series, collisions)  # flatten
                time_series = np.append(time_series, horizon_series)
                alg_indicator_series.extend([simulation_results['algorithm_name'][alg_id]] * len(horizon_series))

            prepared_results = {}
            prepared_results['Accumulated switching counts'] = switching_series
            prepared_results['Accumulated collision counts'] = collision_series
            prepared_results['Total number of plays'] = time_series
            prepared_results['Algorithms'] = alg_indicator_series

            assert len(switching_series) == len(collision_series), \
                "the switching and collision arrays must be of the same length: {}, {}".format(
                    len(switching_series), len(collision_series))

            simu_data_frame = pd.DataFrame(prepared_results)

            # plot and save figure 1: switching counts
            file_name = "monte_carlo_switching" if game_config.flag_save_figure else None
            plot_data_frame(simu_data_frame,
                            xlabel="Total number of plays", ylabel="Accumulated switching counts", huelabel='Algorithms',
                            flag_semilogx=False,
                            save_file_name=file_name, save_data_name=game_config.repeated_play_data_name)

            # plot and save figure 2: collision counts
            file_name = "monte_carlo_collision" if game_config.flag_save_figure else None
            plot_data_frame(simu_data_frame,
                            xlabel="Total number of plays", ylabel="Accumulated collision counts", huelabel='Algorithms',
                            flag_semilogx=False,
                            save_file_name=file_name, save_data_name=game_config.repeated_play_data_name)

if __name__ == '__main__':
    """
    Parallel processing is recommended for repeated simulations (see simu_config.py);
    it is approximately 2x to 4x faster in total running time than the single-process simulation.
    """
    arg_parser = argparse.ArgumentParser(description="Select a configuration set in 'simu_config.py' to run the simulations")

    # add the arguments
    arg_parser.add_argument('-id', metavar='ID', type=int,
                            help='Choose a configuration ID in [1-13], see the summary in simu_config.py')

    args = arg_parser.parse_args()
    if args.id is None:
        # default choice of configuration for a simulation
        game_config = CONFIG.ENV_SCENARIO_1_PARALLEL
    else:
        if args.id in CONFIG.CONFIGURATION_DICT:
            game_config = CONFIG.CONFIGURATION_DICT[args.id]
        else:
            raise ValueError('the input configuration ID is not valid')

    # beginning of the game: record the starting time and start the simulations
    start_time = time.time()
    simulation_execution(game_config)

    # end of the game
    running_time = time.time() - start_time
    print("Simulation completes in {}.".format(datetime.timedelta(seconds=running_time)))