From 4bb5d6ade182eade10092ef6dc9f466939643c6a Mon Sep 17 00:00:00 2001 From: DennisSoemers Date: Fri, 16 Jun 2023 16:12:09 +0200 Subject: [PATCH 1/4] Documenting shapes in get_action_mask(). --- gym_microrts/envs/vec_env.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py index 244e56b4..92094065 100644 --- a/gym_microrts/envs/vec_env.py +++ b/gym_microrts/envs/vec_env.py @@ -279,7 +279,13 @@ def close(self): jpype.shutdownJVM() def get_action_mask(self): + """ + :return: Mask for action types and action parameters, + of shape [num_envs, map height * width, action types + params] + """ + # action_mask shape: [num_envs, map height, map width, 1 + action types + params] action_mask = np.array(self.vec_client.getMasks(0)) + # self.source_unit_mask shape: [num_envs, map height * map width * 1] self.source_unit_mask = action_mask[:, :, :, 0].reshape(self.num_envs, -1) action_type_and_parameter_mask = action_mask[:, :, :, 1:].reshape(self.num_envs, self.height * self.width, -1) return action_type_and_parameter_mask From 128b920457a6a4f92d663eedb63b592bc643e177 Mon Sep 17 00:00:00 2001 From: DennisSoemers Date: Tue, 20 Jun 2023 17:07:30 +0200 Subject: [PATCH 2/4] More comments. --- experiments/ppo_gridnet_large.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/experiments/ppo_gridnet_large.py b/experiments/ppo_gridnet_large.py index 2499babe..98999dc4 100644 --- a/experiments/ppo_gridnet_large.py +++ b/experiments/ppo_gridnet_large.py @@ -203,6 +203,14 @@ def __init__(self, envs, mapsize=16 * 16): self.register_buffer("mask_value", torch.tensor(-1e8)) def get_action_and_value(self, x, action=None, invalid_action_masks=None, envs=None, device=None): + """ + :return: + (1) action (shape = [1, width*height, 7], where 7 = dimensionality of per-unit action) + (2) log probability of action (shape = [1]) + (3) entropy (shape = [1]) + (4) invalid action masks + (5) Critic's prediction + """ hidden = self.encoder(x) logits = self.actor(hidden) grid_logits = logits.reshape(-1, envs.action_plane_space.nvec.sum()) From 2e41c16250f7aabc0446dfc2ad646add41a819c6 Mon Sep 17 00:00:00 2001 From: DennisSoemers Date: Wed, 21 Jun 2023 15:46:24 +0200 Subject: [PATCH 3/4] Fixed comments. --- experiments/ppo_gridnet_large.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/experiments/ppo_gridnet_large.py b/experiments/ppo_gridnet_large.py index 98999dc4..ccade4a7 100644 --- a/experiments/ppo_gridnet_large.py +++ b/experiments/ppo_gridnet_large.py @@ -205,9 +205,9 @@ def __init__(self, envs, mapsize=16 * 16): def get_action_and_value(self, x, action=None, invalid_action_masks=None, envs=None, device=None): """ :return: - (1) action (shape = [1, width*height, 7], where 7 = dimensionality of per-unit action) - (2) log probability of action (shape = [1]) - (3) entropy (shape = [1]) + (1) action (shape = [num_envs, width*height, 7], where 7 = dimensionality of per-unit action) + (2) log probability of action (shape = [num_envs]) + (3) entropy (shape = [num_envs]) (4) invalid action masks (5) Critic's prediction """ From d10cdaae22e715917f9aa72c01ce43cb00177c49 Mon Sep 17 00:00:00 2001 From: DennisSoemers Date: Tue, 22 Aug 2023 12:36:35 +0200 Subject: [PATCH 4/4] updated java submodule --- gym_microrts/microrts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym_microrts/microrts b/gym_microrts/microrts index 77411e7d..515ceff9 160000 --- a/gym_microrts/microrts +++ b/gym_microrts/microrts @@ -1 +1 @@ -Subproject commit 77411e7d133820cd199a91382474e0f1bb3b7316 +Subproject commit 515ceff955611ad32a726756bb0c96782978126d